Creating a network from a CSV file

The engine.
Post Reply
jdtoellner
Posts: 71
Joined: Mon Aug 01, 2016 9:45 pm

Creating a network from a CSV file

Post by jdtoellner »

I wrote a program that reads a CSV file and creates nodes in a simple network. I need this for a project I'm working on.

I'm not creating arcs yet. That will come soon.

(I've attached a CSV file. You'll have to change the extension to "CSV". This form wouldn't let me attach it as a csv file.)

Code: Select all

// ImportFromCSV.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"
#include "smile.h"
#include <fstream>
#include <iostream>
#include <string>
#include <sstream>
#include <vector>

using namespace std;

struct nodeParams {			// One parsed CSV line, as filled in by parseLine
	string ID;					// Identifier built from the first two fields after RemoveSpecials
	string name;				// First two fields concatenated without cleaning
	string stateName;			// Third field after RemoveSpecials
	double probability = 0.0;	// Fourth field; starts at 0 so a line with fewer fields never leaves it indeterminate
};

struct nodeSize {			// Default dimensions applied to every node that gets created
	int height{ 100 };
	int width{ 200 };
};

struct nodePosition {		// Layout cursor: where the next node goes on the grid
	// Fixed origin of the grid
	const int starting_x{ 100 };
	const int starting_y{ 100 };

	// Center of the next node; begins at the origin
	int center_x{ starting_x };
	int center_y{ starting_y };

	// Offsets added on top of the node size when stepping to the next slot
	int x_offset{ 40 };
	int y_offset{ 40 };

	// Column limit used when deciding to wrap to a new row
	int max_columns{ 30 };
};

string RemoveSpecials(string str);		// Removes illegal characters that SMILE doesn't like
nodeParams parseLine(string line);		// Parses a CSV file line; outputs into a "nodeParams" structured variable

class csvTextFile						// Pairs a CSV file name with the input stream opened on it
{
public:
	csvTextFile(string);				// Takes the file name
	~csvTextFile();

	string name;						// File name handed to the constructor
	ifstream cvsTextFileStream;			// Stream the rest of the program reads lines from
};

csvTextFile::csvTextFile(string fileName)	// Stores the file name and tries to open the file for reading
{
	name = fileName;

	// Make the stream throw on failbit/badbit so that a failed open() lands in the catch below.
	// NOTE: this mask stays active after construction, so any later operation on
	// cvsTextFileStream that sets failbit or badbit throws as well.
	cvsTextFileStream.exceptions(ifstream::failbit | ifstream::badbit);

	// Open the CSV file
	try {
		cvsTextFileStream.open(name);
	}
	catch (const system_error& e) {
		// Report the file that was actually requested, not a hard-coded name
		cerr << "Error opening " << name << endl;
		cerr << e.code().message() << endl;
	}
}

csvTextFile::~csvTextFile()
{
	// Only close a stream that is really open. close() on a stream that never opened
	// sets failbit, which throws under the exception mask installed by the constructor;
	// an exception escaping a destructor terminates the program.
	if (cvsTextFileStream.is_open()) {
		cvsTextFileStream.close();
	}
}

class networkClass		// Holds the SMILE network together with its output file settings and layout defaults
{
public:
	networkClass(string);				// Takes the name of the file the network is written to
	~networkClass();

	DSL_network theNet;					// The network being built
	DSL_errorStringHandler netErrors;
	string XML_fileName;				// Output file name
	int format = DSL_XDSL_FORMAT;		// File format passed to WriteFile

	nodeSize defaultNodeSize;			// Size given to each new node
	nodePosition nextNodePosition;		// Where the next new node is placed
};

networkClass::networkClass(string fileName)		// BBN network constructor
	: XML_fileName(fileName)					// Remembering the XML file name is the only work done here
{
}

networkClass::~networkClass() = default;	// No explicit cleanup; the members clean up after themselves


string RemoveSpecials(string str) {		// Reduce arbitrary text to characters SMILE accepts in an identifier

	// Keep only letters, digits and underscores; everything else is dropped
	string cleaned;
	cleaned.reserve(str.length());
	for (char c : str) {
		const bool isDigit = (c >= '0') && (c <= '9');
		const bool isUpper = (c >= 'A') && (c <= 'Z');
		const bool isLower = (c >= 'a') && (c <= 'z');
		if (isDigit || isUpper || isLower || (c == '_')) {
			cleaned += c;
		}
	}

	// Precede a numeric name with "Node_". This is checked after filtering so that a
	// digit exposed by removing leading illegal characters (e.g. " 1abc") is caught too.
	if (!cleaned.empty() && (cleaned[0] >= '0') && (cleaned[0] <= '9')) {
		cleaned.insert(0, "Node_");
	}
	return cleaned;
}

nodeParams parseLine(string line) {		// Parse one comma-delimited CSV line into ID, name, stateName and probability

	// Value-initialize the result so that fields missing from a short or blank line
	// come back empty / 0.0 instead of holding an indeterminate probability
	nodeParams nodeParameters{};

	istringstream line_stream(line);
	string token;						// Text between commas
	int tokenCount = 0;

	// Read all tokens. The end-of-stream test is deliberate: a trailing comma still
	// produces one final empty token, exactly as an empty last field should.
	while (!line_stream.eof()) {

		getline(line_stream, token, ',');	// Get next token (four are expected per line)

		switch (tokenCount) {
		case 0:		// First part of node name
			nodeParameters.ID = RemoveSpecials(token);
			nodeParameters.name = token;
			break;
		case 1:		// Second part of node name
			nodeParameters.ID = nodeParameters.ID + "_" + RemoveSpecials(token);
			nodeParameters.name = nodeParameters.name + token;
			break;
		case 2:		// State name
			nodeParameters.stateName = RemoveSpecials(token);
			break;
		case 3:		// Probability (atof yields 0 for text that is not a number)
			nodeParameters.probability = atof(token.c_str());
			break;
		default:	// Any further fields are ignored
			break;
		}
		tokenCount++;
	}
	return nodeParameters;	// Parsed parameters (ID, name, stateName, and probability)
}

int main()
{
	networkClass BBN_Network("BBN_Network.xdsl");

	nodeParams nodeParameters;		// Object that contains parsed out node parameters from parseLine

	csvTextFile csvFile("Import.csv");	// This has a constructor that opens the file and counts lines, nodes, and children

	string line;

	// Parse each line in the csv file
	while (!csvFile.cvsTextFileStream.eof()) {

		getline(csvFile.cvsTextFileStream, line, '\n');

		nodeParameters = parseLine(line); // Parse comma delimited line into specific parameters

		// Add a node (only if it's not already there)

		int nodeHandle = BBN_Network.theNet.FindNode(nodeParameters.ID.c_str());

		if (nodeHandle == DSL_OUT_OF_RANGE) {

			// Add node
			nodeHandle = BBN_Network.theNet.AddNode(DSL_CPT, nodeParameters.ID.c_str());

			// Set node name
			BBN_Network.theNet.GetNode(nodeHandle)->Info().Header().SetName(nodeParameters.name.c_str());

			// Set font
			BBN_Network.theNet.GetNode(nodeHandle)->Info().Screen().font = 1;

			// Set flags
			BBN_Network.theNet.GetNode(nodeHandle)->ExtraDefinition()->SetFlags(DSL_TRUE, DSL_FALSE, DSL_FALSE);

			// Set position on the grid
			BBN_Network.theNet.GetNode(nodeHandle)->Info().Screen().position.center_X = BBN_Network.nextNodePosition.center_x;
			BBN_Network.theNet.GetNode(nodeHandle)->Info().Screen().position.center_Y = BBN_Network.nextNodePosition.center_y;

			BBN_Network.nextNodePosition.center_x = BBN_Network.nextNodePosition.center_x + BBN_Network.nextNodePosition.x_offset + BBN_Network.defaultNodeSize.width;
			
			if (BBN_Network.nextNodePosition.center_x > (BBN_Network.defaultNodeSize.width + BBN_Network.nextNodePosition.x_offset) * BBN_Network.nextNodePosition.max_columns) {
				BBN_Network.nextNodePosition.center_x = BBN_Network.nextNodePosition.starting_x;
				BBN_Network.nextNodePosition.center_y = BBN_Network.nextNodePosition.center_y + BBN_Network.defaultNodeSize.height + BBN_Network.nextNodePosition.y_offset;
			}

			// Set size
			BBN_Network.theNet.GetNode(nodeHandle)->Info().Screen().position.height = BBN_Network.defaultNodeSize.height;
			BBN_Network.theNet.GetNode(nodeHandle)->Info().Screen().position.width = BBN_Network.defaultNodeSize.width;

			// Create states (new nodes default to two; second is a nullState)
			DSL_stringArray stateNames;
			stateNames.Add(nodeParameters.stateName.c_str());
			stateNames.Add("nullState");
			int numberOfOutcomes = BBN_Network.theNet.GetNode(nodeHandle)->Definition()->SetNumberOfOutcomes(stateNames);

			// Set probabilities (second is a placeholder)
			DSL_doubleArray probabilities;
			probabilities.SetSize(2);
			probabilities[0] = nodeParameters.probability;
			probabilities[1] = 1 - nodeParameters.probability;
			BBN_Network.theNet.GetNode(nodeHandle)->Definition()->SetDefinition(probabilities);

		} else 
		{
			DSL_nodeDefinition *def = BBN_Network.theNet.GetNode(nodeHandle)->Definition();
			const DSL_Dmatrix &cpt = *def->GetMatrix();
			const DSL_idArray &outcomes = *def->GetOutcomesNames();
			const int numberOfOutcomes = def->GetNumberOfOutcomes();

			DSL_intArray coords;

			int outcomeIdx = 0;

			while (outcomeIdx < numberOfOutcomes && outcomes[outcomeIdx] != nodeParameters.stateName) {
				outcomeIdx++;
			}

			if (outcomeIdx == numberOfOutcomes) {

				DSL_doubleArray probabilities;

				string secondOutcome = outcomes[1];

				if (secondOutcome == "nullState") {

					probabilities.SetSize(outcomeIdx);

					for (int elemIdx = 0; elemIdx < outcomeIdx; elemIdx++) {
						probabilities[elemIdx] = cpt[elemIdx];
					}

					probabilities[outcomeIdx - 1] = nodeParameters.probability;

					DSL_stringArray stateArray;
					stateArray.Add(outcomes[0]);
					stateArray.Add(nodeParameters.stateName.c_str());

					BBN_Network.theNet.GetNode(nodeHandle)->Definition()->SetNumberOfOutcomes(stateArray);

					outcomeIdx--;
				}
				else {
					probabilities.SetSize(outcomeIdx + 1);

					for (int elemIdx = 0; elemIdx < outcomeIdx; elemIdx++) {
						probabilities[elemIdx] = cpt[elemIdx];
					}

					BBN_Network.theNet.GetNode(nodeHandle)->Definition()->AddOutcome(nodeParameters.stateName.c_str());

					probabilities[outcomeIdx] = nodeParameters.probability;
				}

				BBN_Network.theNet.GetNode(nodeHandle)->Definition()->SetDefinition(probabilities);
			} else {
				cout << "Duplicate state for node: " << BBN_Network.theNet.GetNode(nodeHandle)->GetId() << " State: " << nodeParameters.stateName << endl;
			}
		}
	}

	int nodeHandle = BBN_Network.theNet.GetFirstNode();

	while (nodeHandle >= 0) {
		DSL_node *node = BBN_Network.theNet.GetNode(nodeHandle);
		DSL_nodeDefinition *def = BBN_Network.theNet.GetNode(nodeHandle)->Definition();
		const DSL_Dmatrix &cpt = *def->GetMatrix();
		const DSL_idArray &outcomes = *def->GetOutcomesNames();
		const int numberOfOutcomes = def->GetNumberOfOutcomes();

		DSL_doubleArray probabilities;
		probabilities.SetSize(numberOfOutcomes);
		double totalProbability = 0;

		for (int elemIdx = 0; elemIdx < numberOfOutcomes; elemIdx++) {
			probabilities[elemIdx] = cpt[elemIdx];
			totalProbability = totalProbability + probabilities[elemIdx];
		}

		if (totalProbability != 1.0) {
			for (int elemIdx = 0; elemIdx < numberOfOutcomes; elemIdx++) {
				probabilities[elemIdx] = probabilities[elemIdx] * 1/totalProbability;
			}
			BBN_Network.theNet.GetNode(nodeHandle)->Definition()->SetDefinition(probabilities);

			cout << "Normalized probabilities for node: " << node->GetId() << endl;
		}
		nodeHandle = BBN_Network.theNet.GetNextNode(nodeHandle);
	}

	// Write network to file in XDSL format
	BBN_Network.theNet.WriteFile(BBN_Network.XML_fileName.c_str(), BBN_Network.format);

	return 0;
}
Attachments
Import.txt
Change extension to "CSV" before using.
(283 Bytes) Downloaded 341 times
shooltz[BayesFusion]
Site Admin
Posts: 1417
Joined: Mon Nov 26, 2007 5:51 pm

Re: Creating a network from a CSV file

Post by shooltz[BayesFusion] »

This form wouldn't let me attach it as a csv file.
We've added the CSV to the list of the supported extensions.

Did you consider using DSL_dataset::ReadFile to do the CSV parsing for you?
jdtoellner
Posts: 71
Joined: Mon Aug 01, 2016 9:45 pm

Re: Creating a network from a CSV file

Post by jdtoellner »

No I hadn't thought of using ReadFile. Let me look into that.

Where can I find documentation?

(I searched the .h files. Information there is sparse. The wiki doesn't appear to say much about using ReadFile to read CSV files.)
shooltz[BayesFusion]
Site Admin
Posts: 1417
Joined: Mon Nov 26, 2007 5:51 pm

Re: Creating a network from a CSV file

Post by shooltz[BayesFusion] »

I searched the .h files. Information there is sparse. The wiki doesn't appear to say much about using ReadFile to read CSV files.
We're rewriting the SMILE and SMILearn documentation now. The .h file is dataset.h. Below you can find a sample function which reads the CSV and prints out its contents. Note how specific column types (which are known in advance) are handled.

Code: Select all

// Sample: loads "yourfile.csv" into a DSL_dataset and prints every record.
// Column types are known in advance: 0 and 2 are text, 1 is int, 3 is float.
// Returns DSL_OKAY on success, otherwise the code reported by ReadFile.
int ReadCsv()
{
	using namespace std;

	DSL_dataset ds;
	string detailedErrMsg;

	DSL_datasetParseParams params;
	params.columnIdsPresent = false;

	const int res = ds.ReadFile("yourfile.csv", &params, &detailedErrMsg);
	if (res != DSL_OKAY)
	{
		printf("Can't read file, %s\n", detailedErrMsg.c_str());
		return res;
	}

	// Text columns are stored as indices into a per-column list of strings
	const vector<string> & names0 = ds.GetStateNames(0);
	const vector<string> & names2 = ds.GetStateNames(2);

	const int recordCount = ds.GetNumberOfRecords();
	for (int rec = 0; rec < recordCount; ++rec)
	{
		const char *text0 = names0[ds.GetInt(0, rec)].c_str();	// column 0 (text)
		const int number1 = ds.GetInt(1, rec);					// column 1 (int)
		const char *text2 = names2[ds.GetInt(2, rec)].c_str();	// column 2 (text)
		const double real3 = ds.GetFloat(3, rec);				// column 3 (float)

		printf("%d: %s ; %d ; %s ; %f\n", rec, text0, number1, text2, real3);
	}

	return DSL_OKAY;
}
jdtoellner
Posts: 71
Joined: Mon Aug 01, 2016 9:45 pm

Re: Creating a network from a CSV file

Post by jdtoellner »

I'll give that a try.

Which include file do I need for DSL_dataset and DSL_datasetParseParams?

I'm including "smile.h" and they're undefined.

I tried including "dataset.h" but got linker errors.
shooltz[BayesFusion]
Site Admin
Posts: 1417
Joined: Mon Nov 26, 2007 5:51 pm

Re: Creating a network from a CSV file

Post by shooltz[BayesFusion] »

I'm including "smile.h" and they're undefined.
DSL_dataset is part of SMILearn. You need to include smilearn.h in your app. If you're using Visual C++, the include file contains a #pragma directive to ensure the .lib file is added to the linker's options.
jdtoellner
Posts: 71
Joined: Mon Aug 01, 2016 9:45 pm

Re: Creating a network from a CSV file

Post by jdtoellner »

Thanks for the pointers. Here's better code. (Some of this, I'll admit, is a bit clumsy . . . but it works.)

Code: Select all

// ImportFromCSV.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"
#include "smile.h"
#include "smilearn.h"
#include <fstream>
#include <iostream>
#include <string>
#include <sstream>
#include <vector>

using namespace std;

struct NODE_Position {		// Node size plus the layout cursor for placing nodes on a grid

	// Dimensions given to every node
	int height{ 100 };
	int width{ 200 };

	// Fixed origin of the grid
	const int starting_x{ 100 };
	const int starting_y{ 100 };

	// Center of the next node; begins at the origin
	int center_x{ starting_x };
	int center_y{ starting_y };

	// Offsets added on top of the node size when stepping to the next slot
	int x_offset{ 40 };
	int y_offset{ 40 };

	// Column limit used when deciding to wrap to a new row
	int max_columns{ 30 };
};

int main()
{
	DSL_network theNet;

	NODE_Position nodePosition;

	DSL_dataset ds;
	string detailedErrMsg;
	DSL_datasetParseParams params;
	params.columnIdsPresent = false;

	int res = ds.ReadFile("Import.csv", &params, &detailedErrMsg);
	if (DSL_OKAY != res)
	{
		printf("Can't read file, %s\n", detailedErrMsg.c_str());
		return res;
	}

	const vector<string> & ids0 = ds.GetStateNames(0); // text-based column 0
	const vector<string> & ids1 = ds.GetStateNames(1); // text-based column 2

	for (int rec = 0; rec < ds.GetNumberOfRecords(); rec++)
	{
		printf("%d: %s ; %s ; %f\n",
			rec,
			ids0[ds.GetInt(0, rec)].c_str(), // column 0 (text)
			ids1[ds.GetInt(1, rec)].c_str(), // column 1 (text)
			ds.GetFloat(2, rec) // column 3 (float)
		);

		string nodeName = ids0[ds.GetInt(0, rec)].c_str();

		char * nodeID;
		nodeID = new char[nodeName.length() + 1];
		strcpy(nodeID, nodeName.c_str());
		theNet.Header().MakeValidId(nodeID);

		string stateName = ids1[ds.GetInt(1, rec)].c_str();
		char * stateID;
		stateID = new char[stateName.length()];
		strcpy(stateID, stateName.c_str());
		theNet.Header().MakeValidId(stateID);

		string validStateName = string(stateID);

		double probability = ds.GetFloat(2, rec);

		// See if nodeID is already in network
		int nodeHandle = theNet.FindNode(nodeID);

		if (nodeHandle == DSL_OUT_OF_RANGE) {		// If not

			// Add nodeID
			nodeHandle = theNet.AddNode(DSL_CPT, nodeID);

			// Set node name
			theNet.GetNode(nodeHandle)->Info().Header().SetName(nodeName.c_str());

			// Set position on the grid
			theNet.GetNode(nodeHandle)->Info().Screen().position.center_X = nodePosition.center_x;
			theNet.GetNode(nodeHandle)->Info().Screen().position.center_Y = nodePosition.center_y;

			// Compute next position
			nodePosition.center_x = nodePosition.center_x + nodePosition.x_offset + nodePosition.width;
			
			// If column > max then go to next row
			if (nodePosition.center_x > (nodePosition.width + nodePosition.x_offset) * nodePosition.max_columns) {
				nodePosition.center_x = nodePosition.starting_x;
				nodePosition.center_y = nodePosition.center_y + nodePosition.height + nodePosition.y_offset;
			}

			// Set node size
			theNet.GetNode(nodeHandle)->Info().Screen().position.height = nodePosition.height;
			theNet.GetNode(nodeHandle)->Info().Screen().position.width = nodePosition.width;

			// Create states (new nodes default to two; second is a nullState)
			DSL_stringArray stateNames;
			stateNames.Add(stateID);
			stateNames.Add("nullState");
			int numberOfOutcomes = theNet.GetNode(nodeHandle)->Definition()->SetNumberOfOutcomes(stateNames);

			if (probability < 1.0) {
				// Set probabilities (second is a placeholder)
				DSL_doubleArray probabilities;
				probabilities.SetSize(2);
				probabilities[0] = probability;
				probabilities[1] = 1 - probability;
				theNet.GetNode(nodeHandle)->Definition()->SetDefinition(probabilities);
			}
			else {
				cout << "Probability > 1.0 for node: " << nodeName << endl;
			}

		} else 
		{
			// Set up node data structures
			DSL_nodeDefinition *def = theNet.GetNode(nodeHandle)->Definition();
			const DSL_Dmatrix &cpt = *def->GetMatrix();
			const DSL_idArray &outcomes = *def->GetOutcomesNames();
			const int numberOfOutcomes = def->GetNumberOfOutcomes();

			// Search for stateName
			int outcomeIdx = 0;
			while (outcomeIdx < numberOfOutcomes && outcomes[outcomeIdx] != validStateName) {
				outcomeIdx++;
			}

			if (outcomeIdx == numberOfOutcomes) {		// If stateName is not in list

				DSL_doubleArray probabilities;

				string secondOutcome = outcomes[1];

				if (secondOutcome == "nullState") {		// If this is the second state

					probabilities.SetSize(outcomeIdx);	// Set the size of the probability table

					for (int elemIdx = 0; elemIdx < outcomeIdx; elemIdx++) {		// Get the other probabilities
						probabilities[elemIdx] = cpt[elemIdx];
					}

					probabilities[outcomeIdx - 1] = probability;					// Add the new probability

					// Set up state array with new and old stateNames
					DSL_stringArray stateArray;
					stateArray.Add(outcomes[0]);
					stateArray.Add(stateID);

					// Set new states
					theNet.GetNode(nodeHandle)->Definition()->SetNumberOfOutcomes(stateArray);

					outcomeIdx--;
				}
				else {

					probabilities.SetSize(outcomeIdx + 1);	// Set the size of the probability table

					for (int elemIdx = 0; elemIdx < outcomeIdx; elemIdx++) {		// Get the other probabilities
						probabilities[elemIdx] = cpt[elemIdx];
					}

					probabilities[outcomeIdx] = probability;						// Add the new probability
				
					// Add the new state
					theNet.GetNode(nodeHandle)->Definition()->AddOutcome(stateID);
				}

				theNet.GetNode(nodeHandle)->Definition()->SetDefinition(probabilities);		// Set new probabilities

			} else {
				cout << "Duplicate state for node: " << theNet.GetNode(nodeHandle)->GetId() << " State: " << ids1[ds.GetInt(1, rec)].c_str() << endl;
			}
		}
	}

	// Normalize all CPTs so total probabilities is 1.0
	int nodeHandle = theNet.GetFirstNode();

	// Go through all the nodes
	while (nodeHandle >= 0) {

		// Set up node data structures
		DSL_node *node = theNet.GetNode(nodeHandle);
		DSL_nodeDefinition *def = theNet.GetNode(nodeHandle)->Definition();
		const DSL_Dmatrix &cpt = *def->GetMatrix();
		const DSL_idArray &outcomes = *def->GetOutcomesNames();
		const int numberOfOutcomes = def->GetNumberOfOutcomes();

		// Set up probabilities array
		DSL_doubleArray probabilities;
		probabilities.SetSize(numberOfOutcomes);
		double totalProbability = 0;

		// Get all the probabilities and total them up
		for (int elemIdx = 0; elemIdx < numberOfOutcomes; elemIdx++) {
			probabilities[elemIdx] = cpt[elemIdx];							// Move probability into an array we can manipulate
			totalProbability = totalProbability + probabilities[elemIdx];	// Keep track of total probability
		}

		// If the total is not 1.0
		if (totalProbability != 1.0) {
			for (int elemIdx = 0; elemIdx < numberOfOutcomes; elemIdx++) {
				probabilities[elemIdx] = probabilities[elemIdx] * 1/totalProbability;	// Scale each probability by 1/totalProbability
			}

			// Set the normalized probabilities
			theNet.GetNode(nodeHandle)->Definition()->SetDefinition(probabilities);

			cout << "Normalized probabilities for node: " << node->GetId() << endl;
		}
		// Get next node
		nodeHandle = theNet.GetNextNode(nodeHandle);
	}

	// Write network to file in XDSL format
	theNet.WriteFile("BBN_Network.xdsl", DSL_XDSL_FORMAT);

	return 0;
}
Post Reply