Simulate Continuous Network¶
In [1]:
import pandas as pd
import pysmile
import pysmile_license
import numpy as np
import os
Create simulated data from a network with continuous variables¶
In [2]:
# Folder that receives every artefact written by this notebook
# (simulated CSV and the learned / predefined .xdsl networks).
output_folder = "./simulated_data"
os.makedirs(output_folder, exist_ok=True)

# Derive the dataset path from output_folder so the two cannot drift apart.
simulated_dataset_path = os.path.join(output_folder, "simulated_cont_dataset.csv")
In [3]:
# Ground-truth structure used to generate the data (arrows point downward):
#
#       C
#       |
#       v
#       A     B
#        \   /
#         v v
#          D
#
# i.e. C -> A, A -> D and B -> D.

# Number of samples
n_samples = 50000

# Set random seed for reproducibility.
# The order of the np.random.normal calls below (C, epsilon_A, B) determines
# the generated values, so do not reorder them.
np.random.seed(42)

# Node C: independent Gaussian, mean 0, standard deviation 1 (root node)
C = np.random.normal(loc=0, scale=1, size=n_samples)

# Node A: depends on C -- 20 * C plus Gaussian noise (mean 0, std 1)
epsilon_A = np.random.normal(loc=0, scale=1, size=n_samples)
A = 20 * C + epsilon_A

# Node B: independent Gaussian, mean 1, standard deviation 1 (root node)
B = np.random.normal(loc=1, scale=1, size=n_samples)

# Node D: deterministic sum of A and B (no noise term)
D = A + B

# Combine into DataFrame for inspection or further use
data = pd.DataFrame({'C': C, 'A': A, 'B': B, 'D': D})

# Cheap sanity check: one row per sample, one column per node.
assert data.shape == (n_samples, 4)

print(data.head())
data.to_csv(simulated_dataset_path, index=False)
C A B D 0 0.496714 10.032489 2.030595 12.063084 1 -0.138264 -2.829394 -0.155355 -2.984749 2 0.647689 13.905562 1.575437 15.480999 3 1.523030 31.993428 0.380762 32.374190 4 -0.234153 -3.996221 0.672597 -3.323623
Structure learning from simulated dataset¶
In [4]:
# Load the simulated CSV into a SMILE learning DataSet.
cont_ds = pysmile.learning.DataSet()
try:
    cont_ds.read_file(simulated_dataset_path)
except pysmile.SMILEException as exc:
    # Every later cell needs the dataset, so report the reason and stop here
    # instead of continuing with an empty DataSet.
    print(f"Dataset load failed: {exc}")
    raise
#endtry

print(
    f"Dataset has {cont_ds.get_variable_count()} variables (columns) "
    f"and {cont_ds.get_record_count()} records (rows)"
)
Dataset has 4 variables (columns) and 50000 records (rows)
In [5]:
def _run_bayesian_search(dataset, seed, label, out_path, iterations=50):
    """Run one Bayesian Search structure-learning pass and save the result.

    Parameters:
        dataset: pysmile.learning.DataSet to learn from.
        seed: random seed passed to set_rand_seed.
        label: prefix used in the progress message (e.g. "1st").
        out_path: file the learned network is written to.
        iterations: value passed to set_iteration_count.

    Returns:
        (search, net): the BayesianSearch object and the learned network.
        net is None when learning raised pysmile.SMILEException.
    """
    search = pysmile.learning.BayesianSearch()
    search.set_iteration_count(iterations)
    search.set_rand_seed(seed)
    net = None
    try:
        net = search.learn(dataset)
        print(f"{label} Bayesian Search finished, structure score: {search.get_last_score()}")
        net.write_file(out_path)
    except pysmile.SMILEException as exc:
        # Show the reason for the failure instead of discarding it.
        print(f"Bayesian Search failed: {exc}")
    #endtry
    return search, net


# NOTE(review): the recorded output of this cell shows both Bayesian Search
# runs failing. Presumably BayesianSearch needs discrete variables while this
# dataset is continuous -- confirm against the SMILE documentation; the
# exception text printed above should make the actual cause visible.

## (1)
bayes_search, net1 = _run_bayesian_search(
    cont_ds, 9876543, "1st", "./simulated_data/learned_cont_net_bayes_1.xdsl"
)

## (2)
bayes_search, net2 = _run_bayesian_search(
    cont_ds, 3456789, "2nd", "./simulated_data/learned_cont_net_bayes_2.xdsl"
)

## (3)
# PC: learn a pattern from the data, then turn it into a network.
pc = pysmile.learning.PC()
try:
    pattern = pc.learn(cont_ds)
    net5 = pattern.make_network(cont_ds)
    print("PC finished, proceeding to parameter learning")
    net5.write_file("./simulated_data/learned_cont_net_pc.xdsl")
except pysmile.SMILEException as exc:
    print(f"PC failed: {exc}")
#endtry
Bayesian Search failed Bayesian Search failed PC finished, proceeding to parameter learning
Construct predefined network with continuous variables¶
In [6]:
def create_cont_node(net, id, name, x_pos, y_pos, width=85, height=55):
    """Add an equation node to ``net`` and set its name and position.

    Parameters:
        net: network object providing add_node, set_node_name and
            set_node_position.
        id: identifier passed to add_node.
        name: display name passed to set_node_name.
        x_pos, y_pos: position passed to set_node_position.
        width, height: node size passed to set_node_position; the defaults
            (85, 55) are the values that were previously hard-coded.

    Returns:
        The handle returned by net.add_node.
    """
    handle = net.add_node(pysmile.NodeType.EQUATION, id)
    net.set_node_name(handle, name)
    net.set_node_position(handle, x_pos, y_pos, width, height)
    return handle
In [7]:
# Build the reference network by hand: four equation nodes wired as
# C -> A, A -> D, B -> D, then save it to disk.
cont_net = pysmile.Network()

# (node id, y position); the id doubles as the display name and every node
# is placed at x = 10. Creation order is A, B, C, D.
# NOTE: this rebinds A, B, C and D, which the simulation cell used for the
# sample arrays; from here on they hold node handles.
node_layout = [("A", 20), ("B", 30), ("C", 40), ("D", 50)]
A, B, C, D = [
    create_cont_node(cont_net, node_id, node_id, 10, y_pos)
    for node_id, y_pos in node_layout
]

# Arcs are added parent -> child, in this order.
for parent, child in ((C, A), (A, D), (B, D)):
    cont_net.add_arc(parent, child)

cont_net.write_file("./simulated_data/predefined_cont_net.xdsl")
Learn parameters for the predefined network¶
In [8]:
# Learn the parameters of the predefined network from the simulated data
# with EM.
em = pysmile.learning.EM()
em.set_uniformize_parameters(False)
em.set_randomize_parameters(False)
em.set_eq_sample_size(0)

# Match dataset columns to network nodes. Keep `matching` bound even on
# failure so the EM step below can be skipped instead of raising NameError.
try:
    matching = cont_ds.match_network(cont_net)
except pysmile.SMILEException as exc:
    matching = None
    print(f"Can't automatically match network with dataset: {exc}")
#endtry

if matching is not None:
    try:
        em.learn(cont_ds, cont_net, matching)
    except pysmile.SMILEException as exc:
        print(f"EM failed: {exc}")
    else:
        # Report success and save the network only when EM actually ran
        # without raising.
        print("EM finished")
        cont_net.write_file("./simulated_data/simulated_data_em_cont.xdsl")
    #endtry

print("Complete.")
EM finished Complete.