read_pandas_dataframe(self: pysmile.learning.DataSet, *args, **kwargs) -> None
to create a DataSet object directly from a pandas DataFrame.
Could you please tell me how I can fix this issue? Thanks.
The sample code is below:
Code: Select all
# %%
import pandas as pd
import pysmile
import pysmile_license
import numpy as np
import os
import scipy
import math
from functools import partial
# %%
def simulate_bayesian_network(n: int = 50000, seed: int = 1) -> pd.DataFrame:
    """Draw binary samples from a three-node network B -> A, (A, B) -> T.

    Sampling scheme:
      * B ~ Bernoulli(0.5)
      * A | B=1 ~ Bernoulli(0.9), A | B=0 ~ Bernoulli(0.1)
      * T | A=1, B=1 ~ Bernoulli(0.9); every other (A, B) combination
        uses Bernoulli(0.1)

    Args:
        n: Number of rows to simulate. Defaults to 50000.
        seed: Value passed to ``np.random.seed`` so that repeated calls
            produce identical data. Defaults to 1.

    Returns:
        A DataFrame with integer 0/1 columns "A", "B" and "T" and ``n`` rows.
    """
    # Seeds NumPy's *global* generator; this is a deliberate side effect kept
    # so the default call stays reproducible.
    np.random.seed(seed)

    # Step 1: B ~ Bernoulli(0.5)
    B = np.random.binomial(1, 0.5, size=(n,))

    # Step 2: A depends on B. The draw order (B == 1 first, then B == 0) is
    # part of the reproducible random stream, so it must not be reordered.
    A = np.zeros(n, dtype=int)
    for b_value, p_a in ((1, 0.9), (0, 0.1)):
        mask = B == b_value
        A[mask] = np.random.binomial(1, p_a, size=(np.sum(mask),))

    # Step 3: T depends on A and B. Again the iteration order fixes the
    # sequence in which random numbers are consumed.
    T = np.zeros(n, dtype=int)
    t_table = (
        (1, 1, 0.9),
        (1, 0, 0.1),
        (0, 1, 0.1),
        (0, 0, 0.1),
    )
    for a_value, b_value, p_t in t_table:
        mask = (A == a_value) & (B == b_value)
        T[mask] = np.random.binomial(1, p_t, size=np.sum(mask))

    return pd.DataFrame({"A": A, "B": B, "T": T})
# Build the synthetic dataset, then print its first rows followed by its type
data = simulate_bayesian_network()
for preview in (data.head(), type(data)):
    print(preview)
# %%
def assign_category_name_to_discrete_variables(df, variable_list):
    """Replace each value in the listed columns by a "<column>___<value>" label.

    Intended for categorical variables that are stored as numbers (0, 1, 2,
    ...): a value 0 in column "A" becomes the string "A___0".

    The columns are overwritten on ``df`` itself; the same DataFrame object
    is returned for convenience.
    """
    for column in variable_list:
        label_prefix = f"{column}___"
        # Bind the prefix as a default so each column gets its own labeller.
        df[column] = df[column].apply(
            lambda raw, head=label_prefix: head + str(raw)
        )
    return df
# %%
# Relabel the numeric codes in place, e.g. 0 in column "A" becomes "A___0".
assign_category_name_to_discrete_variables(data, ["A", "B", "T"])
# %%
print(type(data))
# %%
data.info()
# %%
# read_pandas_dataframe(self: pysmile.learning.DataSet, *args, **kwargs) -> None
# Reads data from a pandas DataFrame
data = data.astype("string")
for col in data.select_dtypes(include=['string', 'object']).columns:
    data[col] = data[col].astype('category')
#endfor
data = data.reset_index(drop=True)
# The AttributeError ("'DataFrame' object has no attribute 'dtype'") means
# something asked a DataFrame, not a Series, for `.dtype`. `data.T` is pandas'
# transpose property, so attribute-style lookup of the column named "T"
# returns a DataFrame instead of that column.
# NOTE(review): presumably read_pandas_dataframe resolves columns via
# attribute access -- confirm against the pysmile documentation/changelog.
# Renaming every column whose name collides with a DataFrame attribute
# sidesteps the problem ("T" becomes "T_var"; "A" and "B" are unaffected).
colliding = [
    name for name in data.columns
    if isinstance(name, str) and hasattr(pd.DataFrame, name)
]
data = data.rename(columns={name: f"{name}_var" for name in colliding})
ds_1 = pysmile.learning.DataSet()
ds_1.read_pandas_dataframe(data)
Code: Select all
# Traceback (most recent call last):
# File "/create_SMILE_DataSet_from_pandas_dataframe.py", line 92, in <module>
# ds_1.read_pandas_dataframe(data)
# File "/users/anaconda3/envs/smile/lib/python3.11/site-packages/pandas/core/generic.py", line 6318, in __getattr__
# return object.__getattribute__(self, name)
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# AttributeError: 'DataFrame' object has no attribute 'dtype'. Did you mean: 'dtypes'?