In [1]:
# Record the interpreter version/build so a reader can reproduce the environment.
import sys
sys.version
Out[1]:
'3.12.10 (tags/v3.12.10:0cc8128, Apr  8 2025, 12:21:36) [MSC v.1943 64 bit (AMD64)]'
In [2]:
import pandas as pd
import pysmile
# NOTE(review): pysmile_license is never referenced by name in this notebook;
# presumably it is imported for its side effect of activating the SMILE
# license and must come after `import pysmile` -- confirm against the
# BayesFusion setup instructions.
import pysmile_license
# Print the wrapper version for the record.
print(pysmile.__version__)
2.4.6
In [3]:
# Load the raw obesity data set from a path relative to the notebook's
# working directory; the bare `df` on the last line displays the frame.
df = pd.read_csv("data/ObesityDataSet_raw_and_data_sinthetic.csv")
df
Out[3]:
Gender Age Height Weight family_history_with_overweight FAVC FCVC NCP CAEC SMOKE CH2O SCC FAF TUE CALC MTRANS NObeyesdad
0 Female 21.000000 1.620000 64.000000 yes no 2.0 3.0 Sometimes no 2.000000 no 0.000000 1.000000 no Public_Transportation Normal_Weight
1 Female 21.000000 1.520000 56.000000 yes no 3.0 3.0 Sometimes yes 3.000000 yes 3.000000 0.000000 Sometimes Public_Transportation Normal_Weight
2 Male 23.000000 1.800000 77.000000 yes no 2.0 3.0 Sometimes no 2.000000 no 2.000000 1.000000 Frequently Public_Transportation Normal_Weight
3 Male 27.000000 1.800000 87.000000 no no 3.0 3.0 Sometimes no 2.000000 no 2.000000 0.000000 Frequently Walking Overweight_Level_I
4 Male 22.000000 1.780000 89.800000 no no 2.0 1.0 Sometimes no 2.000000 no 0.000000 0.000000 Sometimes Public_Transportation Overweight_Level_II
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2106 Female 20.976842 1.710730 131.408528 yes yes 3.0 3.0 Sometimes no 1.728139 no 1.676269 0.906247 Sometimes Public_Transportation Obesity_Type_III
2107 Female 21.982942 1.748584 133.742943 yes yes 3.0 3.0 Sometimes no 2.005130 no 1.341390 0.599270 Sometimes Public_Transportation Obesity_Type_III
2108 Female 22.524036 1.752206 133.689352 yes yes 3.0 3.0 Sometimes no 2.054193 no 1.414209 0.646288 Sometimes Public_Transportation Obesity_Type_III
2109 Female 24.361936 1.739450 133.346641 yes yes 3.0 3.0 Sometimes no 2.852339 no 1.139107 0.586035 Sometimes Public_Transportation Obesity_Type_III
2110 Female 23.664709 1.738836 133.472641 yes yes 3.0 3.0 Sometimes no 2.863513 no 1.026452 0.714137 Sometimes Public_Transportation Obesity_Type_III

2111 rows × 17 columns

In [4]:
# Keep only the three categorical columns used for learning.
# Selecting with a list already returns a DataFrame, so wrapping it in
# pd.DataFrame(...) is redundant; .copy() states explicitly that `smalldf`
# is independent of `df`.
smalldf = df[["Gender", "SMOKE", "SCC"]].copy()
smalldf
Out[4]:
Gender SMOKE SCC
0 Female no no
1 Female yes yes
2 Male no no
3 Male no no
4 Male no no
... ... ... ...
2106 Female no no
2107 Female no no
2108 Female no no
2109 Female no no
2110 Female no no

2111 rows × 3 columns

In [5]:
# Rebuild a clean 0..n-1 index by assignment rather than inplace=True, so the
# cell does not mutate an object that an earlier cell already displayed.
# The displayed index already runs 0..2110, so this is a safeguard and does
# not change the data.
smalldf = smalldf.reset_index(drop=True)
smalldf
Out[5]:
Gender SMOKE SCC
0 Female no no
1 Female yes yes
2 Male no no
3 Male no no
4 Male no no
... ... ... ...
2106 Female no no
2107 Female no no
2108 Female no no
2109 Female no no
2110 Female no no

2111 rows × 3 columns

In [6]:
# Inspect column dtypes and non-null counts before handing the frame to SMILE.
smalldf.info()
<class 'pandas.DataFrame'>
RangeIndex: 2111 entries, 0 to 2110
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   Gender  2111 non-null   str  
 1   SMOKE   2111 non-null   str  
 2   SCC     2111 non-null   str  
dtypes: str(3)
memory usage: 49.6 KB
In [7]:
# Load the frame into a SMILE learning DataSet.
#
# Passing `smalldf` directly crashed the kernel (a native crash, so it cannot
# be caught with try/except). The .info() output reports the columns as the
# pandas `str` extension dtype instead of NumPy `object`.
# NOTE(review): presumably pysmile's native reader expects NumPy-backed
# columns and mishandles the extension array -- confirm with BayesFusion.
# Casting to `object` hands it plain Python strings in a NumPy object array.
# If the crash persists, fall back to writing a CSV and loading it with
# ds.read_file(...).
ds = pysmile.learning.DataSet()
ds.read_pandas_dataframe(smalldf.astype(object))
The Kernel crashed while executing code in the current cell or a previous cell. 

Please review the code in the cell(s) to identify a possible cause of the failure. 

Click <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. 

View Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details.
In [ ]: