ium_434766/stats.py

15 lines
508 B
Python

import pandas as pd
def describeDataset(
    dt: pd.DataFrame,
    dt2: pd.DataFrame,
    dv: pd.DataFrame,
    *,
    data_path: str = 'healthcare-dataset-stroke-data.csv',
) -> None:
    """Print the sizes of the full dataset and its splits, plus summary stats.

    The full dataset is read from ``data_path``; the three splits are passed
    in by the caller and are only inspected for their ``size``.

    Note: ``size`` on a pandas object is the total number of elements
    (rows x columns for a DataFrame), not the number of rows.

    Args:
        dt: Training split.
        dt2: Test split.
        dv: Validation split.
        data_path: CSV file holding the whole dataset. Defaults to the
            stroke dataset file so existing positional calls keep working.

    Returns:
        None. All results are written to standard output.
    """
    data = pd.read_csv(data_path)

    # Labels end with a space and print() adds its own separator, so the
    # output contains two spaces before each number (kept as in the original).
    print("Whole dataset size: ", data.size)
    print("Train dataset size: ", dt.size)
    print("Test dataset size: ", dt2.size)
    print("Validate dataset size: ", dv.size)

    # include='all' makes describe() cover non-numeric columns as well.
    print(data.describe(include='all'))
# Load the train / test / validation CSV splits (read in that order) and
# print the dataset report for them.
data_train, data_test, data_val = map(
    pd.read_csv,
    ('data_train.csv', 'data_test.csv', 'data_val.csv'),
)
describeDataset(data_train, data_test, data_val)