"""Write summary statistics for the train/dev/test feature CSV files.

For each split name in ``SPLITS`` the script reads ``<name>.csv`` from the
current working directory and writes the result of
``DataFrame.describe(include='all')`` to ``<name>_stats.csv``.
"""

import pandas

# Base names (without extension) of the CSV files to summarise.
SPLITS = ('X_train', 'X_dev', 'X_test')


def summarize_csv(source, target):
    """Read the CSV at *source* and write its ``describe`` table to *target*.

    Args:
        source: Path of the comma-separated input file, decoded as
            ISO-8859-1 and parsed with pandas' Python engine.
        target: Path of the CSV file that receives the statistics.
    """
    frame = pandas.read_csv(source,
                            engine='python',
                            encoding='ISO-8859-1',
                            sep=',')
    # include='all' summarises every column, not only the numeric ones.
    # index=True keeps the statistic names (count, mean, ...) as the first
    # column of the output file.
    frame.describe(include='all').to_csv(target, index=True)


def main():
    """Summarise every split listed in ``SPLITS``."""
    for name in SPLITS:
        summarize_csv(f'{name}.csv', f'{name}_stats.csv')


# Guarded so that importing this file does not trigger any file I/O.
if __name__ == '__main__':
    main()