From 3a9ca8cbc1af9131d98aab540b999f545498e2ad Mon Sep 17 00:00:00 2001 From: Mateusz Date: Mon, 1 Apr 2024 19:39:09 +0200 Subject: [PATCH] Dockerfile --- create-dataset.py | 1 + dataset-stats.py | 22 ++++++++++++---------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/create-dataset.py b/create-dataset.py index eb10e44..4903d69 100644 --- a/create-dataset.py +++ b/create-dataset.py @@ -89,6 +89,7 @@ def save_whole_data(df, X_train, X_test, y_train, y_test): def main(): os.makedirs("data", exist_ok=True) + os.system("rm -rf data/*") df = load_data("creditcard.csv") df = normalize_data(df) diff --git a/dataset-stats.py b/dataset-stats.py index 5a3510f..7cb9d2a 100644 --- a/dataset-stats.py +++ b/dataset-stats.py @@ -1,4 +1,5 @@ import os +import sys import pandas as pd @@ -18,13 +19,15 @@ def write_to_file(file_name): y_train = pd.read_csv("data/y_train.csv") with open("stats_data/" + file_name, "w") as f: + sys.stdout = f + f.write("Check missing values\n") f.write(str(df.isnull().sum())) f.write("\n\n") f.write("Size of the dataset\n") - print(df.info(), file=f) + df.info() f.write("\n\n") @@ -39,7 +42,7 @@ def write_to_file(file_name): f.write("\n\n") f.write("Size of undersampled dataset\n") - print(undersample_data.info(), file=f) + undersample_data.info() f.write("\n\n") @@ -56,7 +59,7 @@ def write_to_file(file_name): f.write("\n\n") f.write("Statistical measures of the training dataset of whole data\n") - print(pd.concat([X_train, y_train], axis=1).info(), file=f) + pd.concat([X_train, y_train], axis=1).info() f.write("\n") f.write(str(pd.concat([X_train, y_train], axis=1).describe())) f.write("\n") @@ -65,7 +68,7 @@ def write_to_file(file_name): f.write("\n\n") f.write("Statistical measures of the test dataset of whole data\n") - print(pd.concat([X_test, y_test], axis=1).info(), file=f) + pd.concat([X_test, y_test], axis=1).info() f.write("\n") f.write(str(pd.concat([X_test, y_test], axis=1).describe())) f.write("\n") @@ -74,9 +77,7 @@ def write_to_file(file_name): f.write("\n\n") f.write("Statistical measures of the training dataset of undersampled data\n") - print( - pd.concat([X_train_undersample, y_train_undersample], axis=1).info(), file=f - ) + pd.concat([X_train_undersample, y_train_undersample], axis=1).info() f.write("\n") f.write( str( @@ -95,9 +96,7 @@ def write_to_file(file_name): f.write("\n\n") f.write("Statistical measures of the test dataset of undersampled data\n") - print( - pd.concat([X_test_undersample, y_test_undersample], axis=1).info(), file=f - ) + pd.concat([X_test_undersample, y_test_undersample], axis=1).info() f.write("\n") f.write( str(pd.concat([X_test_undersample, y_test_undersample], axis=1).describe()) @@ -111,9 +110,12 @@ def write_to_file(file_name): ) ) + sys.stdout = sys.__stdout__ + def main(): os.makedirs("stats_data", exist_ok=True) + os.system("rm -rf stats_data/*") write_to_file("stats.txt")