Dockerfile

This commit is contained in:
Mateusz 2024-04-01 19:39:09 +02:00
parent b946aaf7cc
commit 3a9ca8cbc1
2 changed files with 13 additions and 10 deletions

View File

@ -89,6 +89,7 @@ def save_whole_data(df, X_train, X_test, y_train, y_test):
def main():
os.makedirs("data", exist_ok=True)
os.system("rm -rf data/*")
df = load_data("creditcard.csv")
df = normalize_data(df)

View File

@ -1,4 +1,5 @@
import os
import sys
import pandas as pd
@ -18,13 +19,15 @@ def write_to_file(file_name):
y_train = pd.read_csv("data/y_train.csv")
with open("stats_data/" + file_name, "w") as f:
sys.stdout = f
f.write("Check missing values\n")
f.write(str(df.isnull().sum()))
f.write("\n\n")
f.write("Size of the dataset\n")
print(df.info(), file=f)
df.info()
f.write("\n\n")
@ -39,7 +42,7 @@ def write_to_file(file_name):
f.write("\n\n")
f.write("Size of undersampled dataset\n")
print(undersample_data.info(), file=f)
undersample_data.info()
f.write("\n\n")
@ -56,7 +59,7 @@ def write_to_file(file_name):
f.write("\n\n")
f.write("Statistical measures of the training dataset of whole data\n")
print(pd.concat([X_train, y_train], axis=1).info(), file=f)
pd.concat([X_train, y_train], axis=1).info()
f.write("\n")
f.write(str(pd.concat([X_train, y_train], axis=1).describe()))
f.write("\n")
@ -65,7 +68,7 @@ def write_to_file(file_name):
f.write("\n\n")
f.write("Statistical measures of the test dataset of whole data\n")
print(pd.concat([X_test, y_test], axis=1).info(), file=f)
pd.concat([X_test, y_test], axis=1).info()
f.write("\n")
f.write(str(pd.concat([X_test, y_test], axis=1).describe()))
f.write("\n")
@ -74,9 +77,7 @@ def write_to_file(file_name):
f.write("\n\n")
f.write("Statistical measures of the training dataset of undersampled data\n")
print(
pd.concat([X_train_undersample, y_train_undersample], axis=1).info(), file=f
)
pd.concat([X_train_undersample, y_train_undersample], axis=1).info()
f.write("\n")
f.write(
str(
@ -95,9 +96,7 @@ def write_to_file(file_name):
f.write("\n\n")
f.write("Statistical measures of the test dataset of undersampled data\n")
print(
pd.concat([X_test_undersample, y_test_undersample], axis=1).info(), file=f
)
pd.concat([X_test_undersample, y_test_undersample], axis=1).info()
f.write("\n")
f.write(
str(pd.concat([X_test_undersample, y_test_undersample], axis=1).describe())
@ -111,9 +110,12 @@ def write_to_file(file_name):
)
)
sys.stdout = sys.__stdout__
def main():
    """Reset the ``stats_data`` directory and write the statistics report.

    Creates ``stats_data`` if it does not exist, removes its previous
    contents, then calls ``write_to_file("stats.txt")``.

    Raises:
        OSError: if an existing entry in ``stats_data`` cannot be removed.
            The previous shell-based clean discarded such failures.
    """
    # Local import so this function does not depend on the file's import block.
    import shutil

    out_dir = "stats_data"
    os.makedirs(out_dir, exist_ok=True)

    # Clear old output without spawning a shell (portable, no glob expansion).
    # Snapshot the listing first so entries are not deleted mid-iteration.
    with os.scandir(out_dir) as listing:
        entries = list(listing)
    for entry in entries:
        # The old `rm -rf stats_data/*` glob never matched dot-prefixed
        # names, so those are left in place here as well.
        if entry.name.startswith("."):
            continue
        if entry.is_dir(follow_symlinks=False):
            shutil.rmtree(entry.path)
        else:
            # Regular files and symlinks (including symlinks to directories).
            os.remove(entry.path)

    write_to_file("stats.txt")