From d2e2cb1b84702abbba28a0ec9137af8400654487 Mon Sep 17 00:00:00 2001 From: Mateusz Date: Mon, 1 Apr 2024 15:41:19 +0200 Subject: [PATCH] Dockerfile --- Jenkinsfile | 1 + create-dataset.py | 22 +++++++++++----------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index a83cf3f..e5f0ea8 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -24,6 +24,7 @@ pipeline { stage('Run create-dataset script') { steps { withEnv (["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) { + sh 'rm -rf /.kaggle' sh 'chmod +x create-dataset.py' sh 'python3 ./create-dataset.py' } diff --git a/create-dataset.py b/create-dataset.py index 85fdcef..295e211 100644 --- a/create-dataset.py +++ b/create-dataset.py @@ -70,11 +70,11 @@ def save_undersample_data( y_train_undersample, y_test_undersample, ): - undersample_data.to_csv("data/undersample_data.csv", index=False) - X_train_undersample.to_csv("data/X_train_undersample.csv", index=False) - X_test_undersample.to_csv("data/X_test_undersample.csv", index=False) - y_train_undersample.to_csv("data/y_train_undersample.csv", index=False) - y_test_undersample.to_csv("data/y_test_undersample.csv", index=False) + undersample_data.to_csv("/data/undersample_data.csv", index=False) + X_train_undersample.to_csv("/data/X_train_undersample.csv", index=False) + X_test_undersample.to_csv("/data/X_test_undersample.csv", index=False) + y_train_undersample.to_csv("/data/y_train_undersample.csv", index=False) + y_test_undersample.to_csv("/data/y_test_undersample.csv", index=False) def split_whole_data(df): @@ -88,16 +88,16 @@ def split_whole_data(df): def save_whole_data(df, X_train, X_test, y_train, y_test): - df.to_csv("data/creditcard.csv", index=False) - X_train.to_csv("data/X_train.csv", index=False) - X_test.to_csv("data/X_test.csv", index=False) - y_train.to_csv("data/y_train.csv", index=False) - y_test.to_csv("data/y_test.csv", index=False) + df.to_csv("/data/creditcard.csv", index=False) + X_train.to_csv("/data/X_train.csv", index=False) + X_test.to_csv("/data/X_test.csv", index=False) + y_train.to_csv("/data/y_train.csv", index=False) + y_test.to_csv("/data/y_test.csv", index=False) def main(): download_kaggle_dataset() - os.makedirs("data", exist_ok=True) + os.makedirs("/data", exist_ok=True) df = load_data("creditcard.csv") df = normalize_data(df)