From 6e8d683268e9c46213be7e858c8faf30c82ef7e3 Mon Sep 17 00:00:00 2001
From: Mateusz
Date: Mon, 1 Apr 2024 16:41:47 +0200
Subject: [PATCH] Dockerfile

---
Review notes (this area is ignored when the patch is applied):
 * Line breaks, indentation and blank context lines were reconstructed from a
   whitespace-collapsed copy of this patch. They are a best guess: verify
   them against the tree, or apply with `git am --ignore-whitespace`.
 * Dropped the added `os.system("/root/.kaggle/kaggle.json")` call from
   create-dataset.py: it executes the credentials file as a shell command
   instead of reading it. Later hunk offsets and the diffstat are adjusted.
 * `mkdir` -> `mkdir -p`, so the step does not fail when /root/.kaggle
   already exists.
 * Doubled the backslashes in the `echo` step. Groovy consumes `\"` inside a
   single-quoted string, so the shell used to receive bare quotes and wrote
   `{username:...,key:...}`, which is not valid JSON.
 * The `index` lines are copied from the original patch and no longer
   describe the edited post-image blobs.
 * NOTE(review): archiveArtifacts patterns are resolved relative to the
   workspace, so '/data/*' probably matches nothing. Confirm, and archive a
   workspace-relative directory if so.

 Jenkinsfile       | 14 ++++++++++----
 create-dataset.py | 22 +++++++++++-----------
 2 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 25c7dfa..4a32f3a 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -1,5 +1,10 @@
 pipeline {
-    agent any
+    agent {
+        dockerfile {
+            filename 'Dockerfile'
+            args '-u root'
+        }
+    }
 
     parameters {
         string (
@@ -19,8 +24,9 @@ pipeline {
         stage('Run create-dataset script') {
             steps {
                 withEnv (["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
-                    sh 'sudo rm -rf .kaggle'
-                    sh 'ls -al'
+                    sh 'mkdir -p /root/.kaggle'
+                    sh 'echo "{\\"username\\":\\"$KAGGLE_USERNAME\\",\\"key\\":\\"$KAGGLE_KEY\\"}" > /root/.kaggle/kaggle.json'
+                    sh 'chmod 600 /root/.kaggle/kaggle.json'
                     sh 'chmod +x create-dataset.py'
                     sh 'python3 ./create-dataset.py'
                 }
@@ -28,7 +34,7 @@ pipeline {
         }
         stage('Archive Artifacts') {
             steps {
-                archiveArtifacts artifacts: '/app/data/*', onlyIfSuccessful: true
+                archiveArtifacts artifacts: '/data/*', onlyIfSuccessful: true
             }
         }
     }
diff --git a/create-dataset.py b/create-dataset.py
index e307f24..7ff160f 100644
--- a/create-dataset.py
+++ b/create-dataset.py
@@ -69,11 +69,11 @@ def save_undersample_data(
     y_train_undersample,
     y_test_undersample,
 ):
-    undersample_data.to_csv("data/undersample_data.csv", index=False)
-    X_train_undersample.to_csv("data/X_train_undersample.csv", index=False)
-    X_test_undersample.to_csv("data/X_test_undersample.csv", index=False)
-    y_train_undersample.to_csv("data/y_train_undersample.csv", index=False)
-    y_test_undersample.to_csv("data/y_test_undersample.csv", index=False)
+    undersample_data.to_csv("/data/undersample_data.csv", index=False)
+    X_train_undersample.to_csv("/data/X_train_undersample.csv", index=False)
+    X_test_undersample.to_csv("/data/X_test_undersample.csv", index=False)
+    y_train_undersample.to_csv("/data/y_train_undersample.csv", index=False)
+    y_test_undersample.to_csv("/data/y_test_undersample.csv", index=False)
 
 
 def split_whole_data(df):
@@ -87,16 +87,16 @@ def split_whole_data(df):
 
 
 def save_whole_data(df, X_train, X_test, y_train, y_test):
-    df.to_csv("data/creditcard.csv", index=False)
-    X_train.to_csv("data/X_train.csv", index=False)
-    X_test.to_csv("data/X_test.csv", index=False)
-    y_train.to_csv("data/y_train.csv", index=False)
-    y_test.to_csv("data/y_test.csv", index=False)
+    df.to_csv("/data/creditcard.csv", index=False)
+    X_train.to_csv("/data/X_train.csv", index=False)
+    X_test.to_csv("/data/X_test.csv", index=False)
+    y_train.to_csv("/data/y_train.csv", index=False)
+    y_test.to_csv("/data/y_test.csv", index=False)
 
 
 def main():
     download_kaggle_dataset()
-    os.makedirs("data", exist_ok=True)
+    os.makedirs("/data", exist_ok=True)
 
     df = load_data("creditcard.csv")
     df = normalize_data(df)