From 5f863f13b1b54e8fc131b23e958b810f27eaa0fc Mon Sep 17 00:00:00 2001 From: Mateusz Date: Mon, 1 Apr 2024 18:04:49 +0200 Subject: [PATCH] Dockerfile --- Jenkinsfile | 42 ++++++++++++++++++++++++++++++------------ create-dataset.py | 21 ++++++++++++++++++--- 2 files changed, 48 insertions(+), 15 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 517a38d..57f76e2 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,10 +1,5 @@ pipeline { - agent { - dockerfile { - filename 'Dockerfile' - args '-u root' - } - } + agent any parameters { string ( @@ -20,16 +15,39 @@ pipeline { ) } - environment { - KAGGLE_USERNAME = "${params.KAGGLE_USERNAME}" - KAGGLE_KEY = "${params.KAGGLE_KEY}" - } - stages { + stage('Clone Repository') { + steps { + git branch: 'main', url: 'https://git.wmi.amu.edu.pl/s464913/ium_464913.git' + } + } + + stage('Download dataset') { + steps { + withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) { + sh 'kaggle datasets download -d mlg-ulb/creditcardfraud' + sh 'unzip -o creditcardfraud.zip' + sh 'rm creditcardfraud.zip' + } + } + } + stage('Run create-dataset script') { + agent { + dockerfile { + reuseNode true + } + } + steps { sh 'chmod +x create-dataset.py' - sh 'python3 ./create-dataset.py $KAGGLE_USERNAME' + sh 'python3 ./create-dataset.py' + } + } + + stage('Archive Artifacts') { + steps { + archiveArtifacts artifacts: 'data/*', onlyIfSuccessful: true } } } diff --git a/create-dataset.py b/create-dataset.py index 2c5d216..5ad482c 100644 --- a/create-dataset.py +++ b/create-dataset.py @@ -91,10 +91,25 @@ def main(): # download_kaggle_dataset() os.makedirs("data", exist_ok=True) - os.system("pwd") - os.system("rm -rf /var/lib/jenkins/workspace/z-s464913-create-dataset-1/data/") + os.system - os.system("ls -l /var/lib/jenkins/workspace/z-s464913-create-dataset-1/") + df = load_data("creditcard.csv") + df = normalize_data(df) + + undersample_data, X_undersample, y_undersample = create_undersample_data(df) + X_train_undersample, X_test_undersample, y_train_undersample, y_test_undersample = ( + split_undersample_data(X_undersample, y_undersample) + ) + save_undersample_data( + undersample_data, + X_train_undersample, + X_test_undersample, + y_train_undersample, + y_test_undersample, + ) + + X_train, X_test, y_train, y_test = split_whole_data(df) + save_whole_data(df, X_train, X_test, y_train, y_test) if __name__ == "__main__":