From bdb0221253c34fa05ce4ac0e61a2451c3b4a0c43 Mon Sep 17 00:00:00 2001 From: Mateusz Date: Mon, 1 Apr 2024 18:03:42 +0200 Subject: [PATCH] Dockerfile --- Jenkinsfile | 42 ++++++++++++------------------------------ create-dataset.py | 20 ++------------------ 2 files changed, 14 insertions(+), 48 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 57f76e2..517a38d 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,5 +1,10 @@ pipeline { - agent any + agent { + dockerfile { + filename 'Dockerfile' + args '-u root' + } + } parameters { string ( @@ -15,39 +20,16 @@ pipeline { ) } + environment { + KAGGLE_USERNAME = "${params.KAGGLE_USERNAME}" + KAGGLE_KEY = "${params.KAGGLE_KEY}" + } + stages { - stage('Clone Repository') { - steps { - git branch: 'main', url: 'https://git.wmi.amu.edu.pl/s464913/ium_464913.git' - } - } - - stage('Download dataset') { - steps { - withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) { - sh 'kaggle datasets download -d mlg-ulb/creditcardfraud' - sh 'unzip -o creditcardfraud.zip' - sh 'rm creditcardfraud.zip' - } - } - } - stage('Run create-dataset script') { - agent { - dockerfile { - reuseNode true - } - } - steps { sh 'chmod +x create-dataset.py' - sh 'python3 ./create-dataset.py' - } - } - - stage('Archive Artifacts') { - steps { - archiveArtifacts artifacts: 'data/*', onlyIfSuccessful: true + sh 'python3 ./create-dataset.py $KAGGLE_USERNAME' } } } diff --git a/create-dataset.py b/create-dataset.py index a789f9a..2c5d216 100644 --- a/create-dataset.py +++ b/create-dataset.py @@ -91,26 +91,10 @@ def main(): # download_kaggle_dataset() os.makedirs("data", exist_ok=True) - os.system("ls -al") os.system("pwd") + os.system("rm -rf /var/lib/jenkins/workspace/z-s464913-create-dataset-1/data/") - df = load_data("creditcard.csv") - df = normalize_data(df) - - undersample_data, X_undersample, y_undersample = create_undersample_data(df) - X_train_undersample, X_test_undersample, y_train_undersample, y_test_undersample = ( - split_undersample_data(X_undersample, y_undersample) - ) - save_undersample_data( - undersample_data, - X_train_undersample, - X_test_undersample, - y_train_undersample, - y_test_undersample, - ) - - X_train, X_test, y_train, y_test = split_whole_data(df) - save_whole_data(df, X_train, X_test, y_train, y_test) + os.system("ls -l /var/lib/jenkins/workspace/z-s464913-create-dataset-1/") if __name__ == "__main__":