IUM_04 - back to old Jenkinsfile, old download_dataset script

2024-04-03 23:29:46 +02:00 · 2024-04-03 23:29:46 +02:00 · 68c257e7fb
commit 68c257e7fb
parent 29ed2c18ca
2 changed files with 25 additions and 23 deletions
--- a/42
+++ b/42
@@ -1,15 +1,5 @@
 pipeline {
-    agent {
+    agent any
        dockerfile {
            filename 'Dockerfile'
            reuseNode true
        }
    }
    environment {
        KAGGLE_USERNAME = credentials('KAGGLE_USERNAME')
        KAGGLE_KEY = credentials('KAGGLE_KEY')
    }
    parameters {
        password (
@@ -36,18 +26,30 @@ pipeline {
            }
        }
-        stage('Download dataset and preprocess data') {
+        stage('Download dataset') {
            steps {
-                script {
+                withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
-                    sh "echo ${env.KAGGLE_USERNAME}"
+                    sh "kaggle datasets download -d uciml/breast-cancer-wisconsin-data"
-                    sh "echo ${KAGGLE_USERNAME}"
+                    sh "unzip -o breast-cancer-wisconsin-data.zip"
-                    sh "echo ${CUTOFF}"
+                    sh "mkdir -p datasets"
-                    sh "echo ${params.CUTOFF}"
+                    sh "mv data.csv datasets/data.csv"
                    sh "chmod +x ./download_dataset.py"
                    sh "python3 ./download_dataset.py ${params.CUTOFF}"
                    archiveArtifacts artifacts: 'datasets/*', onlyIfSuccessful: true
                }
            }
        }
        stage('Preprocess data') {
            agent {
                dockerfile {
                    filename 'Dockerfile'
                    reuseNode true
                }
            }
            steps {
                sh "chmod +x ./download_dataset.py"
                sh "python3 ./download_dataset.py ${params.CUTOFF}"
                archiveArtifacts artifacts: 'datasets/*', onlyIfSuccessful: true
            }
        }
    }
 }
--- a/download_dataset.py
+++ b/download_dataset.py
@@ -1,13 +1,13 @@
 # Necessary imports
 import pandas as pd
-import kaggle
+# import kaggle
 import sys
 from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import MinMaxScaler
 # Download the dataset
-kaggle.api.authenticate()
+# kaggle.api.authenticate()
-kaggle.api.dataset_download_files('uciml/breast-cancer-wisconsin-data', path='./datasets', unzip=True)
+# kaggle.api.dataset_download_files('uciml/breast-cancer-wisconsin-data', path='./datasets', unzip=True)
 # Load the dataset
 df = pd.read_csv('./datasets/data.csv', index_col='id')