From 68c257e7fbbb7cfb08f1d0aee1220648e595e862 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20=C5=81=C4=85czkowski?=
Date: Wed, 3 Apr 2024 23:29:46 +0200
Subject: [PATCH] IUM_04 - back to old Jenkinsfile, old download_dataset script

---
 Jenkinsfile         | 42 ++++++++++++++++++++++--------------------
 download_dataset.py |  6 +++---
 2 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 161baba..8e4f342 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -1,15 +1,5 @@
 pipeline {
-    agent {
-        dockerfile {
-            filename 'Dockerfile'
-            reuseNode true
-        }
-    }
-
-    environment {
-        KAGGLE_USERNAME = credentials('KAGGLE_USERNAME')
-        KAGGLE_KEY = credentials('KAGGLE_KEY')
-    }
+    agent any
 
     parameters {
         password (
@@ -36,18 +26,30 @@
             }
         }
 
-        stage('Download dataset and preprocess data') {
+        stage('Download dataset') {
             steps {
-                script {
-                    sh "echo ${env.KAGGLE_USERNAME}"
-                    sh "echo ${KAGGLE_USERNAME}"
-                    sh "echo ${CUTOFF}"
-                    sh "echo ${params.CUTOFF}"
-                    sh "chmod +x ./download_dataset.py"
-                    sh "python3 ./download_dataset.py ${params.CUTOFF}"
-                    archiveArtifacts artifacts: 'datasets/*', onlyIfSuccessful: true
+                withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
+                    sh "kaggle datasets download -d uciml/breast-cancer-wisconsin-data"
+                    sh "unzip -o breast-cancer-wisconsin-data.zip"
+                    sh "mkdir -p datasets"
+                    sh "mv data.csv datasets/data.csv"
                 }
             }
         }
+
+        stage('Preprocess data') {
+            agent {
+                dockerfile {
+                    filename 'Dockerfile'
+                    reuseNode true
+                }
+            }
+
+            steps {
+                sh "chmod +x ./download_dataset.py"
+                sh "python3 ./download_dataset.py ${params.CUTOFF}"
+                archiveArtifacts artifacts: 'datasets/*', onlyIfSuccessful: true
+            }
+        }
     }
 }
\ No newline at end of file
diff --git a/download_dataset.py b/download_dataset.py
index 74ba05c..a41145c 100644
--- a/download_dataset.py
+++ b/download_dataset.py
@@ -1,13 +1,13 @@
 # Necessary imports
 import pandas as pd
-import kaggle
+# import kaggle
 import sys
 from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import MinMaxScaler
 
 # Download the dataset
-kaggle.api.authenticate()
-kaggle.api.dataset_download_files('uciml/breast-cancer-wisconsin-data', path='./datasets', unzip=True)
+# kaggle.api.authenticate()
+# kaggle.api.dataset_download_files('uciml/breast-cancer-wisconsin-data', path='./datasets', unzip=True)
 
 # Load the dataset
 df = pd.read_csv('./datasets/data.csv', index_col='id')