From 68c257e7fbbb7cfb08f1d0aee1220648e595e862 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20=C5=81=C4=85czkowski?=
Date: Wed, 3 Apr 2024 23:29:46 +0200
Subject: [PATCH] IUM_04 - back to old Jenkinsfile, old download_dataset script

---
 Jenkinsfile         | 42 ++++++++++++++++++++++--------------------
 download_dataset.py |  6 +++---
 2 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 161baba..8e4f342 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -1,15 +1,5 @@
 pipeline {
-    agent {
-        dockerfile {
-            filename 'Dockerfile'
-            reuseNode true
-        }
-    }
-
-    environment {
-        KAGGLE_USERNAME = credentials('KAGGLE_USERNAME')
-        KAGGLE_KEY = credentials('KAGGLE_KEY')
-    }
+    agent any
 
     parameters {
         password (
@@ -36,18 +26,30 @@
             }
         }
 
-        stage('Download dataset and preprocess data') {
+        stage('Download dataset') {
             steps {
-                script {
-                    sh "echo ${env.KAGGLE_USERNAME}"
-                    sh "echo ${KAGGLE_USERNAME}"
-                    sh "echo ${CUTOFF}"
-                    sh "echo ${params.CUTOFF}"
-                    sh "chmod +x ./download_dataset.py"
-                    sh "python3 ./download_dataset.py ${params.CUTOFF}"
-                    archiveArtifacts artifacts: 'datasets/*', onlyIfSuccessful: true
+                withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
+                    sh "kaggle datasets download -d uciml/breast-cancer-wisconsin-data"
+                    sh "unzip -o breast-cancer-wisconsin-data.zip"
+                    sh "mkdir -p datasets"
+                    sh "mv data.csv datasets/data.csv"
                 }
             }
         }
+
+        stage('Preprocess data') {
+            agent {
+                dockerfile {
+                    filename 'Dockerfile'
+                    reuseNode true
+                }
+            }
+
+            steps {
+                sh "chmod +x ./download_dataset.py"
+                sh "python3 ./download_dataset.py ${params.CUTOFF}"
+                archiveArtifacts artifacts: 'datasets/*', onlyIfSuccessful: true
+            }
+        }
     }
 }
\ No newline at end of file
diff --git a/download_dataset.py b/download_dataset.py
index 74ba05c..a41145c 100644
--- a/download_dataset.py
+++ b/download_dataset.py
@@ -1,13 +1,13 @@
 # Necessary imports
 import pandas as pd
-import kaggle
+# import kaggle
 import sys
 from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import MinMaxScaler
 
 # Download the dataset
-kaggle.api.authenticate()
-kaggle.api.dataset_download_files('uciml/breast-cancer-wisconsin-data', path='./datasets', unzip=True)
+# kaggle.api.authenticate()
+# kaggle.api.dataset_download_files('uciml/breast-cancer-wisconsin-data', path='./datasets', unzip=True)
 
 # Load the dataset
 df = pd.read_csv('./datasets/data.csv', index_col='id')