IUM_04 - back to old Jenkinsfile, old download_dataset script

This commit is contained in:
Paweł Łączkowski 2024-04-03 23:29:46 +02:00
parent 29ed2c18ca
commit 68c257e7fb
2 changed files with 25 additions and 23 deletions

42
Jenkinsfile vendored
View File

@@ -1,15 +1,5 @@
pipeline { pipeline {
agent { agent any
dockerfile {
filename 'Dockerfile'
reuseNode true
}
}
environment {
KAGGLE_USERNAME = credentials('KAGGLE_USERNAME')
KAGGLE_KEY = credentials('KAGGLE_KEY')
}
parameters { parameters {
password ( password (
@@ -36,18 +26,30 @@ pipeline {
} }
} }
stage('Download dataset and preprocess data') { stage('Download dataset') {
steps { steps {
script { withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
sh "echo ${env.KAGGLE_USERNAME}" sh "kaggle datasets download -d uciml/breast-cancer-wisconsin-data"
sh "echo ${KAGGLE_USERNAME}" sh "unzip -o breast-cancer-wisconsin-data.zip"
sh "echo ${CUTOFF}" sh "mkdir -p datasets"
sh "echo ${params.CUTOFF}" sh "mv data.csv datasets/data.csv"
sh "chmod +x ./download_dataset.py"
sh "python3 ./download_dataset.py ${params.CUTOFF}"
archiveArtifacts artifacts: 'datasets/*', onlyIfSuccessful: true
} }
} }
} }
stage('Preprocess data') {
agent {
dockerfile {
filename 'Dockerfile'
reuseNode true
}
}
steps {
sh "chmod +x ./download_dataset.py"
sh "python3 ./download_dataset.py ${params.CUTOFF}"
archiveArtifacts artifacts: 'datasets/*', onlyIfSuccessful: true
}
}
} }
} }

View File

@@ -1,13 +1,13 @@
# Necessary imports # Necessary imports
import pandas as pd import pandas as pd
import kaggle # import kaggle
import sys import sys
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler from sklearn.preprocessing import MinMaxScaler
# Download the dataset # Download the dataset
kaggle.api.authenticate() # kaggle.api.authenticate()
kaggle.api.dataset_download_files('uciml/breast-cancer-wisconsin-data', path='./datasets', unzip=True) # kaggle.api.dataset_download_files('uciml/breast-cancer-wisconsin-data', path='./datasets', unzip=True)
# Load the dataset # Load the dataset
df = pd.read_csv('./datasets/data.csv', index_col='id') df = pd.read_csv('./datasets/data.csv', index_col='id')