2024-03-20 14:23:42 +01:00
|
|
|
// Declarative Jenkins pipeline that checks out the repository, downloads the
// Breast Cancer Wisconsin dataset from Kaggle, runs the preprocessing script
// inside the project's Docker image, and archives the resulting CSV files.
pipeline {
    agent any

    parameters {
        // Kaggle credentials are taken as password parameters and handed to
        // the Kaggle CLI through environment variables in the download stage.
        password(
            name: 'KAGGLE_USERNAME',
            defaultValue: '',
            description: 'Kaggle username'
        )
        password(
            name: 'KAGGLE_KEY',
            defaultValue: '',
            description: 'Kaggle API key'
        )
        // Forwarded as the single command-line argument of download_dataset.py.
        string(
            name: 'CUTOFF',
            defaultValue: '500',
            description: 'Get only the first CUTOFF rows of the dataset'
        )
    }

    stages {
        stage('Clone repository') {
            steps {
                checkout scm
            }
        }

        stage('Download dataset') {
            steps {
                withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
                    // Single-quoted scripts: nothing here needs Groovy interpolation.
                    sh 'kaggle datasets download -d uciml/breast-cancer-wisconsin-data'
                    sh 'unzip -o breast-cancer-wisconsin-data.zip'
                    // -p keeps re-runs in an existing workspace from failing
                    // when the directory is already there.
                    sh 'mkdir -p datasets'
                    sh 'mv data.csv datasets/data.csv'
                }
            }
        }

        stage('Preprocess data') {
            agent {
                dockerfile {
                    filename 'Dockerfile'
                    // Run the container on the node and workspace allocated by
                    // the top-level agent so datasets/data.csv produced by the
                    // previous stage is visible here and to archiveArtifacts.
                    reuseNode true
                }
            }

            steps {
                // NOTE(review): requires sudo inside the image; the script is
                // invoked through python3 below, so the executable bit may not
                // be needed at all - confirm before removing.
                sh 'sudo chmod +x ./download_dataset.py'
                // Pass the user-supplied parameter through the environment and
                // let the shell expand it, instead of interpolating it into the
                // command text with a Groovy GString (shell-injection risk).
                withEnv(["CUTOFF=${params.CUTOFF}"]) {
                    sh 'python3 ./download_dataset.py "$CUTOFF"'
                }
                archiveArtifacts artifacts: './datasets/data.csv,./datasets/train.csv,./datasets/dev.csv,./datasets/test.csv', onlyIfSuccessful: true
            }
        }
    }
}
|