IUM_04 - back to old Jenkinsfile, old download_dataset script
This commit is contained in:
parent
29ed2c18ca
commit
68c257e7fb
38
Jenkinsfile
vendored
38
Jenkinsfile
vendored
@@ -1,15 +1,5 @@
|
|||||||
pipeline {
|
pipeline {
|
||||||
agent {
|
agent any
|
||||||
dockerfile {
|
|
||||||
filename 'Dockerfile'
|
|
||||||
reuseNode true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
environment {
|
|
||||||
KAGGLE_USERNAME = credentials('KAGGLE_USERNAME')
|
|
||||||
KAGGLE_KEY = credentials('KAGGLE_KEY')
|
|
||||||
}
|
|
||||||
|
|
||||||
parameters {
|
parameters {
|
||||||
password (
|
password (
|
||||||
@@ -36,18 +26,30 @@ pipeline {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
stage('Download dataset and preprocess data') {
|
stage('Download dataset') {
|
||||||
|
steps {
|
||||||
|
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
|
||||||
|
sh "kaggle datasets download -d uciml/breast-cancer-wisconsin-data"
|
||||||
|
sh "unzip -o breast-cancer-wisconsin-data.zip"
|
||||||
|
sh "mkdir -p datasets"
|
||||||
|
sh "mv data.csv datasets/data.csv"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
stage('Preprocess data') {
|
||||||
|
agent {
|
||||||
|
dockerfile {
|
||||||
|
filename 'Dockerfile'
|
||||||
|
reuseNode true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
steps {
|
steps {
|
||||||
script {
|
|
||||||
sh "echo ${env.KAGGLE_USERNAME}"
|
|
||||||
sh "echo ${KAGGLE_USERNAME}"
|
|
||||||
sh "echo ${CUTOFF}"
|
|
||||||
sh "echo ${params.CUTOFF}"
|
|
||||||
sh "chmod +x ./download_dataset.py"
|
sh "chmod +x ./download_dataset.py"
|
||||||
sh "python3 ./download_dataset.py ${params.CUTOFF}"
|
sh "python3 ./download_dataset.py ${params.CUTOFF}"
|
||||||
archiveArtifacts artifacts: 'datasets/*', onlyIfSuccessful: true
|
archiveArtifacts artifacts: 'datasets/*', onlyIfSuccessful: true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
}
|
@@ -1,13 +1,13 @@
|
|||||||
# Necessary imports
|
# Necessary imports
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import kaggle
|
# import kaggle
|
||||||
import sys
|
import sys
|
||||||
from sklearn.model_selection import train_test_split
|
from sklearn.model_selection import train_test_split
|
||||||
from sklearn.preprocessing import MinMaxScaler
|
from sklearn.preprocessing import MinMaxScaler
|
||||||
|
|
||||||
# Download the dataset
|
# Download the dataset
|
||||||
kaggle.api.authenticate()
|
# kaggle.api.authenticate()
|
||||||
kaggle.api.dataset_download_files('uciml/breast-cancer-wisconsin-data', path='./datasets', unzip=True)
|
# kaggle.api.dataset_download_files('uciml/breast-cancer-wisconsin-data', path='./datasets', unzip=True)
|
||||||
|
|
||||||
# Load the dataset
|
# Load the dataset
|
||||||
df = pd.read_csv('./datasets/data.csv', index_col='id')
|
df = pd.read_csv('./datasets/data.csv', index_col='id')
|
||||||
|
Loading…
Reference in New Issue
Block a user