IUM_04 - update Jenkinsfile and download dataset script
This commit is contained in:
parent
157fe812e8
commit
7b0a5e55b9
11
Jenkinsfile
vendored
11
Jenkinsfile
vendored
@ -34,14 +34,11 @@ pipeline {
|
|||||||
stage('Download dataset') {
|
stage('Download dataset') {
|
||||||
steps {
|
steps {
|
||||||
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}", "CUTOFF=${params.CUTOFF}"]) {
|
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}", "CUTOFF=${params.CUTOFF}"]) {
|
||||||
sh "apt-get install sudo -y"
|
sh "kaggle datasets download -d uciml/breast-cancer-wisconsin-data"
|
||||||
sh "sudo mkdir /.kaggle"
|
sh "unzip -o breast-cancer-wisconsin-data.zip"
|
||||||
sh "sudo echo > /.kaggle/kaggle.json"
|
sh "mkdir datasets"
|
||||||
sh "sudo chmod 777 /.kaggle/kaggle.json"
|
sh "mv data.csv datasets/data.csv"
|
||||||
sh "sudo chown `whoami` /.kaggle/kaggle.json"
|
|
||||||
sh "sudo chmod +x ./download_dataset.py"
|
sh "sudo chmod +x ./download_dataset.py"
|
||||||
sh "export KAGGLE_USERNAME=${params.KAGGLE_USERNAME}"
|
|
||||||
sh "export KAGGLE_KEY=${params.KAGGLE_KEY}"
|
|
||||||
sh "python3 ./download_dataset.py $CUTOFF"
|
sh "python3 ./download_dataset.py $CUTOFF"
|
||||||
archiveArtifacts artifacts: './datasets/data.csv,./datasets/train.csv,./datasets/dev.csv,./datasets/test.csv', onlyIfSuccessful: true
|
archiveArtifacts artifacts: './datasets/data.csv,./datasets/train.csv,./datasets/dev.csv,./datasets/test.csv', onlyIfSuccessful: true
|
||||||
}
|
}
|
||||||
|
@ -5,10 +5,6 @@ import kaggle
|
|||||||
from sklearn.model_selection import train_test_split
|
from sklearn.model_selection import train_test_split
|
||||||
from sklearn.preprocessing import MinMaxScaler
|
from sklearn.preprocessing import MinMaxScaler
|
||||||
|
|
||||||
# Download the dataset from Kaggle
|
|
||||||
kaggle.api.authenticate()
|
|
||||||
kaggle.api.dataset_download_files('uciml/breast-cancer-wisconsin-data', path='./datasets', unzip=True)
|
|
||||||
|
|
||||||
# Load the dataset
|
# Load the dataset
|
||||||
df = pd.read_csv('./datasets/data.csv', index_col='id')
|
df = pd.read_csv('./datasets/data.csv', index_col='id')
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user