Move the Kaggle dataset download out of get_dataset.py into a Jenkins stage.

- Jenkinsfile: rename the password parameter KAGGLE_KEY -> API_KEY and add a
  'Download dataset' stage that exports KAGGLE_USERNAME / KAGGLE_KEY via
  withEnv and runs the kaggle CLI. The stage reads params.API_KEY, matching
  the renamed parameter (params.KAGGLE_KEY no longer exists after the rename).
- get_dataset.py: no longer takes credentials on argv or shells out to kaggle;
  it only reads Student_Performance.csv from the working directory.
- requirements.txt: drop the kaggle package.

NOTE(review): the new stage runs on the pipeline's own agent, so the kaggle
CLI presumably has to be installed there, and the 'Prepare dataset' stage has
to see the same workspace -- confirm against the full Jenkinsfile.

diff --git a/Jenkinsfile b/Jenkinsfile
index 85899ee..58031b7 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -7,7 +7,7 @@ pipeline {
             description: 'Kaggle username'
         )
         password (
-            name: 'KAGGLE_KEY',
+            name: 'API_KEY',
             defaultValue: '',
             description: 'Kaggle API key'
         )
@@ -28,6 +28,13 @@ pipeline {
                 checkout scm
             }
         }
+        stage('Download dataset') {
+            steps {
+                withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.API_KEY}"]) {
+                    sh "kaggle datasets download -d nikhil7280/student-performance-multiple-linear-regression --unzip"
+                }
+            }
+        }
         stage('Prepare dataset') {
             agent {
                 dockerfile {
@@ -38,7 +45,7 @@ pipeline {
 
             steps {
                 sh "chmod +x ./get_dataset.py"
-                sh "python ./get_dataset.py ${params.KAGGLE_USERNAME} ${params.KAGGLE_KEY}"
+                sh "python ./get_dataset.py"
                 archiveArtifacts artifacts: 'dataset.csv,df_train.csv,df_test.csv', onlyIfSuccessful: true
             }
         }
diff --git a/get_dataset.py b/get_dataset.py
index 61cb483..128fdc8 100644
--- a/get_dataset.py
+++ b/get_dataset.py
@@ -1,13 +1,6 @@
 import pandas as pd
 from sklearn import preprocessing
 from sklearn.model_selection import train_test_split
-import sys
-import os
-
-os.environ["KAGGLE_USERNAME"] = sys.argv[1]
-os.environ["KAGGLE_KEY"] = sys.argv[2]
-
-os.system("kaggle datasets download -d nikhil7280/student-performance-multiple-linear-regression --unzip")
 
 data = pd.read_csv("Student_Performance.csv")
 print(data.head())
diff --git a/requirements.txt b/requirements.txt
index fef1509..fb4d288 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,2 @@
-kaggle
 pandas
 scikit-learn
\ No newline at end of file