This commit is contained in:
Sheaza 2024-04-02 20:24:31 +02:00
parent 102c11792c
commit 008f9ef6cf
3 changed files with 9 additions and 10 deletions

11
Jenkinsfile vendored
View File

@ -7,7 +7,7 @@ pipeline {
description: 'Kaggle username' description: 'Kaggle username'
) )
password ( password (
name: 'KAGGLE_KEY', name: 'API_KEY',
defaultValue: '', defaultValue: '',
description: 'Kaggle API key' description: 'Kaggle API key'
) )
@ -28,6 +28,13 @@ pipeline {
checkout scm checkout scm
} }
} }
stage('Download dataset') {
    // Downloads and unzips the Kaggle dataset into the workspace so that the
    // later 'Prepare dataset' stage can read it from disk.
    steps {
        // Credentials are handed to the kaggle CLI through the KAGGLE_USERNAME /
        // KAGGLE_KEY environment variables. The password build parameter is
        // declared under the name 'API_KEY' in this pipeline's parameters block,
        // so the key must be read from params.API_KEY; params.KAGGLE_KEY is not
        // a declared parameter and would be exported as the literal text "null".
        withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.API_KEY}"]) {
            // The command itself contains no Groovy interpolation, so the secret
            // is only visible to the process via its environment.
            sh "kaggle datasets download -d nikhil7280/student-performance-multiple-linear-regression --unzip"
        }
    }
}
stage('Prepare dataset') { stage('Prepare dataset') {
agent { agent {
dockerfile { dockerfile {
@ -38,7 +45,7 @@ pipeline {
steps { steps {
sh "chmod +x ./get_dataset.py" sh "chmod +x ./get_dataset.py"
sh "python ./get_dataset.py ${params.KAGGLE_USERNAME} ${params.KAGGLE_KEY}" sh "python ./get_dataset.py"
archiveArtifacts artifacts: 'dataset.csv,df_train.csv,df_test.csv', onlyIfSuccessful: true archiveArtifacts artifacts: 'dataset.csv,df_train.csv,df_test.csv', onlyIfSuccessful: true
} }
} }

View File

@ -1,13 +1,6 @@
import pandas as pd import pandas as pd
from sklearn import preprocessing from sklearn import preprocessing
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
import sys
import os
os.environ["KAGGLE_USERNAME"] = sys.argv[1]
os.environ["KAGGLE_KEY"] = sys.argv[2]
os.system("kaggle datasets download -d nikhil7280/student-performance-multiple-linear-regression --unzip")
data = pd.read_csv("Student_Performance.csv") data = pd.read_csv("Student_Performance.csv")
print(data.head()) print(data.head())

View File

@ -1,3 +1,2 @@
kaggle
pandas pandas
scikit-learn scikit-learn