diff --git a/Jenkinsfile b/Jenkinsfile index 1b2de93..74c9c74 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -35,6 +35,7 @@ pipeline { steps { withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}" ]) { + sh 'kaggle datasets download muhammadbinimran/housing-price-prediction-data --unzip' sh 'chmod 777 ./data_processing.py' sh "python3 ./data_processing.py ${params.CUTOFF}" } diff --git a/data_processing.py b/data_processing.py index adb9b42..061d283 100644 --- a/data_processing.py +++ b/data_processing.py @@ -2,9 +2,7 @@ import sklearn from sklearn.preprocessing import OneHotEncoder from sklearn.model_selection import train_test_split import pandas as pd -import subprocess -subprocess.run(["kaggle", "datasets", "download", "muhammadbinimran/housing-price-prediction-data", "--unzip"]) housing_price_dataset = pd.read_csv('housing_price_dataset.csv') hp_train_test, hp_dev = sklearn.model_selection.train_test_split(housing_price_dataset, test_size=0.1)