Run the data step through ./process_data.sh and disable the Kaggle download.

* Jenkinsfile: the withEnv step now calls ./process_data.sh; the previous
  `python3 ./download_data_and_process.py` invocation is kept as a comment.
* download_data_and_process.py: the `kaggle` import, the authenticate() call
  and the dataset_download_files() call are commented out; the script still
  reads fake_job_postings.csv/fake_job_postings.csv.

NOTE(review): the line structure of this patch was reconstructed from a copy
in which every line break had been collapsed. The leading indentation of the
Jenkinsfile lines is a best guess; apply with a whitespace-tolerant option
(e.g. `git apply --ignore-whitespace`) or confirm against the repository.

diff --git a/Jenkinsfile b/Jenkinsfile
index 016f19d..12bb502 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -32,7 +32,8 @@ pipeline {
                 withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}",
                          "KAGGLE_KEY=${params.KAGGLE_KEY}",
                          "CUTOFF=${params.CUTOFF}"]) {
-                    sh 'python3 ./download_data_and_process.py'
+                    sh './process_data.sh'
+                    // sh 'python3 ./download_data_and_process.py'
                     archiveArtifacts artifacts: "data_test.csv, data_dev.csv, data_train.csv, column_titles.csv"
                 }
             }
diff --git a/download_data_and_process.py b/download_data_and_process.py
index 9df15b5..cf9c33d 100644
--- a/download_data_and_process.py
+++ b/download_data_and_process.py
@@ -1,10 +1,10 @@
 import subprocess
 import pandas as pd
 import numpy as np
-import kaggle
+# import kaggle
 
-kaggle.api.authenticate()
-kaggle.api.dataset_download_files('shivamb/real-or-fake-fake-jobposting-prediction', path='fake_job_postings.csv', unzip=True)
+# kaggle.api.authenticate()
+# kaggle.api.dataset_download_files('shivamb/real-or-fake-fake-jobposting-prediction', path='fake_job_postings.csv', unzip=True)
 
 data=pd.read_csv('fake_job_postings.csv/fake_job_postings.csv')
 data = data.replace(np.nan, '', regex=True)