diff --git a/Jenkinsfile b/Jenkinsfile
index 6d6da6c..4670b24 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -37,7 +37,7 @@ pipeline {
                 // sh './process_data.sh'
                 sh 'ls'
                 sh 'python3 ./download_data_and_process.py'
-                archiveArtifacts artifacts: "data_test.csv, data_dev.csv, data_train.csv, column_titles.csv, fake_job_postings.csv"
+                archiveArtifacts artifacts: "data_test.csv, data_dev.csv, data_train.csv, column_titles.csv, data.csv"
             }
         }
     }
diff --git a/download_data_and_process.py b/download_data_and_process.py
index 6f416c1..4187723 100644
--- a/download_data_and_process.py
+++ b/download_data_and_process.py
@@ -5,7 +5,13 @@ import numpy as np
 
 # kaggle.api.authenticate()
 # kaggle.api.dataset_download_files('shivamb/real-or-fake-fake-jobposting-prediction', path='fake_job_postings.csv', unzip=True)
 
-data=pd.read_csv('fake_job_postings.csv')
+
+
+
+
+
+
+data=pd.read_csv('data.csv')
 data = data.replace(np.nan, '', regex=True)
 print("="*20)
diff --git a/process_data.sh b/process_data.sh
index 6accf1e..a7dcfe6 100755
--- a/process_data.sh
+++ b/process_data.sh
@@ -4,7 +4,6 @@ echo $KAGGLE_USERNAME
 kaggle datasets download -d shivamb/real-or-fake-fake-jobposting-prediction
 unzip -o real-or-fake-fake-jobposting-prediction.zip
 ls
-cat fake_job_postings.csv > data.csv
 echo "Save column titles"
 head -n 1 fake_job_postings.csv > column_titles.csv
 tail -n +2 fake_job_postings.csv > data_not_shuf.csv