This commit is contained in:
Mikołaj Pokrywka 2022-04-03 15:16:37 +02:00
parent 8a64c6abaf
commit 36f0ac9370
2 changed files with 5 additions and 4 deletions

3
Jenkinsfile vendored
View File

@@ -32,7 +32,8 @@ pipeline {
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}",
"KAGGLE_KEY=${params.KAGGLE_KEY}", "KAGGLE_KEY=${params.KAGGLE_KEY}",
"CUTOFF=${params.CUTOFF}"]) { "CUTOFF=${params.CUTOFF}"]) {
sh 'python3 ./download_data_and_process.py' sh './process_data.sh'
// sh 'python3 ./download_data_and_process.py'
archiveArtifacts artifacts: "data_test.csv, data_dev.csv, data_train.csv, column_titles.csv" archiveArtifacts artifacts: "data_test.csv, data_dev.csv, data_train.csv, column_titles.csv"
} }
} }

View File

@@ -1,10 +1,10 @@
import subprocess import subprocess
import pandas as pd import pandas as pd
import numpy as np import numpy as np
import kaggle # import kaggle
kaggle.api.authenticate() # kaggle.api.authenticate()
kaggle.api.dataset_download_files('shivamb/real-or-fake-fake-jobposting-prediction', path='fake_job_postings.csv', unzip=True) # kaggle.api.dataset_download_files('shivamb/real-or-fake-fake-jobposting-prediction', path='fake_job_postings.csv', unzip=True)
data=pd.read_csv('fake_job_postings.csv/fake_job_postings.csv') data=pd.read_csv('fake_job_postings.csv/fake_job_postings.csv')
data = data.replace(np.nan, '', regex=True) data = data.replace(np.nan, '', regex=True)