From 6bca27c999b7242fd83147999dfea1c18be6975a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Pokrywka?= Date: Sun, 3 Apr 2022 14:01:27 +0200 Subject: [PATCH] r --- Jenkinsfile | 1 - download_data_and_process.py | 5 +++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 95d8f12..a4cdb34 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -34,7 +34,6 @@ pipeline { withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}", "CUTOFF=${params.CUTOFF}"]) { - sh "./process_data.sh" sh 'python3 ./download_data_and_process.py' archiveArtifacts artifacts: "data_test.csv, data_dev.csv, data_train.csv, column_titles.csv" } diff --git a/download_data_and_process.py b/download_data_and_process.py index eb23b6a..3fed7a6 100644 --- a/download_data_and_process.py +++ b/download_data_and_process.py @@ -1,6 +1,11 @@ import subprocess import pandas as pd import numpy as np +import kaggle + +kaggle.api.authenticate() +# path is the destination directory: unzip into the CWD so read_csv below finds fake_job_postings.csv +kaggle.api.dataset_download_files('shivamb/real-or-fake-fake-jobposting-prediction', path='.', unzip=True) data=pd.read_csv('fake_job_postings.csv') data = data.replace(np.nan, '', regex=True)