From d83a35ed0e40c8bd6dccee926c7f6a6721c89b4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Pokrywka?= Date: Sun, 3 Apr 2022 15:40:56 +0200 Subject: [PATCH] r --- download_data_and_process.py | 2 +- process_data.sh | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/download_data_and_process.py b/download_data_and_process.py index 6f416c1..2df0529 100644 --- a/download_data_and_process.py +++ b/download_data_and_process.py @@ -5,7 +5,7 @@ import numpy as np # kaggle.api.authenticate() # kaggle.api.dataset_download_files('shivamb/real-or-fake-fake-jobposting-prediction', path='fake_job_postings.csv', unzip=True) -data=pd.read_csv('fake_job_postings.csv') +data=pd.read_csv('/data/fake_job_postings.csv') data = data.replace(np.nan, '', regex=True) print("="*20) diff --git a/process_data.sh b/process_data.sh index 181bd1b..9c3809c 100755 --- a/process_data.sh +++ b/process_data.sh @@ -3,6 +3,7 @@ echo "Download data from kaggle" echo $KAGGLE_USERNAME kaggle datasets download -d shivamb/real-or-fake-fake-jobposting-prediction unzip -o real-or-fake-fake-jobposting-prediction.zip +cp fake_job_postings.csv /data/fake_job_postings.csv echo "Save column titles" head -n 1 fake_job_postings.csv > column_titles.csv tail -n +2 fake_job_postings.csv > data_not_shuf.csv