This commit is contained in:
Mikołaj Pokrywka 2022-04-03 18:47:33 +02:00
parent f5528b9b8d
commit 36c6deaeea
2 changed files with 6 additions and 6 deletions

View File

@@ -10,7 +10,7 @@ RUN pip3 install --upgrade pip
RUN pip3 install pandas
RUN pip3 install numpy
RUN pip3 install kaggle
RUN apt-get install zip unzip
ARG CUTOFF
ARG KAGGLE_USERNAME
ARG KAGGLE_KEY
@@ -27,4 +27,4 @@ COPY ./process_data.sh .
COPY ./download_data_and_process.py .
COPY ./stats.py .
-# RUN ./process_data.sh
+RUN ./process_data.sh

View File

@@ -1,17 +1,17 @@
import subprocess
import pandas as pd
import numpy as np
-import kaggle
+# import kaggle
-kaggle.api.authenticate()
-kaggle.api.dataset_download_files('shivamb/real-or-fake-fake-jobposting-prediction', path='fake_job_postings.csv', unzip=True)
+# kaggle.api.authenticate()
+# kaggle.api.dataset_download_files('shivamb/real-or-fake-fake-jobposting-prediction', path='fake_job_postings.csv', unzip=True)
-data=pd.read_csv('fake_job_postings.csv/fake_job_postings.csv')
+data=pd.read_csv('fake_job_postings.csv')
data = data.replace(np.nan, '', regex=True)
print("="*20)