This commit is contained in:
Mikołaj Pokrywka 2022-04-03 18:34:04 +02:00
parent 584f945656
commit d45ca7b820
4 changed files with 24 additions and 12 deletions

View File

@@ -1,22 +1,29 @@
# Image recipe for the data-processing scripts: installs Python dependencies
# and the Kaggle client, then copies the processing scripts into /app.
# NOTE(review): this listing appears to show removed and added diff lines
# together (see the two FROM lines and the duplicated COPY group below) —
# confirm against the actual Dockerfile before relying on the order here.
# NOTE(review): each FROM starts a new build stage, so if both lines are really
# present only the python:3.8 stage ends up in the final image.
FROM ubuntu:latest
FROM python:3.8
# NOTE(review): `apt update` duplicates the `apt-get update` further down;
# presumably one of the two is the old side of the diff.
RUN apt update
# Create the /app directory in the container (if it does not exist) and switch
# to it; all following RUN, CMD, ENTRYPOINT, COPY and ADD instructions are
# executed relative to it.
WORKDIR /app
# Copy the dependency list on its own, before the scripts, so it is available
# to the pip install step below.
COPY ./requirements.txt .
RUN apt-get update
RUN apt-get install -y python3-pip
RUN pip3 install --upgrade pip
RUN pip3 install -r ./requirements.txt
RUN pip3 install kaggle
# Build arguments, re-exported below as environment variables of the same name.
# NOTE(review): KAGGLE_USERNAME / KAGGLE_KEY are credentials; values passed via
# ARG/ENV are stored in the image metadata and history — consider a build
# secret or supplying them only at run time.
ARG CUTOFF
ARG KAGGLE_USERNAME
ARG KAGGLE_KEY
ENV CUTOFF=${CUTOFF}
ENV KAGGLE_USERNAME=${KAGGLE_USERNAME}
ENV KAGGLE_KEY=${KAGGLE_KEY}
# Copy our scripts into the /app directory in the container.
COPY ./process_data.sh ./
COPY ./download_data_and_process.py ./
COPY ./stats.py ./
# NOTE(review): this runs the processing script at image build time; the same
# instruction is commented out at the end of this listing — confirm which
# variant is current.
RUN ./process_data.sh
# Copy our scripts into the /app directory in the container.
# Create /data first (process_data.sh copies the downloaded CSV to
# /data/fake_job_postings.csv).
RUN mkdir /data
WORKDIR /app
COPY ./process_data.sh .
COPY ./download_data_and_process.py .
COPY ./stats.py .
# Build-time execution of the processing script, disabled here.
# RUN ./process_data.sh

2
Jenkinsfile vendored
View File

@@ -34,7 +34,7 @@ pipeline {
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}",
"KAGGLE_KEY=${params.KAGGLE_KEY}",
"CUTOFF=${params.CUTOFF}"]) {
// sh './process_data.sh'
sh './process_data.sh'
sh 'ls'
sh 'wc data_train.csv'
sh 'cat column_titles.csv'

View File

@@ -1,17 +1,17 @@
import subprocess
import pandas as pd
import numpy as np
# import kaggle
import kaggle
# Disabled copy of the Kaggle download performed by the two live calls below.
# kaggle.api.authenticate()
# kaggle.api.dataset_download_files('shivamb/real-or-fake-fake-jobposting-prediction', path='fake_job_postings.csv', unzip=True)
# Authenticate against the Kaggle API, then download and unzip the dataset.
# Note that `path` is used as a directory here: the read further down opens
# 'fake_job_postings.csv/fake_job_postings.csv'.
kaggle.api.authenticate()
kaggle.api.dataset_download_files('shivamb/real-or-fake-fake-jobposting-prediction', path='fake_job_postings.csv', unzip=True)
# NOTE(review): `data` is assigned twice in a row, so the result of reading
# 'data.csv' is immediately overwritten — these look like the old and new
# sides of a diff shown together; confirm which read is current.
data=pd.read_csv('data.csv')
# Load the CSV from inside the download directory created above.
data=pd.read_csv('fake_job_postings.csv/fake_job_postings.csv')
# Replace missing values (NaN) with empty strings.
data = data.replace(np.nan, '', regex=True)
# Print a separator line made of 20 '=' characters.
print("="*20)

View File

@@ -4,6 +4,11 @@ echo $KAGGLE_USERNAME
# Download the dataset archive with the Kaggle CLI and unpack it, overwriting
# any files that already exist (-o).
kaggle datasets download -d shivamb/real-or-fake-fake-jobposting-prediction
unzip -o real-or-fake-fake-jobposting-prediction.zip
# Diagnostics: list the current directory and print line/word/byte counts of
# the extracted CSV.
ls
wc fake_job_postings.csv
# Copy the CSV to /data, then change directory before splitting the file.
# NOTE(review): the copy targets the absolute path /data, while `cd data` is
# relative to the current directory — the two refer to the same place only
# when the script is started from / ; confirm the intended location.
cp fake_job_postings.csv /data/fake_job_postings.csv
cd data
ls
wc fake_job_postings.csv
echo "Save column titles"
# The first line (header row) goes to column_titles.csv; everything from the
# second line onward goes to data_not_shuf.csv.
head -n 1 fake_job_postings.csv > column_titles.csv
tail -n +2 fake_job_postings.csv > data_not_shuf.csv