created docker

This commit is contained in:
Mikołaj Pokrywka 2022-04-02 14:15:19 +02:00
parent 9fe588aa96
commit 3cb8a4a952
5 changed files with 41 additions and 1 deletions

16
Dockerfile Normal file
View File

@ -0,0 +1,16 @@
# Image that downloads the Kaggle dataset and prints basic statistics about it.
#
# A single base image is used: the official Python image already ships a full
# Debian userland, so a separate (unreferenced) ubuntu stage added nothing to
# the final image. No OS packages are installed, so no apt index refresh is
# needed either.
FROM python:3.8

# Create /app in the container (if it does not exist) and switch to it; all
# following RUN, CMD, ENTRYPOINT, COPY and ADD instructions are executed there.
WORKDIR /app

# Install Python dependencies before copying the scripts so that this layer is
# reused from cache as long as requirements.txt is unchanged.
COPY ./requirements.txt .
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip3 install --no-cache-dir -r ./requirements.txt

# Copy the processing scripts into /app in the container.
COPY ./process_data.sh ./
COPY ./download_data_and_process.py ./

# Default command executed when the container starts. Exec (JSON) form runs
# Python directly as PID 1 so it receives signals sent by `docker stop`.
CMD ["python", "./download_data_and_process.py"]

3
README.md Normal file
View File

@ -0,0 +1,3 @@
# Run with Docker
Build the image:

`docker build -t ium .`

Run the container, passing your Kaggle credentials as environment variables:

`docker run -i -e KAGGLE_USERNAME='<your_kaggle_username>' -e KAGGLE_KEY='<your_kaggle_key>' ium:latest`

View File

@ -0,0 +1,18 @@
import subprocess
import pandas as pd
import numpy as np
# Run the shell script that prepares the dataset. check=True raises
# CalledProcessError on a non-zero exit status, so a failed run stops here
# instead of continuing with a missing or stale CSV file.
subprocess.run("./process_data.sh", check=True)

# Load the postings file; presumably it is left in the working directory by
# process_data.sh -- verify against that script.
data = pd.read_csv('fake_job_postings.csv')
# Replace missing values with empty strings.
data = data.replace(np.nan, '', regex=True)

print("=" * 20)
print('Ilość wierszy w zbiorze: ', len(data))

# Frequency of each value in the "department" column.
print("=" * 10, ' data["department"].value_counts() ', 10 * '=')
print(data["department"].value_counts())

# Median of the numeric columns only. numeric_only=True makes the column
# selection explicit rather than relying on pandas' deprecated silent dropping
# of columns for which a median cannot be computed.
print("=" * 10, ' data.median() ', 10 * '=')
print(data.median(numeric_only=True))

# Summary statistics for every column, numeric and non-numeric alike.
print("=" * 10, ' data.describe(include="all") ', 10 * '=')
print(data.describe(include='all'))

View File

@ -12,4 +12,4 @@ head -n $CUTOFF data_not_cutted.csv > data.csv
sed -n '1,2500p' data.csv > data_test.csv
sed -n '2501,5000p' data.csv > data_dev.csv
tail -n +5001 data.csv > data_train.csv
rm data.csv real-or-fake-fake-jobposting-prediction.zip fake_job_postings.csv column_titles.csv data_not_shuf.csv data_not_cutted.csv
rm data.csv real-or-fake-fake-jobposting-prediction.zip column_titles.csv data_not_shuf.csv data_not_cutted.csv

3
requirements.txt Normal file
View File

@ -0,0 +1,3 @@
pandas==1.4.1
kaggle==1.5.12
numpy==1.22.3