created docker
This commit is contained in:
parent
9fe588aa96
commit
3cb8a4a952
16
Dockerfile
Normal file
16
Dockerfile
Normal file
@ -0,0 +1,16 @@
|
||||
# Image that downloads the Kaggle data set and prints basic statistics about it.
#
# The full (non-slim) Python 3.8 image is the only base: a stage declared before
# the last FROM and never referenced via COPY --from does not end up in the
# final image, so an extra leading base stage would only cost a pull.
# NOTE(review): process_data.sh may rely on command-line tools shipped with the
# full image - confirm before switching to a -slim variant.
FROM python:3.8

# Create /app in the container (if it does not exist) and switch to it; all
# subsequent RUN, CMD, ENTRYPOINT, COPY and ADD instructions are executed there.
WORKDIR /app

# Install the Python dependencies before copying the scripts, so that this
# layer is reused from the build cache as long as requirements.txt is unchanged.
# --no-cache-dir keeps pip's download cache out of the image layer.
COPY ./requirements.txt .
RUN pip3 install --no-cache-dir -r ./requirements.txt

# Copy our scripts into /app in the container.
COPY ./process_data.sh ./
COPY ./download_data_and_process.py ./

# download_data_and_process.py executes ./process_data.sh directly (without a
# shell), so the script must carry the executable bit regardless of the
# permissions it had in the build context.
RUN chmod +x ./process_data.sh

# Default command that is run when the container starts.
# Exec (JSON array) form makes Python PID 1, so it receives the signals sent by
# `docker stop` instead of an intermediate /bin/sh.
CMD ["python", "./download_data_and_process.py"]
|
3
README.md
Normal file
3
README.md
Normal file
@ -0,0 +1,3 @@
|
||||
# Run with docker
|
||||
`docker build -t ium .`
|
||||
`docker run -i -e KAGGLE_USERNAME='<your_kaggle_username>' -e KAGGLE_KEY='<your_kaggle_key>' ium:latest`
|
18
download_data_and_process.py
Normal file
18
download_data_and_process.py
Normal file
@ -0,0 +1,18 @@
|
||||
import subprocess
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
# Run the data preparation script before touching the CSV file.
# check_call raises CalledProcessError on a non-zero exit status, so a failed
# preparation step stops the script here instead of surfacing later as a
# confusing "file not found" error from read_csv. On success rc is 0.
# NOTE(review): presumably process_data.sh is what produces
# fake_job_postings.csv in the working directory - confirm against the script.
rc = subprocess.check_call("./process_data.sh")

# Load the data set and replace missing values (NaN) with empty strings.
data = pd.read_csv('fake_job_postings.csv')
data = data.replace(np.nan, '', regex=True)

# Total number of rows in the data set.
print("="*20)
print('Ilość wierszy w zbiorze: ',len(data))

# Number of occurrences of each value in the "department" column.
print("="*10, ' data["department"].value_counts() ', 10*'=')
print(data["department"].value_counts())

# Median of every numeric column. numeric_only=True is passed explicitly:
# the frame also holds string columns, and relying on pandas to silently drop
# them is deprecated and raises a TypeError in newer pandas releases.
print("="*10, ' data.median() ', 10*'=')
print(data.median(numeric_only=True))

# Summary statistics for all columns, numeric and non-numeric alike.
print("="*10, ' data.describe(include="all") ', 10*'=')
print(data.describe(include='all'))
|
@ -12,4 +12,4 @@ head -n $CUTOFF data_not_cutted.csv > data.csv
|
||||
sed -n '1,2500p' data.csv > data_test.csv
|
||||
sed -n '2501,5000p' data.csv > data_dev.csv
|
||||
tail -n +5001 data.csv > data_train.csv
|
||||
rm data.csv real-or-fake-fake-jobposting-prediction.zip fake_job_postings.csv column_titles.csv data_not_shuf.csv data_not_cutted.csv
|
||||
rm data.csv real-or-fake-fake-jobposting-prediction.zip column_titles.csv data_not_shuf.csv data_not_cutted.csv
|
3
requirements.txt
Normal file
3
requirements.txt
Normal file
@ -0,0 +1,3 @@
|
||||
pandas==1.4.1
|
||||
kaggle==1.5.12
|
||||
numpy==1.22.3
|
Loading…
Reference in New Issue
Block a user