dockerfile
This commit is contained in:
parent
bd18c8b09d
commit
6cbad8e595
3
.gitignore
vendored
3
.gitignore
vendored
@ -150,3 +150,6 @@ crashlytics.properties
|
||||
crashlytics-build.properties
|
||||
fabric.properties
|
||||
|
||||
# kaggle (API credentials; the Dockerfile copies ./kaggle.json, so ignore both spellings)
.kaggle.json
kaggle.json
|
||||
|
||||
|
26
dockerfile
Normal file
26
dockerfile
Normal file
@ -0,0 +1,26 @@
|
||||
# Base image: Ubuntu pinned to a fixed release so that builds are reproducible.
# With the floating "latest" tag, newer Ubuntu releases mark the system Python
# as externally managed and reject the "pip3 install --user" calls below.
FROM ubuntu:22.04

# Install the required dependencies. Note the "-y" (assume yes) flag.
# apt-get is the stable scripting interface; the package lists are removed in
# the same layer so they do not bloat the image.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y figlet python3 python3-pip unzip \
    && rm -rf /var/lib/apt/lists/*
RUN pip3 install --user --no-cache-dir kaggle pandas

# "pip3 install --user" places console scripts (such as the kaggle CLI) here.
ENV PATH="/root/.local/bin:${PATH}"

# Create the /app directory in the container (if it does not exist) and switch
# to it; all following RUN, CMD, ENTRYPOINT, COPY and ADD instructions are
# executed relative to it.
WORKDIR /app

# Copy our scripts into /app in the container.
COPY ./figlet-loop.sh ./download.sh ./script.py ./

# NOTE(review): this bakes the Kaggle API token into an image layer, so anyone
# who can pull the image can read it. Prefer a BuildKit secret mount
# (RUN --mount=type=secret,...) if the image is ever published.
COPY ./kaggle.json /root/.kaggle/kaggle.json

# Download the dataset and run the processing script at build time.
RUN ./download.sh 117928
RUN python3 ./script.py

# Default command that would be run when the container starts (disabled).
# CMD python3 ./script.py
|
4
figlet-loop.sh
Executable file
4
figlet-loop.sh
Executable file
@ -0,0 +1,4 @@
|
||||
#!/bin/bash
# Render every line read from standard input as a figlet banner.
# IFS= keeps leading/trailing whitespace and -r keeps backslashes literal;
# the extra test still renders a final line that lacks a trailing newline.
while IFS= read -r line || [ -n "$line" ]; do
    figlet "$line"
done
|
45
script.py
45
script.py
@ -1,5 +1,8 @@
|
||||
import subprocess
|
||||
import sys
|
||||
import pandas as pd
|
||||
import os
|
||||
import numpy as np
|
||||
|
||||
|
||||
def install_dependencies():
|
||||
@ -24,21 +27,20 @@ def download_dataset():
|
||||
def divide_dataset(dataset):
    """Split the raw car-prices CSV into dev, test and train files.

    Reads ``Car_Prices_Poland_Kaggle.csv`` from the current directory, drops
    its header line, shuffles the remaining rows and writes three disjoint,
    header-less files next to it: ``Car_Prices_Poland_Kaggle_dev.csv`` and
    ``Car_Prices_Poland_Kaggle_test.csv`` with ``len(dataset) // 6`` rows
    each, and ``Car_Prices_Poland_Kaggle_train.csv`` with every remaining
    row. Finally prints whether the number of rows written equals
    ``len(dataset)``, followed by the train, dev and test row counts.

    Args:
        dataset: Sized object; only ``len(dataset)`` is used, to size the
            dev and test parts.
    """
    import random  # local import: only this function needs it

    # Work on raw bytes so the rows are copied through unchanged, whatever
    # the file's encoding is.
    with open('Car_Prices_Poland_Kaggle.csv', 'rb') as source:
        rows = source.readlines()[1:]  # skip the header line

    # A last row without a newline would be glued to its neighbour once the
    # rows are shuffled.
    if rows and not rows[-1].endswith(b'\n'):
        rows[-1] += b'\n'
    random.shuffle(rows)

    part = len(dataset) // 6
    # Consecutive, non-overlapping slices of the shuffled rows: the parts
    # can never share a row.
    splits = {
        'dev': rows[:part],
        'test': rows[part:2 * part],
        'train': rows[2 * part:],
    }
    for name, chunk in splits.items():
        with open(f'Car_Prices_Poland_Kaggle_{name}.csv', 'wb') as target:
            target.writelines(chunk)

    written = sum(len(chunk) for chunk in splits.values())
    print("Len match: " + str(written == len(dataset)))
    for name in ('train', 'dev', 'test'):
        print(len(splits[name]))
|
||||
|
||||
|
||||
def get_statistics(dataset):
|
||||
@ -57,31 +59,24 @@ def normalize_dataset(dataset):
|
||||
|
||||
# drop columns
|
||||
dataset.drop(columns=["Unnamed: 0", "generation_name"], inplace=True)
|
||||
dataset = dataset.dropna()
|
||||
|
||||
# normalize numbers to [0, 1]
|
||||
for column in dataset.columns:
|
||||
if isinstance(dataset.iloc[1][column], np.int64) or isinstance(dataset.iloc[1][column], np.float64):
|
||||
dataset[column] = (dataset[column] - dataset[column].min()) / (
|
||||
dataset[column].max() - dataset[column].min())
|
||||
|
||||
# There are no null rows
|
||||
# dataset.isnull().sum()
|
||||
|
||||
return dataset
|
||||
|
||||
|
||||
# Dependencies and the dataset archive are provided by the Docker image build
# (pip3 install / download.sh), so install_dependencies() and
# download_dataset() are intentionally not called here.
unzip_package()

cars = pd.read_csv('./Car_Prices_Poland_Kaggle.csv')
# normalize_dataset returns the cleaned frame; keep its result instead of
# relying on in-place mutation of the argument.
cars = normalize_dataset(cars)
divide_dataset(cars)
get_statistics(cars)
|
||||
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user