Dockerization

This commit is contained in:
Marcin Kostrzewski 2022-04-01 22:25:05 +02:00
parent 66aea79d9e
commit 9ee3ef9d6b
2 changed files with 47 additions and 0 deletions

17
Dockerfile Normal file
View File

@ -0,0 +1,17 @@
# Image that installs the Python dependencies, downloads the dataset at build
# time and, by default, prints statistics about it.
FROM python:3.9
WORKDIR /app

# Copy the dependency manifest on its own first so the (slow) install layer
# stays cached until requirements.txt itself changes.
COPY ./requirements.txt .
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir -r requirements.txt

# COPY rather than ADD: these are plain local files, so none of ADD's extra
# behaviour (archive extraction, URL fetching) is wanted.
COPY . .

# Kaggle credentials, supplied with --build-arg. Build args are exposed as
# environment variables to the RUN steps that follow, which is presumably how
# download_dataset.sh picks them up (verify against the script). They are
# deliberately NOT re-exported with ENV, so they do not end up in the
# environment of every container started from this image.
# NOTE(review): build-arg values can still be recovered with `docker history`;
# consider moving to `RUN --mount=type=secret` when the build pipeline allows.
ARG KAGGLE_USERNAME
ARG KAGGLE_KEY

# Make the script executable and run it in a single layer.
RUN chmod u+x ./download_dataset.sh && ./download_dataset.sh

# Exec form: python runs as PID 1 and receives stop signals directly.
CMD ["python", "power_plant_data_stats.py"]

30
power_plant_data_stats.py Normal file
View File

@ -0,0 +1,30 @@
"""Print descriptive statistics for the full, dev, test and train CSV files,
then print the test set with one column scaled by its largest absolute value."""
import pandas as pd


def _load_and_describe(header, csv_path):
    """Print `header`, read `csv_path` with pandas, print its describe() table.

    Returns the loaded DataFrame so the caller can keep working with it.
    """
    print(header)
    frame = pd.read_csv(csv_path)
    print(frame.describe(include='all'))
    return frame


# Headers are printed verbatim (Polish: "statistics for the ... set").
plant_all = _load_and_describe(
    '# statystyki dla pełnego zbioru', 'data/Plant_1_Generation_Data.csv')
plant_dev = _load_and_describe(
    '# statystyki dla zbioru dev', 'data/Plant_1_Generation_Data.csv.dev')
plant_test = _load_and_describe(
    '# statystyki dla zbioru test', 'data/Plant_1_Generation_Data.csv.test')
plant_train = _load_and_describe(
    '# statystyki dla zbioru train', 'data/Plant_1_Generation_Data.csv.train')

# Normalization: divide the column by its maximum absolute value. Work on a
# copy so plant_test itself is left untouched.
print('# normalizacja')
plant_normalized = plant_test.copy()
column = 'DC_POWER'
peak = plant_normalized[column].abs().max()
plant_normalized[column] = plant_normalized[column] / peak
print(plant_normalized)