From 9ee3ef9d6b88760a8b22930e4a1ef210e741dfa8 Mon Sep 17 00:00:00 2001
From: Marcin Kostrzewski
Date: Fri, 1 Apr 2022 22:25:05 +0200
Subject: [PATCH] Dockerization

---
 Dockerfile                | 23 +++++++++++++++++++++++
 power_plant_data_stats.py | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 71 insertions(+)
 create mode 100644 Dockerfile
 create mode 100644 power_plant_data_stats.py

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..58a8730
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,23 @@
+FROM python:3.9
+
+WORKDIR /app
+
+# Install dependencies first so this layer is cached until requirements change.
+COPY ./requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . .
+
+# Build arguments (pass with --build-arg); presumably read by download_dataset.sh.
+# NOTE(review): ARG/ENV values are recorded in the image (visible via
+# `docker history` / `docker inspect`); prefer a BuildKit secret mount
+# (RUN --mount=type=secret) before publishing this image anywhere.
+ARG KAGGLE_USERNAME
+ARG KAGGLE_KEY
+ENV KAGGLE_USERNAME=${KAGGLE_USERNAME}
+ENV KAGGLE_KEY=${KAGGLE_KEY}
+
+# Run the download script at build time, in a single layer.
+RUN chmod u+x ./download_dataset.sh && ./download_dataset.sh
+
+CMD ["python", "power_plant_data_stats.py"]
diff --git a/power_plant_data_stats.py b/power_plant_data_stats.py
new file mode 100644
index 0000000..bc505b8
--- /dev/null
+++ b/power_plant_data_stats.py
@@ -0,0 +1,48 @@
+"""Print summary statistics for the Plant 1 generation data and its splits.
+
+Reads the full CSV and its dev/test/train splits from ``data/``, prints
+``describe(include='all')`` for each one, and finally prints the test split
+with its ``DC_POWER`` column scaled by the column's largest absolute value.
+"""
+
+import pandas as pd
+
+DATA_PATH = 'data/Plant_1_Generation_Data.csv'
+
+# (header printed before the statistics, suffix appended to DATA_PATH)
+SPLITS = (
+    ('# statystyki dla pełnego zbioru', ''),
+    ('# statystyki dla zbioru dev', '.dev'),
+    ('# statystyki dla zbioru test', '.test'),
+    ('# statystyki dla zbioru train', '.train'),
+)
+
+
+def normalize_column(frame, column):
+    """Return a copy of *frame* with *column* divided by its max absolute value.
+
+    If that maximum is zero or NaN (all-zero, all-NaN or empty column) the
+    column is returned unscaled instead of being turned into NaN/inf by a
+    division by zero.
+    """
+    normalized = frame.copy()
+    scale = normalized[column].abs().max()
+    if pd.notna(scale) and scale != 0:
+        normalized[column] = normalized[column] / scale
+    return normalized
+
+
+def main():
+    """Print statistics for every split, then the normalized test split."""
+    frames = {}
+    for header, suffix in SPLITS:
+        print(header)
+        frames[suffix] = pd.read_csv(DATA_PATH + suffix)
+        print(frames[suffix].describe(include='all'))
+
+    print('# normalizacja')
+    print(normalize_column(frames['.test'], 'DC_POWER'))
+
+
+if __name__ == '__main__':
+    main()