diff --git a/README.md b/README.md
index 8992dab..7d4927d 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,24 @@
# ium_444409
-Zadania realizowane w ramach zajęć Inżynieria Uczenia Maszynowego
\ No newline at end of file
+Zadania realizowane w ramach zajęć Inżynieria Uczenia Maszynowego.
+
+## Zbiór
+***Solar Power Generation Data***
+https://www.kaggle.com/datasets/anikannal/solar-power-generation-data?select=Plant_1_Generation_Data.csv
+
+## Wymagania
+- `python3`
+- `pip`
+- API token z `kaggle.com`
+
+## Uruchamianie
+- Instalujemy potrzebne pakiety:
+```sh
+$ pip install -r requirements.txt
+```
+- Pobieramy zbiór danych z Kaggle. Skorzystamy ze skryptu w repo, który pobierze i podzieli dane na podzbiory:
+```sh
+$ ./download_dataset.sh
+```
+
+
diff --git a/power_plant_data_stats.ipynb b/power_plant_data_stats.ipynb
new file mode 100644
index 0000000..5f7edfb
--- /dev/null
+++ b/power_plant_data_stats.ipynb
@@ -0,0 +1,988 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " DATE_TIME | \n",
+ " PLANT_ID | \n",
+ " SOURCE_KEY | \n",
+ " DC_POWER | \n",
+ " AC_POWER | \n",
+ " DAILY_YIELD | \n",
+ " TOTAL_YIELD | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 68778 | \n",
+ " 68778.0 | \n",
+ " 68778 | \n",
+ " 68778.000000 | \n",
+ " 68778.000000 | \n",
+ " 68778.000000 | \n",
+ " 6.877800e+04 | \n",
+ "
\n",
+ " \n",
+ " unique | \n",
+ " 3158 | \n",
+ " NaN | \n",
+ " 22 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " top | \n",
+ " 01-06-2020 12:45 | \n",
+ " NaN | \n",
+ " bvBOhCH3iADSZry | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " freq | \n",
+ " 22 | \n",
+ " NaN | \n",
+ " 3155 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " NaN | \n",
+ " 4135001.0 | \n",
+ " NaN | \n",
+ " 3147.426211 | \n",
+ " 307.802752 | \n",
+ " 3295.968737 | \n",
+ " 6.978712e+06 | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " NaN | \n",
+ " 4036.457169 | \n",
+ " 394.396439 | \n",
+ " 3145.178309 | \n",
+ " 4.162720e+05 | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " NaN | \n",
+ " 4135001.0 | \n",
+ " NaN | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 6.183645e+06 | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " NaN | \n",
+ " 4135001.0 | \n",
+ " NaN | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 6.512003e+06 | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " NaN | \n",
+ " 4135001.0 | \n",
+ " NaN | \n",
+ " 429.000000 | \n",
+ " 41.493750 | \n",
+ " 2658.714286 | \n",
+ " 7.146685e+06 | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " NaN | \n",
+ " 4135001.0 | \n",
+ " NaN | \n",
+ " 6366.964286 | \n",
+ " 623.618750 | \n",
+ " 6274.000000 | \n",
+ " 7.268706e+06 | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " NaN | \n",
+ " 4135001.0 | \n",
+ " NaN | \n",
+ " 14471.125000 | \n",
+ " 1410.950000 | \n",
+ " 9163.000000 | \n",
+ " 7.846821e+06 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " DATE_TIME PLANT_ID SOURCE_KEY DC_POWER \\\n",
+ "count 68778 68778.0 68778 68778.000000 \n",
+ "unique 3158 NaN 22 NaN \n",
+ "top 01-06-2020 12:45 NaN bvBOhCH3iADSZry NaN \n",
+ "freq 22 NaN 3155 NaN \n",
+ "mean NaN 4135001.0 NaN 3147.426211 \n",
+ "std NaN 0.0 NaN 4036.457169 \n",
+ "min NaN 4135001.0 NaN 0.000000 \n",
+ "25% NaN 4135001.0 NaN 0.000000 \n",
+ "50% NaN 4135001.0 NaN 429.000000 \n",
+ "75% NaN 4135001.0 NaN 6366.964286 \n",
+ "max NaN 4135001.0 NaN 14471.125000 \n",
+ "\n",
+ " AC_POWER DAILY_YIELD TOTAL_YIELD \n",
+ "count 68778.000000 68778.000000 6.877800e+04 \n",
+ "unique NaN NaN NaN \n",
+ "top NaN NaN NaN \n",
+ "freq NaN NaN NaN \n",
+ "mean 307.802752 3295.968737 6.978712e+06 \n",
+ "std 394.396439 3145.178309 4.162720e+05 \n",
+ "min 0.000000 0.000000 6.183645e+06 \n",
+ "25% 0.000000 0.000000 6.512003e+06 \n",
+ "50% 41.493750 2658.714286 7.146685e+06 \n",
+ "75% 623.618750 6274.000000 7.268706e+06 \n",
+ "max 1410.950000 9163.000000 7.846821e+06 "
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# statystyki dla pełnego zbioru\n",
+ "\n",
+ "import pandas as pd\n",
+ "plant_all = pd.read_csv('data/Plant_1_Generation_Data.csv')\n",
+ "plant_all.describe(include='all')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " DATE_TIME | \n",
+ " PLANT_ID | \n",
+ " SOURCE_KEY | \n",
+ " DC_POWER | \n",
+ " AC_POWER | \n",
+ " DAILY_YIELD | \n",
+ " TOTAL_YIELD | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 6877 | \n",
+ " 6877.0 | \n",
+ " 6877 | \n",
+ " 6877.000000 | \n",
+ " 6877.000000 | \n",
+ " 6877.000000 | \n",
+ " 6.877000e+03 | \n",
+ "
\n",
+ " \n",
+ " unique | \n",
+ " 2833 | \n",
+ " NaN | \n",
+ " 22 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " top | \n",
+ " 01-06-2020 00:00 | \n",
+ " NaN | \n",
+ " 1BY6WEcLGh8j5v7 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " freq | \n",
+ " 8 | \n",
+ " NaN | \n",
+ " 345 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " NaN | \n",
+ " 4135001.0 | \n",
+ " NaN | \n",
+ " 3260.482360 | \n",
+ " 318.857642 | \n",
+ " 3310.769269 | \n",
+ " 6.974811e+06 | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " NaN | \n",
+ " 4068.560282 | \n",
+ " 397.532031 | \n",
+ " 3139.906175 | \n",
+ " 4.218293e+05 | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " NaN | \n",
+ " 4135001.0 | \n",
+ " NaN | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 6.183645e+06 | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " NaN | \n",
+ " 4135001.0 | \n",
+ " NaN | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 6.497496e+06 | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " NaN | \n",
+ " 4135001.0 | \n",
+ " NaN | \n",
+ " 680.285714 | \n",
+ " 65.914286 | \n",
+ " 2652.714286 | \n",
+ " 7.143812e+06 | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " NaN | \n",
+ " 4135001.0 | \n",
+ " NaN | \n",
+ " 6623.571429 | \n",
+ " 648.842857 | \n",
+ " 6277.000000 | \n",
+ " 7.266135e+06 | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " NaN | \n",
+ " 4135001.0 | \n",
+ " NaN | \n",
+ " 14418.428570 | \n",
+ " 1405.800000 | \n",
+ " 9163.000000 | \n",
+ " 7.846821e+06 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " DATE_TIME PLANT_ID SOURCE_KEY DC_POWER \\\n",
+ "count 6877 6877.0 6877 6877.000000 \n",
+ "unique 2833 NaN 22 NaN \n",
+ "top 01-06-2020 00:00 NaN 1BY6WEcLGh8j5v7 NaN \n",
+ "freq 8 NaN 345 NaN \n",
+ "mean NaN 4135001.0 NaN 3260.482360 \n",
+ "std NaN 0.0 NaN 4068.560282 \n",
+ "min NaN 4135001.0 NaN 0.000000 \n",
+ "25% NaN 4135001.0 NaN 0.000000 \n",
+ "50% NaN 4135001.0 NaN 680.285714 \n",
+ "75% NaN 4135001.0 NaN 6623.571429 \n",
+ "max NaN 4135001.0 NaN 14418.428570 \n",
+ "\n",
+ " AC_POWER DAILY_YIELD TOTAL_YIELD \n",
+ "count 6877.000000 6877.000000 6.877000e+03 \n",
+ "unique NaN NaN NaN \n",
+ "top NaN NaN NaN \n",
+ "freq NaN NaN NaN \n",
+ "mean 318.857642 3310.769269 6.974811e+06 \n",
+ "std 397.532031 3139.906175 4.218293e+05 \n",
+ "min 0.000000 0.000000 6.183645e+06 \n",
+ "25% 0.000000 0.000000 6.497496e+06 \n",
+ "50% 65.914286 2652.714286 7.143812e+06 \n",
+ "75% 648.842857 6277.000000 7.266135e+06 \n",
+ "max 1405.800000 9163.000000 7.846821e+06 "
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# statystyki dla zbioru dev\n",
+ "\n",
+ "plant_dev = pd.read_csv('data/Plant_1_Generation_Data.csv.dev')\n",
+ "plant_dev.describe(include='all')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " DATE_TIME | \n",
+ " PLANT_ID | \n",
+ " SOURCE_KEY | \n",
+ " DC_POWER | \n",
+ " AC_POWER | \n",
+ " DAILY_YIELD | \n",
+ " TOTAL_YIELD | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 6877 | \n",
+ " 6877.0 | \n",
+ " 6877 | \n",
+ " 6877.000000 | \n",
+ " 6877.000000 | \n",
+ " 6877.000000 | \n",
+ " 6.877000e+03 | \n",
+ "
\n",
+ " \n",
+ " unique | \n",
+ " 2831 | \n",
+ " NaN | \n",
+ " 22 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " top | \n",
+ " 03-06-2020 13:30 | \n",
+ " NaN | \n",
+ " z9Y9gH1T5YWrNuG | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " freq | \n",
+ " 9 | \n",
+ " NaN | \n",
+ " 363 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " NaN | \n",
+ " 4135001.0 | \n",
+ " NaN | \n",
+ " 3150.807630 | \n",
+ " 308.151426 | \n",
+ " 3305.763907 | \n",
+ " 6.981431e+06 | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " NaN | \n",
+ " 4020.609169 | \n",
+ " 392.878525 | \n",
+ " 3142.407510 | \n",
+ " 4.151093e+05 | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " NaN | \n",
+ " 4135001.0 | \n",
+ " NaN | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 6.183645e+06 | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " NaN | \n",
+ " 4135001.0 | \n",
+ " NaN | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 6.512002e+06 | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " NaN | \n",
+ " 4135001.0 | \n",
+ " NaN | \n",
+ " 468.285714 | \n",
+ " 45.400000 | \n",
+ " 2682.285714 | \n",
+ " 7.149051e+06 | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " NaN | \n",
+ " 4135001.0 | \n",
+ " NaN | \n",
+ " 6369.250000 | \n",
+ " 623.975000 | \n",
+ " 6274.000000 | \n",
+ " 7.271854e+06 | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " NaN | \n",
+ " 4135001.0 | \n",
+ " NaN | \n",
+ " 14466.857140 | \n",
+ " 1410.528571 | \n",
+ " 9163.000000 | \n",
+ " 7.846821e+06 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " DATE_TIME PLANT_ID SOURCE_KEY DC_POWER \\\n",
+ "count 6877 6877.0 6877 6877.000000 \n",
+ "unique 2831 NaN 22 NaN \n",
+ "top 03-06-2020 13:30 NaN z9Y9gH1T5YWrNuG NaN \n",
+ "freq 9 NaN 363 NaN \n",
+ "mean NaN 4135001.0 NaN 3150.807630 \n",
+ "std NaN 0.0 NaN 4020.609169 \n",
+ "min NaN 4135001.0 NaN 0.000000 \n",
+ "25% NaN 4135001.0 NaN 0.000000 \n",
+ "50% NaN 4135001.0 NaN 468.285714 \n",
+ "75% NaN 4135001.0 NaN 6369.250000 \n",
+ "max NaN 4135001.0 NaN 14466.857140 \n",
+ "\n",
+ " AC_POWER DAILY_YIELD TOTAL_YIELD \n",
+ "count 6877.000000 6877.000000 6.877000e+03 \n",
+ "unique NaN NaN NaN \n",
+ "top NaN NaN NaN \n",
+ "freq NaN NaN NaN \n",
+ "mean 308.151426 3305.763907 6.981431e+06 \n",
+ "std 392.878525 3142.407510 4.151093e+05 \n",
+ "min 0.000000 0.000000 6.183645e+06 \n",
+ "25% 0.000000 0.000000 6.512002e+06 \n",
+ "50% 45.400000 2682.285714 7.149051e+06 \n",
+ "75% 623.975000 6274.000000 7.271854e+06 \n",
+ "max 1410.528571 9163.000000 7.846821e+06 "
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# statystyki dla zbioru test\n",
+ "\n",
+ "plant_test = pd.read_csv('data/Plant_1_Generation_Data.csv.test')\n",
+ "plant_test.describe(include='all')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " DATE_TIME | \n",
+ " PLANT_ID | \n",
+ " SOURCE_KEY | \n",
+ " DC_POWER | \n",
+ " AC_POWER | \n",
+ " DAILY_YIELD | \n",
+ " TOTAL_YIELD | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 55024 | \n",
+ " 55024.0 | \n",
+ " 55024 | \n",
+ " 55024.000000 | \n",
+ " 55024.000000 | \n",
+ " 55024.000000 | \n",
+ " 5.502400e+04 | \n",
+ "
\n",
+ " \n",
+ " unique | \n",
+ " 3158 | \n",
+ " NaN | \n",
+ " 22 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " top | \n",
+ " 15-06-2020 09:30 | \n",
+ " NaN | \n",
+ " iCRJl6heRkivqQ3 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " freq | \n",
+ " 22 | \n",
+ " NaN | \n",
+ " 2561 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " NaN | \n",
+ " 4135001.0 | \n",
+ " NaN | \n",
+ " 3132.873631 | \n",
+ " 306.377514 | \n",
+ " 3292.894721 | \n",
+ " 6.978859e+06 | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " NaN | \n",
+ " 4034.254455 | \n",
+ " 394.177510 | \n",
+ " 3146.231920 | \n",
+ " 4.157218e+05 | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " NaN | \n",
+ " 4135001.0 | \n",
+ " NaN | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 6.183645e+06 | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " NaN | \n",
+ " 4135001.0 | \n",
+ " NaN | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 6.514911e+06 | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " NaN | \n",
+ " 4135001.0 | \n",
+ " NaN | \n",
+ " 400.500000 | \n",
+ " 38.720536 | \n",
+ " 2658.062500 | \n",
+ " 7.146685e+06 | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " NaN | \n",
+ " 4135001.0 | \n",
+ " NaN | \n",
+ " 6337.535714 | \n",
+ " 620.728125 | \n",
+ " 6273.616072 | \n",
+ " 7.268792e+06 | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " NaN | \n",
+ " 4135001.0 | \n",
+ " NaN | \n",
+ " 14471.125000 | \n",
+ " 1410.950000 | \n",
+ " 9163.000000 | \n",
+ " 7.846821e+06 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " DATE_TIME PLANT_ID SOURCE_KEY DC_POWER \\\n",
+ "count 55024 55024.0 55024 55024.000000 \n",
+ "unique 3158 NaN 22 NaN \n",
+ "top 15-06-2020 09:30 NaN iCRJl6heRkivqQ3 NaN \n",
+ "freq 22 NaN 2561 NaN \n",
+ "mean NaN 4135001.0 NaN 3132.873631 \n",
+ "std NaN 0.0 NaN 4034.254455 \n",
+ "min NaN 4135001.0 NaN 0.000000 \n",
+ "25% NaN 4135001.0 NaN 0.000000 \n",
+ "50% NaN 4135001.0 NaN 400.500000 \n",
+ "75% NaN 4135001.0 NaN 6337.535714 \n",
+ "max NaN 4135001.0 NaN 14471.125000 \n",
+ "\n",
+ " AC_POWER DAILY_YIELD TOTAL_YIELD \n",
+ "count 55024.000000 55024.000000 5.502400e+04 \n",
+ "unique NaN NaN NaN \n",
+ "top NaN NaN NaN \n",
+ "freq NaN NaN NaN \n",
+ "mean 306.377514 3292.894721 6.978859e+06 \n",
+ "std 394.177510 3146.231920 4.157218e+05 \n",
+ "min 0.000000 0.000000 6.183645e+06 \n",
+ "25% 0.000000 0.000000 6.514911e+06 \n",
+ "50% 38.720536 2658.062500 7.146685e+06 \n",
+ "75% 620.728125 6273.616072 7.268792e+06 \n",
+ "max 1410.950000 9163.000000 7.846821e+06 "
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# statystyki dla zbioru train\n",
+ "\n",
+ "plant_train = pd.read_csv('data/Plant_1_Generation_Data.csv.train')\n",
+ "plant_train.describe(include='all')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " DATE_TIME | \n",
+ " PLANT_ID | \n",
+ " SOURCE_KEY | \n",
+ " DC_POWER | \n",
+ " AC_POWER | \n",
+ " DAILY_YIELD | \n",
+ " TOTAL_YIELD | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 10-06-2020 22:45 | \n",
+ " 4135001 | \n",
+ " rGa61gmuvPhdLxV | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 6565.000000 | \n",
+ " 7310769.000 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 25-05-2020 07:15 | \n",
+ " 4135001 | \n",
+ " uHbuxQJl8lW7ozc | \n",
+ " 0.166544 | \n",
+ " 236.262500 | \n",
+ " 121.750000 | \n",
+ " 7111973.750 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 23-05-2020 17:45 | \n",
+ " 4135001 | \n",
+ " 1IF53ai7Xc0U56Y | \n",
+ " 0.109156 | \n",
+ " 154.485714 | \n",
+ " 8607.000000 | \n",
+ " 6249141.000 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 15-05-2020 04:45 | \n",
+ " 4135001 | \n",
+ " 3PZuoBAID5Wc2HD | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 6987759.000 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 12-06-2020 16:30 | \n",
+ " 4135001 | \n",
+ " iCRJl6heRkivqQ3 | \n",
+ " 0.191808 | \n",
+ " 272.157143 | \n",
+ " 5567.428571 | \n",
+ " 7391038.429 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 6872 | \n",
+ " 01-06-2020 10:00 | \n",
+ " 4135001 | \n",
+ " zBIq5rxdHJRwDNY | \n",
+ " 0.539282 | \n",
+ " 763.628571 | \n",
+ " 1779.285714 | \n",
+ " 6465018.286 | \n",
+ "
\n",
+ " \n",
+ " 6873 | \n",
+ " 27-05-2020 02:00 | \n",
+ " 4135001 | \n",
+ " VHMLBKoKgIrUVDU | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 7297615.000 | \n",
+ "
\n",
+ " \n",
+ " 6874 | \n",
+ " 31-05-2020 21:30 | \n",
+ " 4135001 | \n",
+ " 3PZuoBAID5Wc2HD | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 5816.000000 | \n",
+ " 7115304.000 | \n",
+ "
\n",
+ " \n",
+ " 6875 | \n",
+ " 11-06-2020 18:45 | \n",
+ " 4135001 | \n",
+ " ih0vzX44oOqAx2f | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 5521.000000 | \n",
+ " 6386553.000 | \n",
+ "
\n",
+ " \n",
+ " 6876 | \n",
+ " 16-06-2020 05:45 | \n",
+ " 4135001 | \n",
+ " 3PZuoBAID5Wc2HD | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 7225042.000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
6877 rows × 7 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " DATE_TIME PLANT_ID SOURCE_KEY DC_POWER AC_POWER \\\n",
+ "0 10-06-2020 22:45 4135001 rGa61gmuvPhdLxV 0.000000 0.000000 \n",
+ "1 25-05-2020 07:15 4135001 uHbuxQJl8lW7ozc 0.166544 236.262500 \n",
+ "2 23-05-2020 17:45 4135001 1IF53ai7Xc0U56Y 0.109156 154.485714 \n",
+ "3 15-05-2020 04:45 4135001 3PZuoBAID5Wc2HD 0.000000 0.000000 \n",
+ "4 12-06-2020 16:30 4135001 iCRJl6heRkivqQ3 0.191808 272.157143 \n",
+ "... ... ... ... ... ... \n",
+ "6872 01-06-2020 10:00 4135001 zBIq5rxdHJRwDNY 0.539282 763.628571 \n",
+ "6873 27-05-2020 02:00 4135001 VHMLBKoKgIrUVDU 0.000000 0.000000 \n",
+ "6874 31-05-2020 21:30 4135001 3PZuoBAID5Wc2HD 0.000000 0.000000 \n",
+ "6875 11-06-2020 18:45 4135001 ih0vzX44oOqAx2f 0.000000 0.000000 \n",
+ "6876 16-06-2020 05:45 4135001 3PZuoBAID5Wc2HD 0.000000 0.000000 \n",
+ "\n",
+ " DAILY_YIELD TOTAL_YIELD \n",
+ "0 6565.000000 7310769.000 \n",
+ "1 121.750000 7111973.750 \n",
+ "2 8607.000000 6249141.000 \n",
+ "3 0.000000 6987759.000 \n",
+ "4 5567.428571 7391038.429 \n",
+ "... ... ... \n",
+ "6872 1779.285714 6465018.286 \n",
+ "6873 0.000000 7297615.000 \n",
+ "6874 5816.000000 7115304.000 \n",
+ "6875 5521.000000 6386553.000 \n",
+ "6876 0.000000 7225042.000 \n",
+ "\n",
+ "[6877 rows x 7 columns]"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# normalizacja\n",
+ "\n",
+ "\n",
+ "plant_normalized = plant_test.copy()\n",
+ "column = 'DC_POWER'\n",
+ "\n",
+ "plant_normalized[column] = plant_normalized[column] / plant_normalized[column].abs().max()\n",
+ "\n",
+ "plant_normalized"
+ ]
+ }
+ ],
+ "metadata": {
+ "interpreter": {
+ "hash": "ac59ebe37160ed0dfa835113d9b8498d9f09ceb179beaac4002f036b9467c963"
+ },
+ "kernelspec": {
+ "display_name": "Python 3.9.1 64-bit",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.1"
+ },
+ "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..3a40842
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+kaggle==1.5.12
+pandas==1.4.1
\ No newline at end of file