From d4b1d9892687e5c29b75bb252831d40e8486996d Mon Sep 17 00:00:00 2001 From: Marcin Kostrzewski Date: Sun, 20 Mar 2022 18:07:34 +0100 Subject: [PATCH] Added first solution --- README.md | 23 +- power_plant_data_stats.ipynb | 988 +++++++++++++++++++++++++++++++++++ requirements.txt | 2 + 3 files changed, 1012 insertions(+), 1 deletion(-) create mode 100644 power_plant_data_stats.ipynb create mode 100644 requirements.txt diff --git a/README.md b/README.md index 8992dab..7d4927d 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,24 @@ # ium_444409 -Zadania realizowane w ramach zajęć Inżynieria Uczenia Maszynowego \ No newline at end of file +Zadania realizowane w ramach zajęć Inżynieria Uczenia Maszynowego. + +## Zbiór +***Solar Power Generation Data*** +https://www.kaggle.com/datasets/anikannal/solar-power-generation-data?select=Plant_1_Generation_Data.csv + +## Wymagania +- `python3` +- `pip` +- API token z `kaggle.com` + +## Uruchamianie +- Instalujemy potrzebne pakiety: +```sh +$ pip install -r requirements.txt +``` +- Pobieramy zbiór danych z Kaggle. Skorzystamy ze skryptu w repo, który pobierze i podzieli dane na podzbiory: +``` +$ ./download_dataset.sh +``` + + diff --git a/power_plant_data_stats.ipynb b/power_plant_data_stats.ipynb new file mode 100644 index 0000000..5f7edfb --- /dev/null +++ b/power_plant_data_stats.ipynb @@ -0,0 +1,988 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DATE_TIMEPLANT_IDSOURCE_KEYDC_POWERAC_POWERDAILY_YIELDTOTAL_YIELD
count6877868778.06877868778.00000068778.00000068778.0000006.877800e+04
unique3158NaN22NaNNaNNaNNaN
top01-06-2020 12:45NaNbvBOhCH3iADSZryNaNNaNNaNNaN
freq22NaN3155NaNNaNNaNNaN
meanNaN4135001.0NaN3147.426211307.8027523295.9687376.978712e+06
stdNaN0.0NaN4036.457169394.3964393145.1783094.162720e+05
minNaN4135001.0NaN0.0000000.0000000.0000006.183645e+06
25%NaN4135001.0NaN0.0000000.0000000.0000006.512003e+06
50%NaN4135001.0NaN429.00000041.4937502658.7142867.146685e+06
75%NaN4135001.0NaN6366.964286623.6187506274.0000007.268706e+06
maxNaN4135001.0NaN14471.1250001410.9500009163.0000007.846821e+06
\n", + "
" + ], + "text/plain": [ + " DATE_TIME PLANT_ID SOURCE_KEY DC_POWER \\\n", + "count 68778 68778.0 68778 68778.000000 \n", + "unique 3158 NaN 22 NaN \n", + "top 01-06-2020 12:45 NaN bvBOhCH3iADSZry NaN \n", + "freq 22 NaN 3155 NaN \n", + "mean NaN 4135001.0 NaN 3147.426211 \n", + "std NaN 0.0 NaN 4036.457169 \n", + "min NaN 4135001.0 NaN 0.000000 \n", + "25% NaN 4135001.0 NaN 0.000000 \n", + "50% NaN 4135001.0 NaN 429.000000 \n", + "75% NaN 4135001.0 NaN 6366.964286 \n", + "max NaN 4135001.0 NaN 14471.125000 \n", + "\n", + " AC_POWER DAILY_YIELD TOTAL_YIELD \n", + "count 68778.000000 68778.000000 6.877800e+04 \n", + "unique NaN NaN NaN \n", + "top NaN NaN NaN \n", + "freq NaN NaN NaN \n", + "mean 307.802752 3295.968737 6.978712e+06 \n", + "std 394.396439 3145.178309 4.162720e+05 \n", + "min 0.000000 0.000000 6.183645e+06 \n", + "25% 0.000000 0.000000 6.512003e+06 \n", + "50% 41.493750 2658.714286 7.146685e+06 \n", + "75% 623.618750 6274.000000 7.268706e+06 \n", + "max 1410.950000 9163.000000 7.846821e+06 " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# statystyki dla pełnego zbioru\n", + "\n", + "import pandas as pd\n", + "plant_all = pd.read_csv('data/Plant_1_Generation_Data.csv')\n", + "plant_all.describe(include='all')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DATE_TIMEPLANT_IDSOURCE_KEYDC_POWERAC_POWERDAILY_YIELDTOTAL_YIELD
count68776877.068776877.0000006877.0000006877.0000006.877000e+03
unique2833NaN22NaNNaNNaNNaN
top01-06-2020 00:00NaN1BY6WEcLGh8j5v7NaNNaNNaNNaN
freq8NaN345NaNNaNNaNNaN
meanNaN4135001.0NaN3260.482360318.8576423310.7692696.974811e+06
stdNaN0.0NaN4068.560282397.5320313139.9061754.218293e+05
minNaN4135001.0NaN0.0000000.0000000.0000006.183645e+06
25%NaN4135001.0NaN0.0000000.0000000.0000006.497496e+06
50%NaN4135001.0NaN680.28571465.9142862652.7142867.143812e+06
75%NaN4135001.0NaN6623.571429648.8428576277.0000007.266135e+06
maxNaN4135001.0NaN14418.4285701405.8000009163.0000007.846821e+06
\n", + "
" + ], + "text/plain": [ + " DATE_TIME PLANT_ID SOURCE_KEY DC_POWER \\\n", + "count 6877 6877.0 6877 6877.000000 \n", + "unique 2833 NaN 22 NaN \n", + "top 01-06-2020 00:00 NaN 1BY6WEcLGh8j5v7 NaN \n", + "freq 8 NaN 345 NaN \n", + "mean NaN 4135001.0 NaN 3260.482360 \n", + "std NaN 0.0 NaN 4068.560282 \n", + "min NaN 4135001.0 NaN 0.000000 \n", + "25% NaN 4135001.0 NaN 0.000000 \n", + "50% NaN 4135001.0 NaN 680.285714 \n", + "75% NaN 4135001.0 NaN 6623.571429 \n", + "max NaN 4135001.0 NaN 14418.428570 \n", + "\n", + " AC_POWER DAILY_YIELD TOTAL_YIELD \n", + "count 6877.000000 6877.000000 6.877000e+03 \n", + "unique NaN NaN NaN \n", + "top NaN NaN NaN \n", + "freq NaN NaN NaN \n", + "mean 318.857642 3310.769269 6.974811e+06 \n", + "std 397.532031 3139.906175 4.218293e+05 \n", + "min 0.000000 0.000000 6.183645e+06 \n", + "25% 0.000000 0.000000 6.497496e+06 \n", + "50% 65.914286 2652.714286 7.143812e+06 \n", + "75% 648.842857 6277.000000 7.266135e+06 \n", + "max 1405.800000 9163.000000 7.846821e+06 " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# statystyki dla zbioru dev\n", + "\n", + "plant_dev = pd.read_csv('data/Plant_1_Generation_Data.csv.dev')\n", + "plant_dev.describe(include='all')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DATE_TIMEPLANT_IDSOURCE_KEYDC_POWERAC_POWERDAILY_YIELDTOTAL_YIELD
count68776877.068776877.0000006877.0000006877.0000006.877000e+03
unique2831NaN22NaNNaNNaNNaN
top03-06-2020 13:30NaNz9Y9gH1T5YWrNuGNaNNaNNaNNaN
freq9NaN363NaNNaNNaNNaN
meanNaN4135001.0NaN3150.807630308.1514263305.7639076.981431e+06
stdNaN0.0NaN4020.609169392.8785253142.4075104.151093e+05
minNaN4135001.0NaN0.0000000.0000000.0000006.183645e+06
25%NaN4135001.0NaN0.0000000.0000000.0000006.512002e+06
50%NaN4135001.0NaN468.28571445.4000002682.2857147.149051e+06
75%NaN4135001.0NaN6369.250000623.9750006274.0000007.271854e+06
maxNaN4135001.0NaN14466.8571401410.5285719163.0000007.846821e+06
\n", + "
" + ], + "text/plain": [ + " DATE_TIME PLANT_ID SOURCE_KEY DC_POWER \\\n", + "count 6877 6877.0 6877 6877.000000 \n", + "unique 2831 NaN 22 NaN \n", + "top 03-06-2020 13:30 NaN z9Y9gH1T5YWrNuG NaN \n", + "freq 9 NaN 363 NaN \n", + "mean NaN 4135001.0 NaN 3150.807630 \n", + "std NaN 0.0 NaN 4020.609169 \n", + "min NaN 4135001.0 NaN 0.000000 \n", + "25% NaN 4135001.0 NaN 0.000000 \n", + "50% NaN 4135001.0 NaN 468.285714 \n", + "75% NaN 4135001.0 NaN 6369.250000 \n", + "max NaN 4135001.0 NaN 14466.857140 \n", + "\n", + " AC_POWER DAILY_YIELD TOTAL_YIELD \n", + "count 6877.000000 6877.000000 6.877000e+03 \n", + "unique NaN NaN NaN \n", + "top NaN NaN NaN \n", + "freq NaN NaN NaN \n", + "mean 308.151426 3305.763907 6.981431e+06 \n", + "std 392.878525 3142.407510 4.151093e+05 \n", + "min 0.000000 0.000000 6.183645e+06 \n", + "25% 0.000000 0.000000 6.512002e+06 \n", + "50% 45.400000 2682.285714 7.149051e+06 \n", + "75% 623.975000 6274.000000 7.271854e+06 \n", + "max 1410.528571 9163.000000 7.846821e+06 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# statystyki dla zbioru test\n", + "\n", + "plant_test = pd.read_csv('data/Plant_1_Generation_Data.csv.test')\n", + "plant_test.describe(include='all')" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DATE_TIMEPLANT_IDSOURCE_KEYDC_POWERAC_POWERDAILY_YIELDTOTAL_YIELD
count5502455024.05502455024.00000055024.00000055024.0000005.502400e+04
unique3158NaN22NaNNaNNaNNaN
top15-06-2020 09:30NaNiCRJl6heRkivqQ3NaNNaNNaNNaN
freq22NaN2561NaNNaNNaNNaN
meanNaN4135001.0NaN3132.873631306.3775143292.8947216.978859e+06
stdNaN0.0NaN4034.254455394.1775103146.2319204.157218e+05
minNaN4135001.0NaN0.0000000.0000000.0000006.183645e+06
25%NaN4135001.0NaN0.0000000.0000000.0000006.514911e+06
50%NaN4135001.0NaN400.50000038.7205362658.0625007.146685e+06
75%NaN4135001.0NaN6337.535714620.7281256273.6160727.268792e+06
maxNaN4135001.0NaN14471.1250001410.9500009163.0000007.846821e+06
\n", + "
" + ], + "text/plain": [ + " DATE_TIME PLANT_ID SOURCE_KEY DC_POWER \\\n", + "count 55024 55024.0 55024 55024.000000 \n", + "unique 3158 NaN 22 NaN \n", + "top 15-06-2020 09:30 NaN iCRJl6heRkivqQ3 NaN \n", + "freq 22 NaN 2561 NaN \n", + "mean NaN 4135001.0 NaN 3132.873631 \n", + "std NaN 0.0 NaN 4034.254455 \n", + "min NaN 4135001.0 NaN 0.000000 \n", + "25% NaN 4135001.0 NaN 0.000000 \n", + "50% NaN 4135001.0 NaN 400.500000 \n", + "75% NaN 4135001.0 NaN 6337.535714 \n", + "max NaN 4135001.0 NaN 14471.125000 \n", + "\n", + " AC_POWER DAILY_YIELD TOTAL_YIELD \n", + "count 55024.000000 55024.000000 5.502400e+04 \n", + "unique NaN NaN NaN \n", + "top NaN NaN NaN \n", + "freq NaN NaN NaN \n", + "mean 306.377514 3292.894721 6.978859e+06 \n", + "std 394.177510 3146.231920 4.157218e+05 \n", + "min 0.000000 0.000000 6.183645e+06 \n", + "25% 0.000000 0.000000 6.514911e+06 \n", + "50% 38.720536 2658.062500 7.146685e+06 \n", + "75% 620.728125 6273.616072 7.268792e+06 \n", + "max 1410.950000 9163.000000 7.846821e+06 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# statystyki dla zbioru train\n", + "\n", + "plant_train = pd.read_csv('data/Plant_1_Generation_Data.csv.train')\n", + "plant_train.describe(include='all')" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DATE_TIMEPLANT_IDSOURCE_KEYDC_POWERAC_POWERDAILY_YIELDTOTAL_YIELD
010-06-2020 22:454135001rGa61gmuvPhdLxV0.0000000.0000006565.0000007310769.000
125-05-2020 07:154135001uHbuxQJl8lW7ozc0.166544236.262500121.7500007111973.750
223-05-2020 17:4541350011IF53ai7Xc0U56Y0.109156154.4857148607.0000006249141.000
315-05-2020 04:4541350013PZuoBAID5Wc2HD0.0000000.0000000.0000006987759.000
412-06-2020 16:304135001iCRJl6heRkivqQ30.191808272.1571435567.4285717391038.429
........................
687201-06-2020 10:004135001zBIq5rxdHJRwDNY0.539282763.6285711779.2857146465018.286
687327-05-2020 02:004135001VHMLBKoKgIrUVDU0.0000000.0000000.0000007297615.000
687431-05-2020 21:3041350013PZuoBAID5Wc2HD0.0000000.0000005816.0000007115304.000
687511-06-2020 18:454135001ih0vzX44oOqAx2f0.0000000.0000005521.0000006386553.000
687616-06-2020 05:4541350013PZuoBAID5Wc2HD0.0000000.0000000.0000007225042.000
\n", + "

6877 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " DATE_TIME PLANT_ID SOURCE_KEY DC_POWER AC_POWER \\\n", + "0 10-06-2020 22:45 4135001 rGa61gmuvPhdLxV 0.000000 0.000000 \n", + "1 25-05-2020 07:15 4135001 uHbuxQJl8lW7ozc 0.166544 236.262500 \n", + "2 23-05-2020 17:45 4135001 1IF53ai7Xc0U56Y 0.109156 154.485714 \n", + "3 15-05-2020 04:45 4135001 3PZuoBAID5Wc2HD 0.000000 0.000000 \n", + "4 12-06-2020 16:30 4135001 iCRJl6heRkivqQ3 0.191808 272.157143 \n", + "... ... ... ... ... ... \n", + "6872 01-06-2020 10:00 4135001 zBIq5rxdHJRwDNY 0.539282 763.628571 \n", + "6873 27-05-2020 02:00 4135001 VHMLBKoKgIrUVDU 0.000000 0.000000 \n", + "6874 31-05-2020 21:30 4135001 3PZuoBAID5Wc2HD 0.000000 0.000000 \n", + "6875 11-06-2020 18:45 4135001 ih0vzX44oOqAx2f 0.000000 0.000000 \n", + "6876 16-06-2020 05:45 4135001 3PZuoBAID5Wc2HD 0.000000 0.000000 \n", + "\n", + " DAILY_YIELD TOTAL_YIELD \n", + "0 6565.000000 7310769.000 \n", + "1 121.750000 7111973.750 \n", + "2 8607.000000 6249141.000 \n", + "3 0.000000 6987759.000 \n", + "4 5567.428571 7391038.429 \n", + "... ... ... \n", + "6872 1779.285714 6465018.286 \n", + "6873 0.000000 7297615.000 \n", + "6874 5816.000000 7115304.000 \n", + "6875 5521.000000 6386553.000 \n", + "6876 0.000000 7225042.000 \n", + "\n", + "[6877 rows x 7 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# normalizacja\n", + "\n", + "\n", + "plant_normalized = plant_test.copy()\n", + "column = 'DC_POWER'\n", + "\n", + "plant_normalized[column] = plant_normalized[column] / plant_normalized[column].abs().max()\n", + "\n", + "plant_normalized" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "ac59ebe37160ed0dfa835113d9b8498d9f09ceb179beaac4002f036b9467c963" + }, + "kernelspec": { + "display_name": "Python 3.9.1 64-bit", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..3a40842 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +kaggle==1.5.12 +pandas==1.4.1 \ No newline at end of file