989 lines
33 KiB
Plaintext
989 lines
33 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>DATE_TIME</th>\n",
|
||
" <th>PLANT_ID</th>\n",
|
||
" <th>SOURCE_KEY</th>\n",
|
||
" <th>DC_POWER</th>\n",
|
||
" <th>AC_POWER</th>\n",
|
||
" <th>DAILY_YIELD</th>\n",
|
||
" <th>TOTAL_YIELD</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>count</th>\n",
|
||
" <td>68778</td>\n",
|
||
" <td>68778.0</td>\n",
|
||
" <td>68778</td>\n",
|
||
" <td>68778.000000</td>\n",
|
||
" <td>68778.000000</td>\n",
|
||
" <td>68778.000000</td>\n",
|
||
" <td>6.877800e+04</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>unique</th>\n",
|
||
" <td>3158</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>22</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>top</th>\n",
|
||
" <td>01-06-2020 12:45</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>bvBOhCH3iADSZry</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>freq</th>\n",
|
||
" <td>22</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>3155</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>mean</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4135001.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>3147.426211</td>\n",
|
||
" <td>307.802752</td>\n",
|
||
" <td>3295.968737</td>\n",
|
||
" <td>6.978712e+06</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>std</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4036.457169</td>\n",
|
||
" <td>394.396439</td>\n",
|
||
" <td>3145.178309</td>\n",
|
||
" <td>4.162720e+05</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>min</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4135001.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>6.183645e+06</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>25%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4135001.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>6.512003e+06</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>50%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4135001.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>429.000000</td>\n",
|
||
" <td>41.493750</td>\n",
|
||
" <td>2658.714286</td>\n",
|
||
" <td>7.146685e+06</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>75%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4135001.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>6366.964286</td>\n",
|
||
" <td>623.618750</td>\n",
|
||
" <td>6274.000000</td>\n",
|
||
" <td>7.268706e+06</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>max</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4135001.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>14471.125000</td>\n",
|
||
" <td>1410.950000</td>\n",
|
||
" <td>9163.000000</td>\n",
|
||
" <td>7.846821e+06</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" DATE_TIME PLANT_ID SOURCE_KEY DC_POWER \\\n",
|
||
"count 68778 68778.0 68778 68778.000000 \n",
|
||
"unique 3158 NaN 22 NaN \n",
|
||
"top 01-06-2020 12:45 NaN bvBOhCH3iADSZry NaN \n",
|
||
"freq 22 NaN 3155 NaN \n",
|
||
"mean NaN 4135001.0 NaN 3147.426211 \n",
|
||
"std NaN 0.0 NaN 4036.457169 \n",
|
||
"min NaN 4135001.0 NaN 0.000000 \n",
|
||
"25% NaN 4135001.0 NaN 0.000000 \n",
|
||
"50% NaN 4135001.0 NaN 429.000000 \n",
|
||
"75% NaN 4135001.0 NaN 6366.964286 \n",
|
||
"max NaN 4135001.0 NaN 14471.125000 \n",
|
||
"\n",
|
||
" AC_POWER DAILY_YIELD TOTAL_YIELD \n",
|
||
"count 68778.000000 68778.000000 6.877800e+04 \n",
|
||
"unique NaN NaN NaN \n",
|
||
"top NaN NaN NaN \n",
|
||
"freq NaN NaN NaN \n",
|
||
"mean 307.802752 3295.968737 6.978712e+06 \n",
|
||
"std 394.396439 3145.178309 4.162720e+05 \n",
|
||
"min 0.000000 0.000000 6.183645e+06 \n",
|
||
"25% 0.000000 0.000000 6.512003e+06 \n",
|
||
"50% 41.493750 2658.714286 7.146685e+06 \n",
|
||
"75% 623.618750 6274.000000 7.268706e+06 \n",
|
||
"max 1410.950000 9163.000000 7.846821e+06 "
|
||
]
|
||
},
|
||
"execution_count": 2,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# statystyki dla pełnego zbioru\n",
|
||
"\n",
|
||
"import pandas as pd\n",
|
||
"plant_all = pd.read_csv('data/Plant_1_Generation_Data.csv')\n",
|
||
"plant_all.describe(include='all')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>DATE_TIME</th>\n",
|
||
" <th>PLANT_ID</th>\n",
|
||
" <th>SOURCE_KEY</th>\n",
|
||
" <th>DC_POWER</th>\n",
|
||
" <th>AC_POWER</th>\n",
|
||
" <th>DAILY_YIELD</th>\n",
|
||
" <th>TOTAL_YIELD</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>count</th>\n",
|
||
" <td>6877</td>\n",
|
||
" <td>6877.0</td>\n",
|
||
" <td>6877</td>\n",
|
||
" <td>6877.000000</td>\n",
|
||
" <td>6877.000000</td>\n",
|
||
" <td>6877.000000</td>\n",
|
||
" <td>6.877000e+03</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>unique</th>\n",
|
||
" <td>2833</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>22</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>top</th>\n",
|
||
" <td>01-06-2020 00:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1BY6WEcLGh8j5v7</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>freq</th>\n",
|
||
" <td>8</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>345</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>mean</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4135001.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>3260.482360</td>\n",
|
||
" <td>318.857642</td>\n",
|
||
" <td>3310.769269</td>\n",
|
||
" <td>6.974811e+06</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>std</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4068.560282</td>\n",
|
||
" <td>397.532031</td>\n",
|
||
" <td>3139.906175</td>\n",
|
||
" <td>4.218293e+05</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>min</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4135001.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>6.183645e+06</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>25%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4135001.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>6.497496e+06</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>50%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4135001.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>680.285714</td>\n",
|
||
" <td>65.914286</td>\n",
|
||
" <td>2652.714286</td>\n",
|
||
" <td>7.143812e+06</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>75%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4135001.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>6623.571429</td>\n",
|
||
" <td>648.842857</td>\n",
|
||
" <td>6277.000000</td>\n",
|
||
" <td>7.266135e+06</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>max</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4135001.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>14418.428570</td>\n",
|
||
" <td>1405.800000</td>\n",
|
||
" <td>9163.000000</td>\n",
|
||
" <td>7.846821e+06</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" DATE_TIME PLANT_ID SOURCE_KEY DC_POWER \\\n",
|
||
"count 6877 6877.0 6877 6877.000000 \n",
|
||
"unique 2833 NaN 22 NaN \n",
|
||
"top 01-06-2020 00:00 NaN 1BY6WEcLGh8j5v7 NaN \n",
|
||
"freq 8 NaN 345 NaN \n",
|
||
"mean NaN 4135001.0 NaN 3260.482360 \n",
|
||
"std NaN 0.0 NaN 4068.560282 \n",
|
||
"min NaN 4135001.0 NaN 0.000000 \n",
|
||
"25% NaN 4135001.0 NaN 0.000000 \n",
|
||
"50% NaN 4135001.0 NaN 680.285714 \n",
|
||
"75% NaN 4135001.0 NaN 6623.571429 \n",
|
||
"max NaN 4135001.0 NaN 14418.428570 \n",
|
||
"\n",
|
||
" AC_POWER DAILY_YIELD TOTAL_YIELD \n",
|
||
"count 6877.000000 6877.000000 6.877000e+03 \n",
|
||
"unique NaN NaN NaN \n",
|
||
"top NaN NaN NaN \n",
|
||
"freq NaN NaN NaN \n",
|
||
"mean 318.857642 3310.769269 6.974811e+06 \n",
|
||
"std 397.532031 3139.906175 4.218293e+05 \n",
|
||
"min 0.000000 0.000000 6.183645e+06 \n",
|
||
"25% 0.000000 0.000000 6.497496e+06 \n",
|
||
"50% 65.914286 2652.714286 7.143812e+06 \n",
|
||
"75% 648.842857 6277.000000 7.266135e+06 \n",
|
||
"max 1405.800000 9163.000000 7.846821e+06 "
|
||
]
|
||
},
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# statystyki dla zbioru dev\n",
|
||
"\n",
|
||
"plant_dev = pd.read_csv('data/Plant_1_Generation_Data.csv.dev')\n",
|
||
"plant_dev.describe(include='all')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>DATE_TIME</th>\n",
|
||
" <th>PLANT_ID</th>\n",
|
||
" <th>SOURCE_KEY</th>\n",
|
||
" <th>DC_POWER</th>\n",
|
||
" <th>AC_POWER</th>\n",
|
||
" <th>DAILY_YIELD</th>\n",
|
||
" <th>TOTAL_YIELD</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>count</th>\n",
|
||
" <td>6877</td>\n",
|
||
" <td>6877.0</td>\n",
|
||
" <td>6877</td>\n",
|
||
" <td>6877.000000</td>\n",
|
||
" <td>6877.000000</td>\n",
|
||
" <td>6877.000000</td>\n",
|
||
" <td>6.877000e+03</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>unique</th>\n",
|
||
" <td>2831</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>22</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>top</th>\n",
|
||
" <td>03-06-2020 13:30</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>z9Y9gH1T5YWrNuG</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>freq</th>\n",
|
||
" <td>9</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>363</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>mean</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4135001.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>3150.807630</td>\n",
|
||
" <td>308.151426</td>\n",
|
||
" <td>3305.763907</td>\n",
|
||
" <td>6.981431e+06</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>std</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4020.609169</td>\n",
|
||
" <td>392.878525</td>\n",
|
||
" <td>3142.407510</td>\n",
|
||
" <td>4.151093e+05</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>min</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4135001.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>6.183645e+06</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>25%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4135001.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>6.512002e+06</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>50%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4135001.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>468.285714</td>\n",
|
||
" <td>45.400000</td>\n",
|
||
" <td>2682.285714</td>\n",
|
||
" <td>7.149051e+06</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>75%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4135001.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>6369.250000</td>\n",
|
||
" <td>623.975000</td>\n",
|
||
" <td>6274.000000</td>\n",
|
||
" <td>7.271854e+06</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>max</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4135001.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>14466.857140</td>\n",
|
||
" <td>1410.528571</td>\n",
|
||
" <td>9163.000000</td>\n",
|
||
" <td>7.846821e+06</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" DATE_TIME PLANT_ID SOURCE_KEY DC_POWER \\\n",
|
||
"count 6877 6877.0 6877 6877.000000 \n",
|
||
"unique 2831 NaN 22 NaN \n",
|
||
"top 03-06-2020 13:30 NaN z9Y9gH1T5YWrNuG NaN \n",
|
||
"freq 9 NaN 363 NaN \n",
|
||
"mean NaN 4135001.0 NaN 3150.807630 \n",
|
||
"std NaN 0.0 NaN 4020.609169 \n",
|
||
"min NaN 4135001.0 NaN 0.000000 \n",
|
||
"25% NaN 4135001.0 NaN 0.000000 \n",
|
||
"50% NaN 4135001.0 NaN 468.285714 \n",
|
||
"75% NaN 4135001.0 NaN 6369.250000 \n",
|
||
"max NaN 4135001.0 NaN 14466.857140 \n",
|
||
"\n",
|
||
" AC_POWER DAILY_YIELD TOTAL_YIELD \n",
|
||
"count 6877.000000 6877.000000 6.877000e+03 \n",
|
||
"unique NaN NaN NaN \n",
|
||
"top NaN NaN NaN \n",
|
||
"freq NaN NaN NaN \n",
|
||
"mean 308.151426 3305.763907 6.981431e+06 \n",
|
||
"std 392.878525 3142.407510 4.151093e+05 \n",
|
||
"min 0.000000 0.000000 6.183645e+06 \n",
|
||
"25% 0.000000 0.000000 6.512002e+06 \n",
|
||
"50% 45.400000 2682.285714 7.149051e+06 \n",
|
||
"75% 623.975000 6274.000000 7.271854e+06 \n",
|
||
"max 1410.528571 9163.000000 7.846821e+06 "
|
||
]
|
||
},
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# statystyki dla zbioru test\n",
|
||
"\n",
|
||
"plant_test = pd.read_csv('data/Plant_1_Generation_Data.csv.test')\n",
|
||
"plant_test.describe(include='all')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>DATE_TIME</th>\n",
|
||
" <th>PLANT_ID</th>\n",
|
||
" <th>SOURCE_KEY</th>\n",
|
||
" <th>DC_POWER</th>\n",
|
||
" <th>AC_POWER</th>\n",
|
||
" <th>DAILY_YIELD</th>\n",
|
||
" <th>TOTAL_YIELD</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>count</th>\n",
|
||
" <td>55024</td>\n",
|
||
" <td>55024.0</td>\n",
|
||
" <td>55024</td>\n",
|
||
" <td>55024.000000</td>\n",
|
||
" <td>55024.000000</td>\n",
|
||
" <td>55024.000000</td>\n",
|
||
" <td>5.502400e+04</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>unique</th>\n",
|
||
" <td>3158</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>22</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>top</th>\n",
|
||
" <td>15-06-2020 09:30</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>iCRJl6heRkivqQ3</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>freq</th>\n",
|
||
" <td>22</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2561</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>mean</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4135001.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>3132.873631</td>\n",
|
||
" <td>306.377514</td>\n",
|
||
" <td>3292.894721</td>\n",
|
||
" <td>6.978859e+06</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>std</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4034.254455</td>\n",
|
||
" <td>394.177510</td>\n",
|
||
" <td>3146.231920</td>\n",
|
||
" <td>4.157218e+05</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>min</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4135001.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>6.183645e+06</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>25%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4135001.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>6.514911e+06</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>50%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4135001.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>400.500000</td>\n",
|
||
" <td>38.720536</td>\n",
|
||
" <td>2658.062500</td>\n",
|
||
" <td>7.146685e+06</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>75%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4135001.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>6337.535714</td>\n",
|
||
" <td>620.728125</td>\n",
|
||
" <td>6273.616072</td>\n",
|
||
" <td>7.268792e+06</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>max</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4135001.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>14471.125000</td>\n",
|
||
" <td>1410.950000</td>\n",
|
||
" <td>9163.000000</td>\n",
|
||
" <td>7.846821e+06</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" DATE_TIME PLANT_ID SOURCE_KEY DC_POWER \\\n",
|
||
"count 55024 55024.0 55024 55024.000000 \n",
|
||
"unique 3158 NaN 22 NaN \n",
|
||
"top 15-06-2020 09:30 NaN iCRJl6heRkivqQ3 NaN \n",
|
||
"freq 22 NaN 2561 NaN \n",
|
||
"mean NaN 4135001.0 NaN 3132.873631 \n",
|
||
"std NaN 0.0 NaN 4034.254455 \n",
|
||
"min NaN 4135001.0 NaN 0.000000 \n",
|
||
"25% NaN 4135001.0 NaN 0.000000 \n",
|
||
"50% NaN 4135001.0 NaN 400.500000 \n",
|
||
"75% NaN 4135001.0 NaN 6337.535714 \n",
|
||
"max NaN 4135001.0 NaN 14471.125000 \n",
|
||
"\n",
|
||
" AC_POWER DAILY_YIELD TOTAL_YIELD \n",
|
||
"count 55024.000000 55024.000000 5.502400e+04 \n",
|
||
"unique NaN NaN NaN \n",
|
||
"top NaN NaN NaN \n",
|
||
"freq NaN NaN NaN \n",
|
||
"mean 306.377514 3292.894721 6.978859e+06 \n",
|
||
"std 394.177510 3146.231920 4.157218e+05 \n",
|
||
"min 0.000000 0.000000 6.183645e+06 \n",
|
||
"25% 0.000000 0.000000 6.514911e+06 \n",
|
||
"50% 38.720536 2658.062500 7.146685e+06 \n",
|
||
"75% 620.728125 6273.616072 7.268792e+06 \n",
|
||
"max 1410.950000 9163.000000 7.846821e+06 "
|
||
]
|
||
},
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# statystyki dla zbioru train\n",
|
||
"\n",
|
||
"plant_train = pd.read_csv('data/Plant_1_Generation_Data.csv.train')\n",
|
||
"plant_train.describe(include='all')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>DATE_TIME</th>\n",
|
||
" <th>PLANT_ID</th>\n",
|
||
" <th>SOURCE_KEY</th>\n",
|
||
" <th>DC_POWER</th>\n",
|
||
" <th>AC_POWER</th>\n",
|
||
" <th>DAILY_YIELD</th>\n",
|
||
" <th>TOTAL_YIELD</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>10-06-2020 22:45</td>\n",
|
||
" <td>4135001</td>\n",
|
||
" <td>rGa61gmuvPhdLxV</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>6565.000000</td>\n",
|
||
" <td>7310769.000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>25-05-2020 07:15</td>\n",
|
||
" <td>4135001</td>\n",
|
||
" <td>uHbuxQJl8lW7ozc</td>\n",
|
||
" <td>0.166544</td>\n",
|
||
" <td>236.262500</td>\n",
|
||
" <td>121.750000</td>\n",
|
||
" <td>7111973.750</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>23-05-2020 17:45</td>\n",
|
||
" <td>4135001</td>\n",
|
||
" <td>1IF53ai7Xc0U56Y</td>\n",
|
||
" <td>0.109156</td>\n",
|
||
" <td>154.485714</td>\n",
|
||
" <td>8607.000000</td>\n",
|
||
" <td>6249141.000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>15-05-2020 04:45</td>\n",
|
||
" <td>4135001</td>\n",
|
||
" <td>3PZuoBAID5Wc2HD</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>6987759.000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>12-06-2020 16:30</td>\n",
|
||
" <td>4135001</td>\n",
|
||
" <td>iCRJl6heRkivqQ3</td>\n",
|
||
" <td>0.191808</td>\n",
|
||
" <td>272.157143</td>\n",
|
||
" <td>5567.428571</td>\n",
|
||
" <td>7391038.429</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6872</th>\n",
|
||
" <td>01-06-2020 10:00</td>\n",
|
||
" <td>4135001</td>\n",
|
||
" <td>zBIq5rxdHJRwDNY</td>\n",
|
||
" <td>0.539282</td>\n",
|
||
" <td>763.628571</td>\n",
|
||
" <td>1779.285714</td>\n",
|
||
" <td>6465018.286</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6873</th>\n",
|
||
" <td>27-05-2020 02:00</td>\n",
|
||
" <td>4135001</td>\n",
|
||
" <td>VHMLBKoKgIrUVDU</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>7297615.000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6874</th>\n",
|
||
" <td>31-05-2020 21:30</td>\n",
|
||
" <td>4135001</td>\n",
|
||
" <td>3PZuoBAID5Wc2HD</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>5816.000000</td>\n",
|
||
" <td>7115304.000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6875</th>\n",
|
||
" <td>11-06-2020 18:45</td>\n",
|
||
" <td>4135001</td>\n",
|
||
" <td>ih0vzX44oOqAx2f</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>5521.000000</td>\n",
|
||
" <td>6386553.000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6876</th>\n",
|
||
" <td>16-06-2020 05:45</td>\n",
|
||
" <td>4135001</td>\n",
|
||
" <td>3PZuoBAID5Wc2HD</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>7225042.000</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>6877 rows × 7 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" DATE_TIME PLANT_ID SOURCE_KEY DC_POWER AC_POWER \\\n",
|
||
"0 10-06-2020 22:45 4135001 rGa61gmuvPhdLxV 0.000000 0.000000 \n",
|
||
"1 25-05-2020 07:15 4135001 uHbuxQJl8lW7ozc 0.166544 236.262500 \n",
|
||
"2 23-05-2020 17:45 4135001 1IF53ai7Xc0U56Y 0.109156 154.485714 \n",
|
||
"3 15-05-2020 04:45 4135001 3PZuoBAID5Wc2HD 0.000000 0.000000 \n",
|
||
"4 12-06-2020 16:30 4135001 iCRJl6heRkivqQ3 0.191808 272.157143 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"6872 01-06-2020 10:00 4135001 zBIq5rxdHJRwDNY 0.539282 763.628571 \n",
|
||
"6873 27-05-2020 02:00 4135001 VHMLBKoKgIrUVDU 0.000000 0.000000 \n",
|
||
"6874 31-05-2020 21:30 4135001 3PZuoBAID5Wc2HD 0.000000 0.000000 \n",
|
||
"6875 11-06-2020 18:45 4135001 ih0vzX44oOqAx2f 0.000000 0.000000 \n",
|
||
"6876 16-06-2020 05:45 4135001 3PZuoBAID5Wc2HD 0.000000 0.000000 \n",
|
||
"\n",
|
||
" DAILY_YIELD TOTAL_YIELD \n",
|
||
"0 6565.000000 7310769.000 \n",
|
||
"1 121.750000 7111973.750 \n",
|
||
"2 8607.000000 6249141.000 \n",
|
||
"3 0.000000 6987759.000 \n",
|
||
"4 5567.428571 7391038.429 \n",
|
||
"... ... ... \n",
|
||
"6872 1779.285714 6465018.286 \n",
|
||
"6873 0.000000 7297615.000 \n",
|
||
"6874 5816.000000 7115304.000 \n",
|
||
"6875 5521.000000 6386553.000 \n",
|
||
"6876 0.000000 7225042.000 \n",
|
||
"\n",
|
||
"[6877 rows x 7 columns]"
|
||
]
|
||
},
|
||
"execution_count": 10,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# normalizacja\n",
|
||
"\n",
|
||
"\n",
|
||
"plant_normalized = plant_test.copy()\n",
|
||
"column = 'DC_POWER'\n",
|
||
"\n",
|
||
"plant_normalized[column] = plant_normalized[column] / plant_normalized[column].abs().max()\n",
|
||
"\n",
|
||
"plant_normalized"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"interpreter": {
|
||
"hash": "ac59ebe37160ed0dfa835113d9b8498d9f09ceb179beaac4002f036b9467c963"
|
||
},
|
||
"kernelspec": {
|
||
"display_name": "Python 3.9.1 64-bit",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.9.1"
|
||
},
|
||
"orig_nbformat": 4
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 2
|
||
}
|