{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>DATE_TIME</th>\n",
       "      <th>PLANT_ID</th>\n",
       "      <th>SOURCE_KEY</th>\n",
       "      <th>DC_POWER</th>\n",
       "      <th>AC_POWER</th>\n",
       "      <th>DAILY_YIELD</th>\n",
       "      <th>TOTAL_YIELD</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>68778</td>\n",
       "      <td>68778.0</td>\n",
       "      <td>68778</td>\n",
       "      <td>68778.000000</td>\n",
       "      <td>68778.000000</td>\n",
       "      <td>68778.000000</td>\n",
       "      <td>6.877800e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>unique</th>\n",
       "      <td>3158</td>\n",
       "      <td>NaN</td>\n",
       "      <td>22</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>top</th>\n",
       "      <td>01-06-2020 12:45</td>\n",
       "      <td>NaN</td>\n",
       "      <td>bvBOhCH3iADSZry</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>freq</th>\n",
       "      <td>22</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3155</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4135001.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3147.426211</td>\n",
       "      <td>307.802752</td>\n",
       "      <td>3295.968737</td>\n",
       "      <td>6.978712e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4036.457169</td>\n",
       "      <td>394.396439</td>\n",
       "      <td>3145.178309</td>\n",
       "      <td>4.162720e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4135001.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>6.183645e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4135001.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>6.512003e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4135001.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>429.000000</td>\n",
       "      <td>41.493750</td>\n",
       "      <td>2658.714286</td>\n",
       "      <td>7.146685e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4135001.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6366.964286</td>\n",
       "      <td>623.618750</td>\n",
       "      <td>6274.000000</td>\n",
       "      <td>7.268706e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4135001.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>14471.125000</td>\n",
       "      <td>1410.950000</td>\n",
       "      <td>9163.000000</td>\n",
       "      <td>7.846821e+06</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               DATE_TIME   PLANT_ID       SOURCE_KEY      DC_POWER  \\\n",
       "count              68778    68778.0            68778  68778.000000   \n",
       "unique              3158        NaN               22           NaN   \n",
       "top     01-06-2020 12:45        NaN  bvBOhCH3iADSZry           NaN   \n",
       "freq                  22        NaN             3155           NaN   \n",
       "mean                 NaN  4135001.0              NaN   3147.426211   \n",
       "std                  NaN        0.0              NaN   4036.457169   \n",
       "min                  NaN  4135001.0              NaN      0.000000   \n",
       "25%                  NaN  4135001.0              NaN      0.000000   \n",
       "50%                  NaN  4135001.0              NaN    429.000000   \n",
       "75%                  NaN  4135001.0              NaN   6366.964286   \n",
       "max                  NaN  4135001.0              NaN  14471.125000   \n",
       "\n",
       "            AC_POWER   DAILY_YIELD   TOTAL_YIELD  \n",
       "count   68778.000000  68778.000000  6.877800e+04  \n",
       "unique           NaN           NaN           NaN  \n",
       "top              NaN           NaN           NaN  \n",
       "freq             NaN           NaN           NaN  \n",
       "mean      307.802752   3295.968737  6.978712e+06  \n",
       "std       394.396439   3145.178309  4.162720e+05  \n",
       "min         0.000000      0.000000  6.183645e+06  \n",
       "25%         0.000000      0.000000  6.512003e+06  \n",
       "50%        41.493750   2658.714286  7.146685e+06  \n",
       "75%       623.618750   6274.000000  7.268706e+06  \n",
       "max      1410.950000   9163.000000  7.846821e+06  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Summary statistics for the full dataset.\n",
    "# include='all' makes describe() report on every column, not only the\n",
    "# numeric ones.\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "plant_all = pd.read_csv(filepath_or_buffer='data/Plant_1_Generation_Data.csv')\n",
    "plant_all.describe(include='all')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>DATE_TIME</th>\n",
       "      <th>PLANT_ID</th>\n",
       "      <th>SOURCE_KEY</th>\n",
       "      <th>DC_POWER</th>\n",
       "      <th>AC_POWER</th>\n",
       "      <th>DAILY_YIELD</th>\n",
       "      <th>TOTAL_YIELD</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>6877</td>\n",
       "      <td>6877.0</td>\n",
       "      <td>6877</td>\n",
       "      <td>6877.000000</td>\n",
       "      <td>6877.000000</td>\n",
       "      <td>6877.000000</td>\n",
       "      <td>6.877000e+03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>unique</th>\n",
       "      <td>2833</td>\n",
       "      <td>NaN</td>\n",
       "      <td>22</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>top</th>\n",
       "      <td>01-06-2020 00:00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1BY6WEcLGh8j5v7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>freq</th>\n",
       "      <td>8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>345</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4135001.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3260.482360</td>\n",
       "      <td>318.857642</td>\n",
       "      <td>3310.769269</td>\n",
       "      <td>6.974811e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4068.560282</td>\n",
       "      <td>397.532031</td>\n",
       "      <td>3139.906175</td>\n",
       "      <td>4.218293e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4135001.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>6.183645e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4135001.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>6.497496e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4135001.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>680.285714</td>\n",
       "      <td>65.914286</td>\n",
       "      <td>2652.714286</td>\n",
       "      <td>7.143812e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4135001.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6623.571429</td>\n",
       "      <td>648.842857</td>\n",
       "      <td>6277.000000</td>\n",
       "      <td>7.266135e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4135001.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>14418.428570</td>\n",
       "      <td>1405.800000</td>\n",
       "      <td>9163.000000</td>\n",
       "      <td>7.846821e+06</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               DATE_TIME   PLANT_ID       SOURCE_KEY      DC_POWER  \\\n",
       "count               6877     6877.0             6877   6877.000000   \n",
       "unique              2833        NaN               22           NaN   \n",
       "top     01-06-2020 00:00        NaN  1BY6WEcLGh8j5v7           NaN   \n",
       "freq                   8        NaN              345           NaN   \n",
       "mean                 NaN  4135001.0              NaN   3260.482360   \n",
       "std                  NaN        0.0              NaN   4068.560282   \n",
       "min                  NaN  4135001.0              NaN      0.000000   \n",
       "25%                  NaN  4135001.0              NaN      0.000000   \n",
       "50%                  NaN  4135001.0              NaN    680.285714   \n",
       "75%                  NaN  4135001.0              NaN   6623.571429   \n",
       "max                  NaN  4135001.0              NaN  14418.428570   \n",
       "\n",
       "           AC_POWER  DAILY_YIELD   TOTAL_YIELD  \n",
       "count   6877.000000  6877.000000  6.877000e+03  \n",
       "unique          NaN          NaN           NaN  \n",
       "top             NaN          NaN           NaN  \n",
       "freq            NaN          NaN           NaN  \n",
       "mean     318.857642  3310.769269  6.974811e+06  \n",
       "std      397.532031  3139.906175  4.218293e+05  \n",
       "min        0.000000     0.000000  6.183645e+06  \n",
       "25%        0.000000     0.000000  6.497496e+06  \n",
       "50%       65.914286  2652.714286  7.143812e+06  \n",
       "75%      648.842857  6277.000000  7.266135e+06  \n",
       "max     1405.800000  9163.000000  7.846821e+06  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Summary statistics for the dev set.\n",
    "# include='all' makes describe() report on every column, not only the\n",
    "# numeric ones.\n",
    "\n",
    "plant_dev = pd.read_csv(filepath_or_buffer='data/Plant_1_Generation_Data.csv.dev')\n",
    "plant_dev.describe(include='all')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>DATE_TIME</th>\n",
       "      <th>PLANT_ID</th>\n",
       "      <th>SOURCE_KEY</th>\n",
       "      <th>DC_POWER</th>\n",
       "      <th>AC_POWER</th>\n",
       "      <th>DAILY_YIELD</th>\n",
       "      <th>TOTAL_YIELD</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>6877</td>\n",
       "      <td>6877.0</td>\n",
       "      <td>6877</td>\n",
       "      <td>6877.000000</td>\n",
       "      <td>6877.000000</td>\n",
       "      <td>6877.000000</td>\n",
       "      <td>6.877000e+03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>unique</th>\n",
       "      <td>2831</td>\n",
       "      <td>NaN</td>\n",
       "      <td>22</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>top</th>\n",
       "      <td>03-06-2020 13:30</td>\n",
       "      <td>NaN</td>\n",
       "      <td>z9Y9gH1T5YWrNuG</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>freq</th>\n",
       "      <td>9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>363</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4135001.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3150.807630</td>\n",
       "      <td>308.151426</td>\n",
       "      <td>3305.763907</td>\n",
       "      <td>6.981431e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4020.609169</td>\n",
       "      <td>392.878525</td>\n",
       "      <td>3142.407510</td>\n",
       "      <td>4.151093e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4135001.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>6.183645e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4135001.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>6.512002e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4135001.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>468.285714</td>\n",
       "      <td>45.400000</td>\n",
       "      <td>2682.285714</td>\n",
       "      <td>7.149051e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4135001.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6369.250000</td>\n",
       "      <td>623.975000</td>\n",
       "      <td>6274.000000</td>\n",
       "      <td>7.271854e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4135001.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>14466.857140</td>\n",
       "      <td>1410.528571</td>\n",
       "      <td>9163.000000</td>\n",
       "      <td>7.846821e+06</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               DATE_TIME   PLANT_ID       SOURCE_KEY      DC_POWER  \\\n",
       "count               6877     6877.0             6877   6877.000000   \n",
       "unique              2831        NaN               22           NaN   \n",
       "top     03-06-2020 13:30        NaN  z9Y9gH1T5YWrNuG           NaN   \n",
       "freq                   9        NaN              363           NaN   \n",
       "mean                 NaN  4135001.0              NaN   3150.807630   \n",
       "std                  NaN        0.0              NaN   4020.609169   \n",
       "min                  NaN  4135001.0              NaN      0.000000   \n",
       "25%                  NaN  4135001.0              NaN      0.000000   \n",
       "50%                  NaN  4135001.0              NaN    468.285714   \n",
       "75%                  NaN  4135001.0              NaN   6369.250000   \n",
       "max                  NaN  4135001.0              NaN  14466.857140   \n",
       "\n",
       "           AC_POWER  DAILY_YIELD   TOTAL_YIELD  \n",
       "count   6877.000000  6877.000000  6.877000e+03  \n",
       "unique          NaN          NaN           NaN  \n",
       "top             NaN          NaN           NaN  \n",
       "freq            NaN          NaN           NaN  \n",
       "mean     308.151426  3305.763907  6.981431e+06  \n",
       "std      392.878525  3142.407510  4.151093e+05  \n",
       "min        0.000000     0.000000  6.183645e+06  \n",
       "25%        0.000000     0.000000  6.512002e+06  \n",
       "50%       45.400000  2682.285714  7.149051e+06  \n",
       "75%      623.975000  6274.000000  7.271854e+06  \n",
       "max     1410.528571  9163.000000  7.846821e+06  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Summary statistics for the test set.\n",
    "# include='all' makes describe() report on every column, not only the\n",
    "# numeric ones.\n",
    "\n",
    "plant_test = pd.read_csv(filepath_or_buffer='data/Plant_1_Generation_Data.csv.test')\n",
    "plant_test.describe(include='all')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>DATE_TIME</th>\n",
       "      <th>PLANT_ID</th>\n",
       "      <th>SOURCE_KEY</th>\n",
       "      <th>DC_POWER</th>\n",
       "      <th>AC_POWER</th>\n",
       "      <th>DAILY_YIELD</th>\n",
       "      <th>TOTAL_YIELD</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>55024</td>\n",
       "      <td>55024.0</td>\n",
       "      <td>55024</td>\n",
       "      <td>55024.000000</td>\n",
       "      <td>55024.000000</td>\n",
       "      <td>55024.000000</td>\n",
       "      <td>5.502400e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>unique</th>\n",
       "      <td>3158</td>\n",
       "      <td>NaN</td>\n",
       "      <td>22</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>top</th>\n",
       "      <td>15-06-2020 09:30</td>\n",
       "      <td>NaN</td>\n",
       "      <td>iCRJl6heRkivqQ3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>freq</th>\n",
       "      <td>22</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2561</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4135001.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3132.873631</td>\n",
       "      <td>306.377514</td>\n",
       "      <td>3292.894721</td>\n",
       "      <td>6.978859e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4034.254455</td>\n",
       "      <td>394.177510</td>\n",
       "      <td>3146.231920</td>\n",
       "      <td>4.157218e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4135001.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>6.183645e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4135001.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>6.514911e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4135001.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>400.500000</td>\n",
       "      <td>38.720536</td>\n",
       "      <td>2658.062500</td>\n",
       "      <td>7.146685e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4135001.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6337.535714</td>\n",
       "      <td>620.728125</td>\n",
       "      <td>6273.616072</td>\n",
       "      <td>7.268792e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4135001.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>14471.125000</td>\n",
       "      <td>1410.950000</td>\n",
       "      <td>9163.000000</td>\n",
       "      <td>7.846821e+06</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               DATE_TIME   PLANT_ID       SOURCE_KEY      DC_POWER  \\\n",
       "count              55024    55024.0            55024  55024.000000   \n",
       "unique              3158        NaN               22           NaN   \n",
       "top     15-06-2020 09:30        NaN  iCRJl6heRkivqQ3           NaN   \n",
       "freq                  22        NaN             2561           NaN   \n",
       "mean                 NaN  4135001.0              NaN   3132.873631   \n",
       "std                  NaN        0.0              NaN   4034.254455   \n",
       "min                  NaN  4135001.0              NaN      0.000000   \n",
       "25%                  NaN  4135001.0              NaN      0.000000   \n",
       "50%                  NaN  4135001.0              NaN    400.500000   \n",
       "75%                  NaN  4135001.0              NaN   6337.535714   \n",
       "max                  NaN  4135001.0              NaN  14471.125000   \n",
       "\n",
       "            AC_POWER   DAILY_YIELD   TOTAL_YIELD  \n",
       "count   55024.000000  55024.000000  5.502400e+04  \n",
       "unique           NaN           NaN           NaN  \n",
       "top              NaN           NaN           NaN  \n",
       "freq             NaN           NaN           NaN  \n",
       "mean      306.377514   3292.894721  6.978859e+06  \n",
       "std       394.177510   3146.231920  4.157218e+05  \n",
       "min         0.000000      0.000000  6.183645e+06  \n",
       "25%         0.000000      0.000000  6.514911e+06  \n",
       "50%        38.720536   2658.062500  7.146685e+06  \n",
       "75%       620.728125   6273.616072  7.268792e+06  \n",
       "max      1410.950000   9163.000000  7.846821e+06  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Summary statistics for the train set.\n",
    "# include='all' makes describe() report on every column, not only the\n",
    "# numeric ones.\n",
    "\n",
    "plant_train = pd.read_csv(filepath_or_buffer='data/Plant_1_Generation_Data.csv.train')\n",
    "plant_train.describe(include='all')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>DATE_TIME</th>\n",
       "      <th>PLANT_ID</th>\n",
       "      <th>SOURCE_KEY</th>\n",
       "      <th>DC_POWER</th>\n",
       "      <th>AC_POWER</th>\n",
       "      <th>DAILY_YIELD</th>\n",
       "      <th>TOTAL_YIELD</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10-06-2020 22:45</td>\n",
       "      <td>4135001</td>\n",
       "      <td>rGa61gmuvPhdLxV</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>6565.000000</td>\n",
       "      <td>7310769.000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>25-05-2020 07:15</td>\n",
       "      <td>4135001</td>\n",
       "      <td>uHbuxQJl8lW7ozc</td>\n",
       "      <td>0.166544</td>\n",
       "      <td>236.262500</td>\n",
       "      <td>121.750000</td>\n",
       "      <td>7111973.750</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>23-05-2020 17:45</td>\n",
       "      <td>4135001</td>\n",
       "      <td>1IF53ai7Xc0U56Y</td>\n",
       "      <td>0.109156</td>\n",
       "      <td>154.485714</td>\n",
       "      <td>8607.000000</td>\n",
       "      <td>6249141.000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>15-05-2020 04:45</td>\n",
       "      <td>4135001</td>\n",
       "      <td>3PZuoBAID5Wc2HD</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>6987759.000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>12-06-2020 16:30</td>\n",
       "      <td>4135001</td>\n",
       "      <td>iCRJl6heRkivqQ3</td>\n",
       "      <td>0.191808</td>\n",
       "      <td>272.157143</td>\n",
       "      <td>5567.428571</td>\n",
       "      <td>7391038.429</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6872</th>\n",
       "      <td>01-06-2020 10:00</td>\n",
       "      <td>4135001</td>\n",
       "      <td>zBIq5rxdHJRwDNY</td>\n",
       "      <td>0.539282</td>\n",
       "      <td>763.628571</td>\n",
       "      <td>1779.285714</td>\n",
       "      <td>6465018.286</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6873</th>\n",
       "      <td>27-05-2020 02:00</td>\n",
       "      <td>4135001</td>\n",
       "      <td>VHMLBKoKgIrUVDU</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>7297615.000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6874</th>\n",
       "      <td>31-05-2020 21:30</td>\n",
       "      <td>4135001</td>\n",
       "      <td>3PZuoBAID5Wc2HD</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>5816.000000</td>\n",
       "      <td>7115304.000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6875</th>\n",
       "      <td>11-06-2020 18:45</td>\n",
       "      <td>4135001</td>\n",
       "      <td>ih0vzX44oOqAx2f</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>5521.000000</td>\n",
       "      <td>6386553.000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6876</th>\n",
       "      <td>16-06-2020 05:45</td>\n",
       "      <td>4135001</td>\n",
       "      <td>3PZuoBAID5Wc2HD</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>7225042.000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>6877 rows × 7 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             DATE_TIME  PLANT_ID       SOURCE_KEY  DC_POWER    AC_POWER  \\\n",
       "0     10-06-2020 22:45   4135001  rGa61gmuvPhdLxV  0.000000    0.000000   \n",
       "1     25-05-2020 07:15   4135001  uHbuxQJl8lW7ozc  0.166544  236.262500   \n",
       "2     23-05-2020 17:45   4135001  1IF53ai7Xc0U56Y  0.109156  154.485714   \n",
       "3     15-05-2020 04:45   4135001  3PZuoBAID5Wc2HD  0.000000    0.000000   \n",
       "4     12-06-2020 16:30   4135001  iCRJl6heRkivqQ3  0.191808  272.157143   \n",
       "...                ...       ...              ...       ...         ...   \n",
       "6872  01-06-2020 10:00   4135001  zBIq5rxdHJRwDNY  0.539282  763.628571   \n",
       "6873  27-05-2020 02:00   4135001  VHMLBKoKgIrUVDU  0.000000    0.000000   \n",
       "6874  31-05-2020 21:30   4135001  3PZuoBAID5Wc2HD  0.000000    0.000000   \n",
       "6875  11-06-2020 18:45   4135001  ih0vzX44oOqAx2f  0.000000    0.000000   \n",
       "6876  16-06-2020 05:45   4135001  3PZuoBAID5Wc2HD  0.000000    0.000000   \n",
       "\n",
       "      DAILY_YIELD  TOTAL_YIELD  \n",
       "0     6565.000000  7310769.000  \n",
       "1      121.750000  7111973.750  \n",
       "2     8607.000000  6249141.000  \n",
       "3        0.000000  6987759.000  \n",
       "4     5567.428571  7391038.429  \n",
       "...           ...          ...  \n",
       "6872  1779.285714  6465018.286  \n",
       "6873     0.000000  7297615.000  \n",
       "6874  5816.000000  7115304.000  \n",
       "6875  5521.000000  6386553.000  \n",
       "6876     0.000000  7225042.000  \n",
       "\n",
       "[6877 rows x 7 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Normalization: rescale one column by its largest absolute value.\n",
    "# NOTE(review): the scale factor is taken from the test set itself -\n",
    "# confirm whether the maximum of the train set should be used instead.\n",
    "\n",
    "column = 'DC_POWER'\n",
    "\n",
    "# Work on a copy so that plant_test itself is left unmodified.\n",
    "plant_normalized = plant_test.copy()\n",
    "plant_normalized[column] = plant_normalized[column].div(\n",
    "    plant_normalized[column].abs().max()\n",
    ")\n",
    "\n",
    "plant_normalized"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "ac59ebe37160ed0dfa835113d9b8498d9f09ceb179beaac4002f036b9467c963"
  },
  "kernelspec": {
   "display_name": "Python 3.9.1 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.1"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}