ium_470623/IUM_dane02.ipynb

1480 lines
57 KiB
Plaintext
Raw Normal View History

2022-03-21 10:35:52 +01:00
{
"cells": [
{
"cell_type": "code",
2022-04-11 00:40:48 +02:00
"execution_count": 1,
2022-03-21 10:35:52 +01:00
"id": "expected-payroll",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2022-04-11 00:40:48 +02:00
"Requirement already satisfied: kaggle in c:\\users\\cgala\\anaconda3\\lib\\site-packages (1.5.12)\n",
"Requirement already satisfied: tqdm in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from kaggle) (4.50.2)\n",
"Requirement already satisfied: requests in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from kaggle) (2.24.0)\n",
"Requirement already satisfied: certifi in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from kaggle) (2020.6.20)\n",
"Requirement already satisfied: six>=1.10 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from kaggle) (1.15.0)\n",
"Requirement already satisfied: python-slugify in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from kaggle) (6.1.1)\n",
"Requirement already satisfied: urllib3 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from kaggle) (1.25.11)\n",
"Requirement already satisfied: python-dateutil in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from kaggle) (2.8.1)\n",
"Requirement already satisfied: idna<3,>=2.5 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from requests->kaggle) (2.10)\n",
"Requirement already satisfied: chardet<4,>=3.0.2 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from requests->kaggle) (3.0.4)\n",
"Requirement already satisfied: text-unidecode>=1.3 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from python-slugify->kaggle) (1.3)\n",
"Requirement already satisfied: pandas in c:\\users\\cgala\\anaconda3\\lib\\site-packages (1.1.3)\n",
"Requirement already satisfied: python-dateutil>=2.7.3 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from pandas) (2.8.1)\n",
"Requirement already satisfied: numpy>=1.15.4 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from pandas) (1.19.2)\n",
"Requirement already satisfied: pytz>=2017.2 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from pandas) (2020.1)\n",
"Requirement already satisfied: six>=1.5 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from python-dateutil>=2.7.3->pandas) (1.15.0)\n",
"Requirement already satisfied: seaborn in c:\\users\\cgala\\anaconda3\\lib\\site-packages (0.11.0)\n",
"Requirement already satisfied: numpy>=1.15 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from seaborn) (1.19.2)\n",
"Requirement already satisfied: scipy>=1.0 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from seaborn) (1.5.2)\n",
"Requirement already satisfied: matplotlib>=2.2 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from seaborn) (3.3.2)\n",
"Requirement already satisfied: pandas>=0.23 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from seaborn) (1.1.3)\n",
"Requirement already satisfied: python-dateutil>=2.1 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2.8.1)\n",
"Requirement already satisfied: kiwisolver>=1.0.1 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (1.3.0)\n",
"Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.3 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2.4.7)\n",
"Requirement already satisfied: cycler>=0.10 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n",
"Requirement already satisfied: pillow>=6.2.0 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (8.0.1)\n",
"Requirement already satisfied: certifi>=2020.06.20 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2020.6.20)\n",
"Requirement already satisfied: pytz>=2017.2 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from pandas>=0.23->seaborn) (2020.1)\n",
"Requirement already satisfied: six>=1.5 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from python-dateutil>=2.1->matplotlib>=2.2->seaborn) (1.15.0)\n"
2022-03-21 10:35:52 +01:00
]
}
],
"source": [
"!pip install kaggle\n",
"!pip install pandas\n",
"!pip install seaborn"
]
},
{
"cell_type": "code",
2022-04-11 00:40:48 +02:00
"execution_count": 3,
2022-03-21 10:35:52 +01:00
"id": "genetic-plaintiff",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading steel-industry-energy-consumption.zip to D:\\UAM zajecia\\IUM\\ium_470623\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
" 0%| | 0.00/484k [00:00<?, ?B/s]\n",
2022-04-11 00:40:48 +02:00
"100%|##########| 484k/484k [00:00<00:00, 2.36MB/s]\n",
"100%|##########| 484k/484k [00:00<00:00, 2.36MB/s]\n"
2022-03-21 10:35:52 +01:00
]
}
],
"source": [
2022-04-11 00:40:48 +02:00
"!kaggle datasets download -d csafrit2/steel-industry-energy-consumption --force"
2022-03-21 10:35:52 +01:00
]
},
{
"cell_type": "code",
2022-04-11 00:40:48 +02:00
"execution_count": 3,
2022-03-21 10:35:52 +01:00
"id": "compatible-following",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Archive: steel-industry-energy-consumption.zip\n",
" inflating: Steel_industry_data.csv \n"
]
}
],
"source": [
"!unzip -o steel-industry-energy-consumption.zip"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "interstate-ethnic",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>Usage_kWh</th>\n",
" <th>Lagging_Current_Reactive.Power_kVarh</th>\n",
" <th>Leading_Current_Reactive_Power_kVarh</th>\n",
" <th>CO2(tCO2)</th>\n",
" <th>Lagging_Current_Power_Factor</th>\n",
" <th>Leading_Current_Power_Factor</th>\n",
" <th>NSM</th>\n",
" <th>WeekStatus</th>\n",
" <th>Day_of_week</th>\n",
" <th>Load_Type</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>01/01/2018 00:15</td>\n",
" <td>3.17</td>\n",
" <td>2.95</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>73.21</td>\n",
" <td>100.00</td>\n",
" <td>900</td>\n",
" <td>Weekday</td>\n",
" <td>Monday</td>\n",
" <td>Light_Load</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>01/01/2018 00:30</td>\n",
" <td>4.00</td>\n",
" <td>4.46</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>66.77</td>\n",
" <td>100.00</td>\n",
" <td>1800</td>\n",
" <td>Weekday</td>\n",
" <td>Monday</td>\n",
" <td>Light_Load</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>01/01/2018 00:45</td>\n",
" <td>3.24</td>\n",
" <td>3.28</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>70.28</td>\n",
" <td>100.00</td>\n",
" <td>2700</td>\n",
" <td>Weekday</td>\n",
" <td>Monday</td>\n",
" <td>Light_Load</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>01/01/2018 01:00</td>\n",
" <td>3.31</td>\n",
" <td>3.56</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>68.09</td>\n",
" <td>100.00</td>\n",
" <td>3600</td>\n",
" <td>Weekday</td>\n",
" <td>Monday</td>\n",
" <td>Light_Load</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>01/01/2018 01:15</td>\n",
" <td>3.82</td>\n",
" <td>4.50</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>64.72</td>\n",
" <td>100.00</td>\n",
" <td>4500</td>\n",
" <td>Weekday</td>\n",
" <td>Monday</td>\n",
" <td>Light_Load</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>35035</th>\n",
" <td>31/12/2018 23:00</td>\n",
" <td>3.85</td>\n",
" <td>4.86</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>62.10</td>\n",
" <td>100.00</td>\n",
" <td>82800</td>\n",
" <td>Weekday</td>\n",
" <td>Monday</td>\n",
" <td>Light_Load</td>\n",
" </tr>\n",
" <tr>\n",
" <th>35036</th>\n",
" <td>31/12/2018 23:15</td>\n",
" <td>3.74</td>\n",
" <td>3.74</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>70.71</td>\n",
" <td>100.00</td>\n",
" <td>83700</td>\n",
" <td>Weekday</td>\n",
" <td>Monday</td>\n",
" <td>Light_Load</td>\n",
" </tr>\n",
" <tr>\n",
" <th>35037</th>\n",
" <td>31/12/2018 23:30</td>\n",
" <td>3.78</td>\n",
" <td>3.17</td>\n",
" <td>0.07</td>\n",
" <td>0.0</td>\n",
" <td>76.62</td>\n",
" <td>99.98</td>\n",
" <td>84600</td>\n",
" <td>Weekday</td>\n",
" <td>Monday</td>\n",
" <td>Light_Load</td>\n",
" </tr>\n",
" <tr>\n",
" <th>35038</th>\n",
" <td>31/12/2018 23:45</td>\n",
" <td>3.78</td>\n",
" <td>3.06</td>\n",
" <td>0.11</td>\n",
" <td>0.0</td>\n",
" <td>77.72</td>\n",
" <td>99.96</td>\n",
" <td>85500</td>\n",
" <td>Weekday</td>\n",
" <td>Monday</td>\n",
" <td>Light_Load</td>\n",
" </tr>\n",
" <tr>\n",
" <th>35039</th>\n",
" <td>31/12/2018 00:00</td>\n",
" <td>3.67</td>\n",
" <td>3.02</td>\n",
" <td>0.07</td>\n",
" <td>0.0</td>\n",
" <td>77.22</td>\n",
" <td>99.98</td>\n",
" <td>0</td>\n",
" <td>Weekday</td>\n",
" <td>Monday</td>\n",
" <td>Light_Load</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>35040 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
" date Usage_kWh Lagging_Current_Reactive.Power_kVarh \\\n",
"0 01/01/2018 00:15 3.17 2.95 \n",
"1 01/01/2018 00:30 4.00 4.46 \n",
"2 01/01/2018 00:45 3.24 3.28 \n",
"3 01/01/2018 01:00 3.31 3.56 \n",
"4 01/01/2018 01:15 3.82 4.50 \n",
"... ... ... ... \n",
"35035 31/12/2018 23:00 3.85 4.86 \n",
"35036 31/12/2018 23:15 3.74 3.74 \n",
"35037 31/12/2018 23:30 3.78 3.17 \n",
"35038 31/12/2018 23:45 3.78 3.06 \n",
"35039 31/12/2018 00:00 3.67 3.02 \n",
"\n",
" Leading_Current_Reactive_Power_kVarh CO2(tCO2) \\\n",
"0 0.00 0.0 \n",
"1 0.00 0.0 \n",
"2 0.00 0.0 \n",
"3 0.00 0.0 \n",
"4 0.00 0.0 \n",
"... ... ... \n",
"35035 0.00 0.0 \n",
"35036 0.00 0.0 \n",
"35037 0.07 0.0 \n",
"35038 0.11 0.0 \n",
"35039 0.07 0.0 \n",
"\n",
" Lagging_Current_Power_Factor Leading_Current_Power_Factor NSM \\\n",
"0 73.21 100.00 900 \n",
"1 66.77 100.00 1800 \n",
"2 70.28 100.00 2700 \n",
"3 68.09 100.00 3600 \n",
"4 64.72 100.00 4500 \n",
"... ... ... ... \n",
"35035 62.10 100.00 82800 \n",
"35036 70.71 100.00 83700 \n",
"35037 76.62 99.98 84600 \n",
"35038 77.72 99.96 85500 \n",
"35039 77.22 99.98 0 \n",
"\n",
" WeekStatus Day_of_week Load_Type \n",
"0 Weekday Monday Light_Load \n",
"1 Weekday Monday Light_Load \n",
"2 Weekday Monday Light_Load \n",
"3 Weekday Monday Light_Load \n",
"4 Weekday Monday Light_Load \n",
"... ... ... ... \n",
"35035 Weekday Monday Light_Load \n",
"35036 Weekday Monday Light_Load \n",
"35037 Weekday Monday Light_Load \n",
"35038 Weekday Monday Light_Load \n",
"35039 Weekday Monday Light_Load \n",
"\n",
"[35040 rows x 11 columns]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"energy_data=pd.read_csv('Steel_industry_data.csv')\n",
"energy_data"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "heated-spectacular",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>Usage_kWh</th>\n",
" <th>Lagging_Current_Reactive.Power_kVarh</th>\n",
" <th>Leading_Current_Reactive_Power_kVarh</th>\n",
" <th>CO2(tCO2)</th>\n",
" <th>Lagging_Current_Power_Factor</th>\n",
" <th>Leading_Current_Power_Factor</th>\n",
" <th>NSM</th>\n",
" <th>WeekStatus</th>\n",
" <th>Day_of_week</th>\n",
" <th>Load_Type</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>35040</td>\n",
" <td>35040.000000</td>\n",
" <td>35040.000000</td>\n",
" <td>35040.000000</td>\n",
" <td>35040.000000</td>\n",
" <td>35040.000000</td>\n",
" <td>35040.000000</td>\n",
" <td>35040.000000</td>\n",
" <td>35040</td>\n",
" <td>35040</td>\n",
" <td>35040</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
" <td>35040</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>7</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
" <td>01/01/2018 00:15</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Weekday</td>\n",
" <td>Monday</td>\n",
" <td>Light_Load</td>\n",
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>25056</td>\n",
" <td>5088</td>\n",
" <td>18072</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>NaN</td>\n",
" <td>27.386892</td>\n",
" <td>13.035384</td>\n",
" <td>3.870949</td>\n",
" <td>0.011524</td>\n",
" <td>80.578056</td>\n",
" <td>84.367870</td>\n",
" <td>42750.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>NaN</td>\n",
" <td>33.444380</td>\n",
" <td>16.306000</td>\n",
" <td>7.424463</td>\n",
" <td>0.016151</td>\n",
" <td>18.921322</td>\n",
" <td>30.456535</td>\n",
" <td>24940.534317</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>NaN</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>NaN</td>\n",
" <td>3.200000</td>\n",
" <td>2.300000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>63.320000</td>\n",
" <td>99.700000</td>\n",
" <td>21375.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>NaN</td>\n",
" <td>4.570000</td>\n",
" <td>5.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>87.960000</td>\n",
" <td>100.000000</td>\n",
" <td>42750.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>NaN</td>\n",
" <td>51.237500</td>\n",
" <td>22.640000</td>\n",
" <td>2.090000</td>\n",
" <td>0.020000</td>\n",
" <td>99.022500</td>\n",
" <td>100.000000</td>\n",
" <td>64125.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>NaN</td>\n",
" <td>157.180000</td>\n",
" <td>96.910000</td>\n",
" <td>27.760000</td>\n",
" <td>0.070000</td>\n",
" <td>100.000000</td>\n",
" <td>100.000000</td>\n",
" <td>85500.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" date Usage_kWh Lagging_Current_Reactive.Power_kVarh \\\n",
"count 35040 35040.000000 35040.000000 \n",
"unique 35040 NaN NaN \n",
"top 01/01/2018 00:15 NaN NaN \n",
"freq 1 NaN NaN \n",
"mean NaN 27.386892 13.035384 \n",
"std NaN 33.444380 16.306000 \n",
"min NaN 0.000000 0.000000 \n",
"25% NaN 3.200000 2.300000 \n",
"50% NaN 4.570000 5.000000 \n",
"75% NaN 51.237500 22.640000 \n",
"max NaN 157.180000 96.910000 \n",
"\n",
" Leading_Current_Reactive_Power_kVarh CO2(tCO2) \\\n",
"count 35040.000000 35040.000000 \n",
"unique NaN NaN \n",
"top NaN NaN \n",
"freq NaN NaN \n",
"mean 3.870949 0.011524 \n",
"std 7.424463 0.016151 \n",
"min 0.000000 0.000000 \n",
"25% 0.000000 0.000000 \n",
"50% 0.000000 0.000000 \n",
"75% 2.090000 0.020000 \n",
"max 27.760000 0.070000 \n",
"\n",
" Lagging_Current_Power_Factor Leading_Current_Power_Factor \\\n",
"count 35040.000000 35040.000000 \n",
"unique NaN NaN \n",
"top NaN NaN \n",
"freq NaN NaN \n",
"mean 80.578056 84.367870 \n",
"std 18.921322 30.456535 \n",
"min 0.000000 0.000000 \n",
"25% 63.320000 99.700000 \n",
"50% 87.960000 100.000000 \n",
"75% 99.022500 100.000000 \n",
"max 100.000000 100.000000 \n",
"\n",
" NSM WeekStatus Day_of_week Load_Type \n",
"count 35040.000000 35040 35040 35040 \n",
"unique NaN 2 7 3 \n",
"top NaN Weekday Monday Light_Load \n",
"freq NaN 25056 5088 18072 \n",
"mean 42750.000000 NaN NaN NaN \n",
"std 24940.534317 NaN NaN NaN \n",
"min 0.000000 NaN NaN NaN \n",
"25% 21375.000000 NaN NaN NaN \n",
"50% 42750.000000 NaN NaN NaN \n",
"75% 64125.000000 NaN NaN NaN \n",
"max 85500.000000 NaN NaN NaN "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"energy_data.describe(include='all')"
]
},
{
"cell_type": "code",
2022-04-11 00:40:48 +02:00
"execution_count": 6,
2022-03-21 10:35:52 +01:00
"id": "loved-delight",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Training set size:\n",
2022-04-11 00:40:48 +02:00
"(28032, 11)\n",
2022-03-21 10:35:52 +01:00
"Testing set size:\n",
2022-04-11 00:40:48 +02:00
"(3504, 11)\n",
2022-03-21 10:35:52 +01:00
"Dev set size:\n",
2022-04-11 00:40:48 +02:00
"(3504, 11)\n"
2022-03-21 10:35:52 +01:00
]
}
],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
2022-04-11 00:40:48 +02:00
"train_data, test_data = train_test_split(energy_data, test_size=7008, random_state=1)\n",
"test_data, dev_data = train_test_split(test_data, test_size=3504, random_state=1)\n",
2022-03-21 10:35:52 +01:00
"print('Training set size:')\n",
"print(train_data.shape)\n",
"print('Testing set size:')\n",
"print(test_data.shape)\n",
"print('Dev set size:')\n",
"print(dev_data.shape)"
]
},
{
"cell_type": "code",
2022-04-11 00:40:48 +02:00
"execution_count": 7,
2022-03-21 10:35:52 +01:00
"id": "formed-virginia",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>Usage_kWh</th>\n",
" <th>Lagging_Current_Reactive.Power_kVarh</th>\n",
" <th>Leading_Current_Reactive_Power_kVarh</th>\n",
" <th>CO2(tCO2)</th>\n",
" <th>Lagging_Current_Power_Factor</th>\n",
" <th>Leading_Current_Power_Factor</th>\n",
" <th>NSM</th>\n",
" <th>WeekStatus</th>\n",
" <th>Day_of_week</th>\n",
" <th>Load_Type</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
2022-04-11 00:40:48 +02:00
" <td>28032</td>\n",
" <td>28032.000000</td>\n",
" <td>28032.000000</td>\n",
" <td>28032.000000</td>\n",
" <td>28032.000000</td>\n",
" <td>28032.000000</td>\n",
" <td>28032.000000</td>\n",
" <td>28032.000000</td>\n",
" <td>28032</td>\n",
" <td>28032</td>\n",
" <td>28032</td>\n",
2022-03-21 10:35:52 +01:00
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
2022-04-11 00:40:48 +02:00
" <td>28032</td>\n",
2022-03-21 10:35:52 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>7</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
2022-04-11 00:40:48 +02:00
" <td>07/08/2018 14:15</td>\n",
2022-03-21 10:35:52 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Weekday</td>\n",
" <td>Monday</td>\n",
" <td>Light_Load</td>\n",
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2022-04-11 00:40:48 +02:00
" <td>19998</td>\n",
" <td>4087</td>\n",
" <td>14467</td>\n",
2022-03-21 10:35:52 +01:00
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>NaN</td>\n",
2022-04-11 00:40:48 +02:00
" <td>27.340174</td>\n",
" <td>13.026801</td>\n",
" <td>3.875001</td>\n",
" <td>0.011498</td>\n",
" <td>80.520145</td>\n",
" <td>84.369511</td>\n",
" <td>42761.429795</td>\n",
2022-03-21 10:35:52 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>NaN</td>\n",
2022-04-11 00:40:48 +02:00
" <td>33.469130</td>\n",
" <td>16.289348</td>\n",
" <td>7.445898</td>\n",
" <td>0.016153</td>\n",
" <td>18.932825</td>\n",
" <td>30.462193</td>\n",
" <td>24944.585138</td>\n",
2022-03-21 10:35:52 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>NaN</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>NaN</td>\n",
" <td>3.200000</td>\n",
2022-04-11 00:40:48 +02:00
" <td>2.300000</td>\n",
2022-03-21 10:35:52 +01:00
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
2022-04-11 00:40:48 +02:00
" <td>63.227500</td>\n",
" <td>99.710000</td>\n",
2022-03-21 10:35:52 +01:00
" <td>20700.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>NaN</td>\n",
" <td>4.570000</td>\n",
" <td>5.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
2022-04-11 00:40:48 +02:00
" <td>87.870000</td>\n",
2022-03-21 10:35:52 +01:00
" <td>100.000000</td>\n",
2022-04-11 00:40:48 +02:00
" <td>43200.000000</td>\n",
2022-03-21 10:35:52 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>NaN</td>\n",
2022-04-11 00:40:48 +02:00
" <td>51.190000</td>\n",
" <td>22.750000</td>\n",
" <td>2.020000</td>\n",
2022-03-21 10:35:52 +01:00
" <td>0.020000</td>\n",
2022-04-11 00:40:48 +02:00
" <td>99.000000</td>\n",
2022-03-21 10:35:52 +01:00
" <td>100.000000</td>\n",
2022-04-11 00:40:48 +02:00
" <td>64800.000000</td>\n",
2022-03-21 10:35:52 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>NaN</td>\n",
" <td>157.180000</td>\n",
" <td>96.910000</td>\n",
" <td>27.760000</td>\n",
" <td>0.070000</td>\n",
" <td>100.000000</td>\n",
" <td>100.000000</td>\n",
" <td>85500.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" date Usage_kWh Lagging_Current_Reactive.Power_kVarh \\\n",
2022-04-11 00:40:48 +02:00
"count 28032 28032.000000 28032.000000 \n",
"unique 28032 NaN NaN \n",
"top 07/08/2018 14:15 NaN NaN \n",
2022-03-21 10:35:52 +01:00
"freq 1 NaN NaN \n",
2022-04-11 00:40:48 +02:00
"mean NaN 27.340174 13.026801 \n",
"std NaN 33.469130 16.289348 \n",
2022-03-21 10:35:52 +01:00
"min NaN 0.000000 0.000000 \n",
2022-04-11 00:40:48 +02:00
"25% NaN 3.200000 2.300000 \n",
2022-03-21 10:35:52 +01:00
"50% NaN 4.570000 5.000000 \n",
2022-04-11 00:40:48 +02:00
"75% NaN 51.190000 22.750000 \n",
2022-03-21 10:35:52 +01:00
"max NaN 157.180000 96.910000 \n",
"\n",
" Leading_Current_Reactive_Power_kVarh CO2(tCO2) \\\n",
2022-04-11 00:40:48 +02:00
"count 28032.000000 28032.000000 \n",
2022-03-21 10:35:52 +01:00
"unique NaN NaN \n",
"top NaN NaN \n",
"freq NaN NaN \n",
2022-04-11 00:40:48 +02:00
"mean 3.875001 0.011498 \n",
"std 7.445898 0.016153 \n",
2022-03-21 10:35:52 +01:00
"min 0.000000 0.000000 \n",
"25% 0.000000 0.000000 \n",
"50% 0.000000 0.000000 \n",
2022-04-11 00:40:48 +02:00
"75% 2.020000 0.020000 \n",
2022-03-21 10:35:52 +01:00
"max 27.760000 0.070000 \n",
"\n",
" Lagging_Current_Power_Factor Leading_Current_Power_Factor \\\n",
2022-04-11 00:40:48 +02:00
"count 28032.000000 28032.000000 \n",
2022-03-21 10:35:52 +01:00
"unique NaN NaN \n",
"top NaN NaN \n",
"freq NaN NaN \n",
2022-04-11 00:40:48 +02:00
"mean 80.520145 84.369511 \n",
"std 18.932825 30.462193 \n",
2022-03-21 10:35:52 +01:00
"min 0.000000 0.000000 \n",
2022-04-11 00:40:48 +02:00
"25% 63.227500 99.710000 \n",
"50% 87.870000 100.000000 \n",
"75% 99.000000 100.000000 \n",
2022-03-21 10:35:52 +01:00
"max 100.000000 100.000000 \n",
"\n",
" NSM WeekStatus Day_of_week Load_Type \n",
2022-04-11 00:40:48 +02:00
"count 28032.000000 28032 28032 28032 \n",
2022-03-21 10:35:52 +01:00
"unique NaN 2 7 3 \n",
"top NaN Weekday Monday Light_Load \n",
2022-04-11 00:40:48 +02:00
"freq NaN 19998 4087 14467 \n",
"mean 42761.429795 NaN NaN NaN \n",
"std 24944.585138 NaN NaN NaN \n",
2022-03-21 10:35:52 +01:00
"min 0.000000 NaN NaN NaN \n",
"25% 20700.000000 NaN NaN NaN \n",
2022-04-11 00:40:48 +02:00
"50% 43200.000000 NaN NaN NaN \n",
"75% 64800.000000 NaN NaN NaN \n",
2022-03-21 10:35:52 +01:00
"max 85500.000000 NaN NaN NaN "
]
},
2022-04-11 00:40:48 +02:00
"execution_count": 7,
2022-03-21 10:35:52 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_data.describe(include='all')"
]
},
{
"cell_type": "code",
2022-04-11 00:40:48 +02:00
"execution_count": 8,
2022-03-21 10:35:52 +01:00
"id": "radical-score",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>Usage_kWh</th>\n",
" <th>Lagging_Current_Reactive.Power_kVarh</th>\n",
" <th>Leading_Current_Reactive_Power_kVarh</th>\n",
" <th>CO2(tCO2)</th>\n",
" <th>Lagging_Current_Power_Factor</th>\n",
" <th>Leading_Current_Power_Factor</th>\n",
" <th>NSM</th>\n",
" <th>WeekStatus</th>\n",
" <th>Day_of_week</th>\n",
" <th>Load_Type</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
2022-04-11 00:40:48 +02:00
" <td>3504</td>\n",
" <td>3504.000000</td>\n",
" <td>3504.000000</td>\n",
" <td>3504.000000</td>\n",
" <td>3504.000000</td>\n",
" <td>3504.000000</td>\n",
" <td>3504.000000</td>\n",
" <td>3504.000000</td>\n",
" <td>3504</td>\n",
" <td>3504</td>\n",
" <td>3504</td>\n",
2022-03-21 10:35:52 +01:00
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
2022-04-11 00:40:48 +02:00
" <td>3504</td>\n",
2022-03-21 10:35:52 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>7</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
2022-04-11 00:40:48 +02:00
" <td>20/06/2018 13:00</td>\n",
2022-03-21 10:35:52 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Weekday</td>\n",
2022-04-11 00:40:48 +02:00
" <td>Wednesday</td>\n",
2022-03-21 10:35:52 +01:00
" <td>Light_Load</td>\n",
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2022-04-11 00:40:48 +02:00
" <td>2522</td>\n",
" <td>527</td>\n",
" <td>1837</td>\n",
2022-03-21 10:35:52 +01:00
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>NaN</td>\n",
2022-04-11 00:40:48 +02:00
" <td>26.355685</td>\n",
" <td>12.374717</td>\n",
" <td>3.891093</td>\n",
" <td>0.011050</td>\n",
" <td>80.687751</td>\n",
" <td>84.082794</td>\n",
" <td>42594.092466</td>\n",
2022-03-21 10:35:52 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>NaN</td>\n",
2022-04-11 00:40:48 +02:00
" <td>32.519749</td>\n",
" <td>15.830961</td>\n",
" <td>7.353028</td>\n",
" <td>0.015762</td>\n",
" <td>19.053018</td>\n",
" <td>30.614144</td>\n",
" <td>25222.804637</td>\n",
2022-03-21 10:35:52 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>NaN</td>\n",
" <td>2.480000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
2022-04-11 00:40:48 +02:00
" <td>40.290000</td>\n",
2022-03-21 10:35:52 +01:00
" <td>12.540000</td>\n",
" <td>0.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>NaN</td>\n",
2022-04-11 00:40:48 +02:00
" <td>3.192500</td>\n",
" <td>2.090000</td>\n",
2022-03-21 10:35:52 +01:00
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
2022-04-11 00:40:48 +02:00
" <td>63.130000</td>\n",
" <td>99.562500</td>\n",
" <td>20700.000000</td>\n",
2022-03-21 10:35:52 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>NaN</td>\n",
2022-04-11 00:40:48 +02:00
" <td>4.500000</td>\n",
" <td>4.900000</td>\n",
2022-03-21 10:35:52 +01:00
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
2022-04-11 00:40:48 +02:00
" <td>88.210000</td>\n",
2022-03-21 10:35:52 +01:00
" <td>100.000000</td>\n",
2022-04-11 00:40:48 +02:00
" <td>42300.000000</td>\n",
2022-03-21 10:35:52 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>NaN</td>\n",
2022-04-11 00:40:48 +02:00
" <td>49.570000</td>\n",
" <td>20.700000</td>\n",
" <td>2.967500</td>\n",
2022-03-21 10:35:52 +01:00
" <td>0.020000</td>\n",
2022-04-11 00:40:48 +02:00
" <td>99.390000</td>\n",
2022-03-21 10:35:52 +01:00
" <td>100.000000</td>\n",
" <td>64800.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>NaN</td>\n",
2022-04-11 00:40:48 +02:00
" <td>153.140000</td>\n",
" <td>82.940000</td>\n",
" <td>27.650000</td>\n",
2022-03-21 10:35:52 +01:00
" <td>0.070000</td>\n",
" <td>100.000000</td>\n",
" <td>100.000000</td>\n",
" <td>85500.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" date Usage_kWh Lagging_Current_Reactive.Power_kVarh \\\n",
2022-04-11 00:40:48 +02:00
"count 3504 3504.000000 3504.000000 \n",
"unique 3504 NaN NaN \n",
"top 20/06/2018 13:00 NaN NaN \n",
2022-03-21 10:35:52 +01:00
"freq 1 NaN NaN \n",
2022-04-11 00:40:48 +02:00
"mean NaN 26.355685 12.374717 \n",
"std NaN 32.519749 15.830961 \n",
2022-03-21 10:35:52 +01:00
"min NaN 2.480000 0.000000 \n",
2022-04-11 00:40:48 +02:00
"25% NaN 3.192500 2.090000 \n",
"50% NaN 4.500000 4.900000 \n",
"75% NaN 49.570000 20.700000 \n",
"max NaN 153.140000 82.940000 \n",
2022-03-21 10:35:52 +01:00
"\n",
" Leading_Current_Reactive_Power_kVarh CO2(tCO2) \\\n",
2022-04-11 00:40:48 +02:00
"count 3504.000000 3504.000000 \n",
2022-03-21 10:35:52 +01:00
"unique NaN NaN \n",
"top NaN NaN \n",
"freq NaN NaN \n",
2022-04-11 00:40:48 +02:00
"mean 3.891093 0.011050 \n",
"std 7.353028 0.015762 \n",
2022-03-21 10:35:52 +01:00
"min 0.000000 0.000000 \n",
"25% 0.000000 0.000000 \n",
"50% 0.000000 0.000000 \n",
2022-04-11 00:40:48 +02:00
"75% 2.967500 0.020000 \n",
"max 27.650000 0.070000 \n",
2022-03-21 10:35:52 +01:00
"\n",
" Lagging_Current_Power_Factor Leading_Current_Power_Factor \\\n",
2022-04-11 00:40:48 +02:00
"count 3504.000000 3504.000000 \n",
2022-03-21 10:35:52 +01:00
"unique NaN NaN \n",
"top NaN NaN \n",
"freq NaN NaN \n",
2022-04-11 00:40:48 +02:00
"mean 80.687751 84.082794 \n",
"std 19.053018 30.614144 \n",
"min 40.290000 12.540000 \n",
"25% 63.130000 99.562500 \n",
"50% 88.210000 100.000000 \n",
"75% 99.390000 100.000000 \n",
2022-03-21 10:35:52 +01:00
"max 100.000000 100.000000 \n",
"\n",
" NSM WeekStatus Day_of_week Load_Type \n",
2022-04-11 00:40:48 +02:00
"count 3504.000000 3504 3504 3504 \n",
2022-03-21 10:35:52 +01:00
"unique NaN 2 7 3 \n",
2022-04-11 00:40:48 +02:00
"top NaN Weekday Wednesday Light_Load \n",
"freq NaN 2522 527 1837 \n",
"mean 42594.092466 NaN NaN NaN \n",
"std 25222.804637 NaN NaN NaN \n",
2022-03-21 10:35:52 +01:00
"min 0.000000 NaN NaN NaN \n",
2022-04-11 00:40:48 +02:00
"25% 20700.000000 NaN NaN NaN \n",
"50% 42300.000000 NaN NaN NaN \n",
2022-03-21 10:35:52 +01:00
"75% 64800.000000 NaN NaN NaN \n",
"max 85500.000000 NaN NaN NaN "
]
},
2022-04-11 00:40:48 +02:00
"execution_count": 8,
2022-03-21 10:35:52 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test_data.describe(include='all')"
]
},
{
"cell_type": "code",
2022-04-11 00:40:48 +02:00
"execution_count": 9,
2022-03-21 10:35:52 +01:00
"id": "attempted-lafayette",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>Usage_kWh</th>\n",
" <th>Lagging_Current_Reactive.Power_kVarh</th>\n",
" <th>Leading_Current_Reactive_Power_kVarh</th>\n",
" <th>CO2(tCO2)</th>\n",
" <th>Lagging_Current_Power_Factor</th>\n",
" <th>Leading_Current_Power_Factor</th>\n",
" <th>NSM</th>\n",
" <th>WeekStatus</th>\n",
" <th>Day_of_week</th>\n",
" <th>Load_Type</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
2022-04-11 00:40:48 +02:00
" <td>3504</td>\n",
" <td>3504.000000</td>\n",
" <td>3504.000000</td>\n",
" <td>3504.000000</td>\n",
" <td>3504.000000</td>\n",
" <td>3504.000000</td>\n",
" <td>3504.000000</td>\n",
" <td>3504.000000</td>\n",
" <td>3504</td>\n",
" <td>3504</td>\n",
" <td>3504</td>\n",
2022-03-21 10:35:52 +01:00
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
2022-04-11 00:40:48 +02:00
" <td>3504</td>\n",
2022-03-21 10:35:52 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>7</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
2022-04-11 00:40:48 +02:00
" <td>16/11/2018 16:45</td>\n",
2022-03-21 10:35:52 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Weekday</td>\n",
2022-04-11 00:40:48 +02:00
" <td>Tuesday</td>\n",
2022-03-21 10:35:52 +01:00
" <td>Light_Load</td>\n",
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2022-04-11 00:40:48 +02:00
" <td>2536</td>\n",
" <td>543</td>\n",
" <td>1768</td>\n",
2022-03-21 10:35:52 +01:00
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>NaN</td>\n",
2022-04-11 00:40:48 +02:00
" <td>28.791849</td>\n",
" <td>13.764709</td>\n",
" <td>3.818382</td>\n",
" <td>0.012212</td>\n",
" <td>80.931650</td>\n",
" <td>84.639817</td>\n",
" <td>42814.469178</td>\n",
2022-03-21 10:35:52 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>NaN</td>\n",
2022-04-11 00:40:48 +02:00
" <td>34.115238</td>\n",
" <td>16.872400</td>\n",
" <td>7.325016</td>\n",
" <td>0.016499</td>\n",
" <td>18.696834</td>\n",
" <td>30.258743</td>\n",
" <td>24628.829557</td>\n",
2022-03-21 10:35:52 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>NaN</td>\n",
2022-04-11 00:40:48 +02:00
" <td>2.480000</td>\n",
2022-03-21 10:35:52 +01:00
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>38.330000</td>\n",
2022-04-11 00:40:48 +02:00
" <td>13.050000</td>\n",
2022-03-21 10:35:52 +01:00
" <td>0.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>NaN</td>\n",
2022-04-11 00:40:48 +02:00
" <td>3.240000</td>\n",
" <td>2.380000</td>\n",
2022-03-21 10:35:52 +01:00
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
2022-04-11 00:40:48 +02:00
" <td>64.112500</td>\n",
" <td>99.730000</td>\n",
" <td>21600.000000</td>\n",
2022-03-21 10:35:52 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>NaN</td>\n",
2022-04-11 00:40:48 +02:00
" <td>4.720000</td>\n",
2022-03-21 10:35:52 +01:00
" <td>5.110000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
2022-04-11 00:40:48 +02:00
" <td>88.325000</td>\n",
2022-03-21 10:35:52 +01:00
" <td>100.000000</td>\n",
" <td>43200.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>NaN</td>\n",
2022-04-11 00:40:48 +02:00
" <td>53.227500</td>\n",
" <td>24.810000</td>\n",
" <td>1.917500</td>\n",
2022-03-21 10:35:52 +01:00
" <td>0.020000</td>\n",
2022-04-11 00:40:48 +02:00
" <td>98.792500</td>\n",
2022-03-21 10:35:52 +01:00
" <td>100.000000</td>\n",
" <td>63900.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>NaN</td>\n",
2022-04-11 00:40:48 +02:00
" <td>146.880000</td>\n",
" <td>87.700000</td>\n",
" <td>27.540000</td>\n",
" <td>0.070000</td>\n",
2022-03-21 10:35:52 +01:00
" <td>100.000000</td>\n",
" <td>100.000000</td>\n",
" <td>85500.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" date Usage_kWh Lagging_Current_Reactive.Power_kVarh \\\n",
2022-04-11 00:40:48 +02:00
"count 3504 3504.000000 3504.000000 \n",
"unique 3504 NaN NaN \n",
"top 16/11/2018 16:45 NaN NaN \n",
2022-03-21 10:35:52 +01:00
"freq 1 NaN NaN \n",
2022-04-11 00:40:48 +02:00
"mean NaN 28.791849 13.764709 \n",
"std NaN 34.115238 16.872400 \n",
"min NaN 2.480000 0.000000 \n",
"25% NaN 3.240000 2.380000 \n",
"50% NaN 4.720000 5.110000 \n",
"75% NaN 53.227500 24.810000 \n",
"max NaN 146.880000 87.700000 \n",
2022-03-21 10:35:52 +01:00
"\n",
" Leading_Current_Reactive_Power_kVarh CO2(tCO2) \\\n",
2022-04-11 00:40:48 +02:00
"count 3504.000000 3504.000000 \n",
2022-03-21 10:35:52 +01:00
"unique NaN NaN \n",
"top NaN NaN \n",
"freq NaN NaN \n",
2022-04-11 00:40:48 +02:00
"mean 3.818382 0.012212 \n",
"std 7.325016 0.016499 \n",
2022-03-21 10:35:52 +01:00
"min 0.000000 0.000000 \n",
"25% 0.000000 0.000000 \n",
"50% 0.000000 0.000000 \n",
2022-04-11 00:40:48 +02:00
"75% 1.917500 0.020000 \n",
"max 27.540000 0.070000 \n",
2022-03-21 10:35:52 +01:00
"\n",
" Lagging_Current_Power_Factor Leading_Current_Power_Factor \\\n",
2022-04-11 00:40:48 +02:00
"count 3504.000000 3504.000000 \n",
2022-03-21 10:35:52 +01:00
"unique NaN NaN \n",
"top NaN NaN \n",
"freq NaN NaN \n",
2022-04-11 00:40:48 +02:00
"mean 80.931650 84.639817 \n",
"std 18.696834 30.258743 \n",
"min 38.330000 13.050000 \n",
"25% 64.112500 99.730000 \n",
"50% 88.325000 100.000000 \n",
"75% 98.792500 100.000000 \n",
2022-03-21 10:35:52 +01:00
"max 100.000000 100.000000 \n",
"\n",
" NSM WeekStatus Day_of_week Load_Type \n",
2022-04-11 00:40:48 +02:00
"count 3504.000000 3504 3504 3504 \n",
2022-03-21 10:35:52 +01:00
"unique NaN 2 7 3 \n",
2022-04-11 00:40:48 +02:00
"top NaN Weekday Tuesday Light_Load \n",
"freq NaN 2536 543 1768 \n",
"mean 42814.469178 NaN NaN NaN \n",
"std 24628.829557 NaN NaN NaN \n",
2022-03-21 10:35:52 +01:00
"min 0.000000 NaN NaN NaN \n",
2022-04-11 00:40:48 +02:00
"25% 21600.000000 NaN NaN NaN \n",
2022-03-21 10:35:52 +01:00
"50% 43200.000000 NaN NaN NaN \n",
"75% 63900.000000 NaN NaN NaN \n",
"max 85500.000000 NaN NaN NaN "
]
},
2022-04-11 00:40:48 +02:00
"execution_count": 9,
2022-03-21 10:35:52 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dev_data.describe(include='all')"
]
2022-04-11 00:40:48 +02:00
},
{
"cell_type": "code",
"execution_count": 11,
"id": "banned-scottish",
"metadata": {},
"outputs": [],
"source": [
"test_data.to_csv(\"steel_industry_data_test.csv\", encoding=\"utf-8\", index=False)\n",
"dev_data.to_csv(\"steel_industry_data_dev.csv\", encoding=\"utf-8\", index=False)\n",
"train_data.to_csv(\"steel_industry_data_train.csv\", encoding=\"utf-8\", index=False)"
]
2022-03-21 10:35:52 +01:00
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
2022-04-11 00:40:48 +02:00
}