1480 lines
57 KiB
Plaintext
1480 lines
57 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"id": "expected-payroll",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Requirement already satisfied: kaggle in c:\\users\\cgala\\anaconda3\\lib\\site-packages (1.5.12)\n",
|
||
"Requirement already satisfied: tqdm in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from kaggle) (4.50.2)\n",
|
||
"Requirement already satisfied: requests in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from kaggle) (2.24.0)\n",
|
||
"Requirement already satisfied: certifi in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from kaggle) (2020.6.20)\n",
|
||
"Requirement already satisfied: six>=1.10 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from kaggle) (1.15.0)\n",
|
||
"Requirement already satisfied: python-slugify in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from kaggle) (6.1.1)\n",
|
||
"Requirement already satisfied: urllib3 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from kaggle) (1.25.11)\n",
|
||
"Requirement already satisfied: python-dateutil in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from kaggle) (2.8.1)\n",
|
||
"Requirement already satisfied: idna<3,>=2.5 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from requests->kaggle) (2.10)\n",
|
||
"Requirement already satisfied: chardet<4,>=3.0.2 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from requests->kaggle) (3.0.4)\n",
|
||
"Requirement already satisfied: text-unidecode>=1.3 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from python-slugify->kaggle) (1.3)\n",
|
||
"Requirement already satisfied: pandas in c:\\users\\cgala\\anaconda3\\lib\\site-packages (1.1.3)\n",
|
||
"Requirement already satisfied: python-dateutil>=2.7.3 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from pandas) (2.8.1)\n",
|
||
"Requirement already satisfied: numpy>=1.15.4 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from pandas) (1.19.2)\n",
|
||
"Requirement already satisfied: pytz>=2017.2 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from pandas) (2020.1)\n",
|
||
"Requirement already satisfied: six>=1.5 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from python-dateutil>=2.7.3->pandas) (1.15.0)\n",
|
||
"Requirement already satisfied: seaborn in c:\\users\\cgala\\anaconda3\\lib\\site-packages (0.11.0)\n",
|
||
"Requirement already satisfied: numpy>=1.15 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from seaborn) (1.19.2)\n",
|
||
"Requirement already satisfied: scipy>=1.0 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from seaborn) (1.5.2)\n",
|
||
"Requirement already satisfied: matplotlib>=2.2 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from seaborn) (3.3.2)\n",
|
||
"Requirement already satisfied: pandas>=0.23 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from seaborn) (1.1.3)\n",
|
||
"Requirement already satisfied: python-dateutil>=2.1 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2.8.1)\n",
|
||
"Requirement already satisfied: kiwisolver>=1.0.1 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (1.3.0)\n",
|
||
"Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.3 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2.4.7)\n",
|
||
"Requirement already satisfied: cycler>=0.10 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n",
|
||
"Requirement already satisfied: pillow>=6.2.0 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (8.0.1)\n",
|
||
"Requirement already satisfied: certifi>=2020.06.20 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2020.6.20)\n",
|
||
"Requirement already satisfied: pytz>=2017.2 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from pandas>=0.23->seaborn) (2020.1)\n",
|
||
"Requirement already satisfied: six>=1.5 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from python-dateutil>=2.1->matplotlib>=2.2->seaborn) (1.15.0)\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"!pip install kaggle\n",
|
||
"!pip install pandas\n",
|
||
"!pip install seaborn"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"id": "genetic-plaintiff",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Downloading steel-industry-energy-consumption.zip to D:\\UAM zajecia\\IUM\\ium_470623\n",
|
||
"\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"\n",
|
||
" 0%| | 0.00/484k [00:00<?, ?B/s]\n",
|
||
"100%|##########| 484k/484k [00:00<00:00, 2.36MB/s]\n",
|
||
"100%|##########| 484k/484k [00:00<00:00, 2.36MB/s]\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"!kaggle datasets download -d csafrit2/steel-industry-energy-consumption --force"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"id": "compatible-following",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Archive: steel-industry-energy-consumption.zip\n",
|
||
" inflating: Steel_industry_data.csv \n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"!unzip -o steel-industry-energy-consumption.zip"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"id": "interstate-ethnic",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>date</th>\n",
|
||
" <th>Usage_kWh</th>\n",
|
||
" <th>Lagging_Current_Reactive.Power_kVarh</th>\n",
|
||
" <th>Leading_Current_Reactive_Power_kVarh</th>\n",
|
||
" <th>CO2(tCO2)</th>\n",
|
||
" <th>Lagging_Current_Power_Factor</th>\n",
|
||
" <th>Leading_Current_Power_Factor</th>\n",
|
||
" <th>NSM</th>\n",
|
||
" <th>WeekStatus</th>\n",
|
||
" <th>Day_of_week</th>\n",
|
||
" <th>Load_Type</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>01/01/2018 00:15</td>\n",
|
||
" <td>3.17</td>\n",
|
||
" <td>2.95</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>73.21</td>\n",
|
||
" <td>100.00</td>\n",
|
||
" <td>900</td>\n",
|
||
" <td>Weekday</td>\n",
|
||
" <td>Monday</td>\n",
|
||
" <td>Light_Load</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>01/01/2018 00:30</td>\n",
|
||
" <td>4.00</td>\n",
|
||
" <td>4.46</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>66.77</td>\n",
|
||
" <td>100.00</td>\n",
|
||
" <td>1800</td>\n",
|
||
" <td>Weekday</td>\n",
|
||
" <td>Monday</td>\n",
|
||
" <td>Light_Load</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>01/01/2018 00:45</td>\n",
|
||
" <td>3.24</td>\n",
|
||
" <td>3.28</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>70.28</td>\n",
|
||
" <td>100.00</td>\n",
|
||
" <td>2700</td>\n",
|
||
" <td>Weekday</td>\n",
|
||
" <td>Monday</td>\n",
|
||
" <td>Light_Load</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>01/01/2018 01:00</td>\n",
|
||
" <td>3.31</td>\n",
|
||
" <td>3.56</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>68.09</td>\n",
|
||
" <td>100.00</td>\n",
|
||
" <td>3600</td>\n",
|
||
" <td>Weekday</td>\n",
|
||
" <td>Monday</td>\n",
|
||
" <td>Light_Load</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>01/01/2018 01:15</td>\n",
|
||
" <td>3.82</td>\n",
|
||
" <td>4.50</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>64.72</td>\n",
|
||
" <td>100.00</td>\n",
|
||
" <td>4500</td>\n",
|
||
" <td>Weekday</td>\n",
|
||
" <td>Monday</td>\n",
|
||
" <td>Light_Load</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>35035</th>\n",
|
||
" <td>31/12/2018 23:00</td>\n",
|
||
" <td>3.85</td>\n",
|
||
" <td>4.86</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>62.10</td>\n",
|
||
" <td>100.00</td>\n",
|
||
" <td>82800</td>\n",
|
||
" <td>Weekday</td>\n",
|
||
" <td>Monday</td>\n",
|
||
" <td>Light_Load</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>35036</th>\n",
|
||
" <td>31/12/2018 23:15</td>\n",
|
||
" <td>3.74</td>\n",
|
||
" <td>3.74</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>70.71</td>\n",
|
||
" <td>100.00</td>\n",
|
||
" <td>83700</td>\n",
|
||
" <td>Weekday</td>\n",
|
||
" <td>Monday</td>\n",
|
||
" <td>Light_Load</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>35037</th>\n",
|
||
" <td>31/12/2018 23:30</td>\n",
|
||
" <td>3.78</td>\n",
|
||
" <td>3.17</td>\n",
|
||
" <td>0.07</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>76.62</td>\n",
|
||
" <td>99.98</td>\n",
|
||
" <td>84600</td>\n",
|
||
" <td>Weekday</td>\n",
|
||
" <td>Monday</td>\n",
|
||
" <td>Light_Load</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>35038</th>\n",
|
||
" <td>31/12/2018 23:45</td>\n",
|
||
" <td>3.78</td>\n",
|
||
" <td>3.06</td>\n",
|
||
" <td>0.11</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>77.72</td>\n",
|
||
" <td>99.96</td>\n",
|
||
" <td>85500</td>\n",
|
||
" <td>Weekday</td>\n",
|
||
" <td>Monday</td>\n",
|
||
" <td>Light_Load</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>35039</th>\n",
|
||
" <td>31/12/2018 00:00</td>\n",
|
||
" <td>3.67</td>\n",
|
||
" <td>3.02</td>\n",
|
||
" <td>0.07</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>77.22</td>\n",
|
||
" <td>99.98</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>Weekday</td>\n",
|
||
" <td>Monday</td>\n",
|
||
" <td>Light_Load</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>35040 rows × 11 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" date Usage_kWh Lagging_Current_Reactive.Power_kVarh \\\n",
|
||
"0 01/01/2018 00:15 3.17 2.95 \n",
|
||
"1 01/01/2018 00:30 4.00 4.46 \n",
|
||
"2 01/01/2018 00:45 3.24 3.28 \n",
|
||
"3 01/01/2018 01:00 3.31 3.56 \n",
|
||
"4 01/01/2018 01:15 3.82 4.50 \n",
|
||
"... ... ... ... \n",
|
||
"35035 31/12/2018 23:00 3.85 4.86 \n",
|
||
"35036 31/12/2018 23:15 3.74 3.74 \n",
|
||
"35037 31/12/2018 23:30 3.78 3.17 \n",
|
||
"35038 31/12/2018 23:45 3.78 3.06 \n",
|
||
"35039 31/12/2018 00:00 3.67 3.02 \n",
|
||
"\n",
|
||
" Leading_Current_Reactive_Power_kVarh CO2(tCO2) \\\n",
|
||
"0 0.00 0.0 \n",
|
||
"1 0.00 0.0 \n",
|
||
"2 0.00 0.0 \n",
|
||
"3 0.00 0.0 \n",
|
||
"4 0.00 0.0 \n",
|
||
"... ... ... \n",
|
||
"35035 0.00 0.0 \n",
|
||
"35036 0.00 0.0 \n",
|
||
"35037 0.07 0.0 \n",
|
||
"35038 0.11 0.0 \n",
|
||
"35039 0.07 0.0 \n",
|
||
"\n",
|
||
" Lagging_Current_Power_Factor Leading_Current_Power_Factor NSM \\\n",
|
||
"0 73.21 100.00 900 \n",
|
||
"1 66.77 100.00 1800 \n",
|
||
"2 70.28 100.00 2700 \n",
|
||
"3 68.09 100.00 3600 \n",
|
||
"4 64.72 100.00 4500 \n",
|
||
"... ... ... ... \n",
|
||
"35035 62.10 100.00 82800 \n",
|
||
"35036 70.71 100.00 83700 \n",
|
||
"35037 76.62 99.98 84600 \n",
|
||
"35038 77.72 99.96 85500 \n",
|
||
"35039 77.22 99.98 0 \n",
|
||
"\n",
|
||
" WeekStatus Day_of_week Load_Type \n",
|
||
"0 Weekday Monday Light_Load \n",
|
||
"1 Weekday Monday Light_Load \n",
|
||
"2 Weekday Monday Light_Load \n",
|
||
"3 Weekday Monday Light_Load \n",
|
||
"4 Weekday Monday Light_Load \n",
|
||
"... ... ... ... \n",
|
||
"35035 Weekday Monday Light_Load \n",
|
||
"35036 Weekday Monday Light_Load \n",
|
||
"35037 Weekday Monday Light_Load \n",
|
||
"35038 Weekday Monday Light_Load \n",
|
||
"35039 Weekday Monday Light_Load \n",
|
||
"\n",
|
||
"[35040 rows x 11 columns]"
|
||
]
|
||
},
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"energy_data=pd.read_csv('Steel_industry_data.csv')\n",
|
||
"energy_data"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"id": "heated-spectacular",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>date</th>\n",
|
||
" <th>Usage_kWh</th>\n",
|
||
" <th>Lagging_Current_Reactive.Power_kVarh</th>\n",
|
||
" <th>Leading_Current_Reactive_Power_kVarh</th>\n",
|
||
" <th>CO2(tCO2)</th>\n",
|
||
" <th>Lagging_Current_Power_Factor</th>\n",
|
||
" <th>Leading_Current_Power_Factor</th>\n",
|
||
" <th>NSM</th>\n",
|
||
" <th>WeekStatus</th>\n",
|
||
" <th>Day_of_week</th>\n",
|
||
" <th>Load_Type</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>count</th>\n",
|
||
" <td>35040</td>\n",
|
||
" <td>35040.000000</td>\n",
|
||
" <td>35040.000000</td>\n",
|
||
" <td>35040.000000</td>\n",
|
||
" <td>35040.000000</td>\n",
|
||
" <td>35040.000000</td>\n",
|
||
" <td>35040.000000</td>\n",
|
||
" <td>35040.000000</td>\n",
|
||
" <td>35040</td>\n",
|
||
" <td>35040</td>\n",
|
||
" <td>35040</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>unique</th>\n",
|
||
" <td>35040</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>top</th>\n",
|
||
" <td>01/01/2018 00:15</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Weekday</td>\n",
|
||
" <td>Monday</td>\n",
|
||
" <td>Light_Load</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>freq</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>25056</td>\n",
|
||
" <td>5088</td>\n",
|
||
" <td>18072</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>mean</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>27.386892</td>\n",
|
||
" <td>13.035384</td>\n",
|
||
" <td>3.870949</td>\n",
|
||
" <td>0.011524</td>\n",
|
||
" <td>80.578056</td>\n",
|
||
" <td>84.367870</td>\n",
|
||
" <td>42750.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>std</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>33.444380</td>\n",
|
||
" <td>16.306000</td>\n",
|
||
" <td>7.424463</td>\n",
|
||
" <td>0.016151</td>\n",
|
||
" <td>18.921322</td>\n",
|
||
" <td>30.456535</td>\n",
|
||
" <td>24940.534317</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>min</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>25%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>3.200000</td>\n",
|
||
" <td>2.300000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>63.320000</td>\n",
|
||
" <td>99.700000</td>\n",
|
||
" <td>21375.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>50%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4.570000</td>\n",
|
||
" <td>5.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>87.960000</td>\n",
|
||
" <td>100.000000</td>\n",
|
||
" <td>42750.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>75%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>51.237500</td>\n",
|
||
" <td>22.640000</td>\n",
|
||
" <td>2.090000</td>\n",
|
||
" <td>0.020000</td>\n",
|
||
" <td>99.022500</td>\n",
|
||
" <td>100.000000</td>\n",
|
||
" <td>64125.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>max</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>157.180000</td>\n",
|
||
" <td>96.910000</td>\n",
|
||
" <td>27.760000</td>\n",
|
||
" <td>0.070000</td>\n",
|
||
" <td>100.000000</td>\n",
|
||
" <td>100.000000</td>\n",
|
||
" <td>85500.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" date Usage_kWh Lagging_Current_Reactive.Power_kVarh \\\n",
|
||
"count 35040 35040.000000 35040.000000 \n",
|
||
"unique 35040 NaN NaN \n",
|
||
"top 01/01/2018 00:15 NaN NaN \n",
|
||
"freq 1 NaN NaN \n",
|
||
"mean NaN 27.386892 13.035384 \n",
|
||
"std NaN 33.444380 16.306000 \n",
|
||
"min NaN 0.000000 0.000000 \n",
|
||
"25% NaN 3.200000 2.300000 \n",
|
||
"50% NaN 4.570000 5.000000 \n",
|
||
"75% NaN 51.237500 22.640000 \n",
|
||
"max NaN 157.180000 96.910000 \n",
|
||
"\n",
|
||
" Leading_Current_Reactive_Power_kVarh CO2(tCO2) \\\n",
|
||
"count 35040.000000 35040.000000 \n",
|
||
"unique NaN NaN \n",
|
||
"top NaN NaN \n",
|
||
"freq NaN NaN \n",
|
||
"mean 3.870949 0.011524 \n",
|
||
"std 7.424463 0.016151 \n",
|
||
"min 0.000000 0.000000 \n",
|
||
"25% 0.000000 0.000000 \n",
|
||
"50% 0.000000 0.000000 \n",
|
||
"75% 2.090000 0.020000 \n",
|
||
"max 27.760000 0.070000 \n",
|
||
"\n",
|
||
" Lagging_Current_Power_Factor Leading_Current_Power_Factor \\\n",
|
||
"count 35040.000000 35040.000000 \n",
|
||
"unique NaN NaN \n",
|
||
"top NaN NaN \n",
|
||
"freq NaN NaN \n",
|
||
"mean 80.578056 84.367870 \n",
|
||
"std 18.921322 30.456535 \n",
|
||
"min 0.000000 0.000000 \n",
|
||
"25% 63.320000 99.700000 \n",
|
||
"50% 87.960000 100.000000 \n",
|
||
"75% 99.022500 100.000000 \n",
|
||
"max 100.000000 100.000000 \n",
|
||
"\n",
|
||
" NSM WeekStatus Day_of_week Load_Type \n",
|
||
"count 35040.000000 35040 35040 35040 \n",
|
||
"unique NaN 2 7 3 \n",
|
||
"top NaN Weekday Monday Light_Load \n",
|
||
"freq NaN 25056 5088 18072 \n",
|
||
"mean 42750.000000 NaN NaN NaN \n",
|
||
"std 24940.534317 NaN NaN NaN \n",
|
||
"min 0.000000 NaN NaN NaN \n",
|
||
"25% 21375.000000 NaN NaN NaN \n",
|
||
"50% 42750.000000 NaN NaN NaN \n",
|
||
"75% 64125.000000 NaN NaN NaN \n",
|
||
"max 85500.000000 NaN NaN NaN "
|
||
]
|
||
},
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"energy_data.describe(include='all')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"id": "loved-delight",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Training set size:\n",
|
||
"(28032, 11)\n",
|
||
"Testing set size:\n",
|
||
"(3504, 11)\n",
|
||
"Dev set size:\n",
|
||
"(3504, 11)\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"from sklearn.model_selection import train_test_split\n",
|
||
"\n",
|
||
"train_data, test_data = train_test_split(energy_data, test_size=7008, random_state=1)\n",
|
||
"test_data, dev_data = train_test_split(test_data, test_size=3504, random_state=1)\n",
|
||
"print('Training set size:')\n",
|
||
"print(train_data.shape)\n",
|
||
"print('Testing set size:')\n",
|
||
"print(test_data.shape)\n",
|
||
"print('Dev set size:')\n",
|
||
"print(dev_data.shape)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"id": "formed-virginia",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>date</th>\n",
|
||
" <th>Usage_kWh</th>\n",
|
||
" <th>Lagging_Current_Reactive.Power_kVarh</th>\n",
|
||
" <th>Leading_Current_Reactive_Power_kVarh</th>\n",
|
||
" <th>CO2(tCO2)</th>\n",
|
||
" <th>Lagging_Current_Power_Factor</th>\n",
|
||
" <th>Leading_Current_Power_Factor</th>\n",
|
||
" <th>NSM</th>\n",
|
||
" <th>WeekStatus</th>\n",
|
||
" <th>Day_of_week</th>\n",
|
||
" <th>Load_Type</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>count</th>\n",
|
||
" <td>28032</td>\n",
|
||
" <td>28032.000000</td>\n",
|
||
" <td>28032.000000</td>\n",
|
||
" <td>28032.000000</td>\n",
|
||
" <td>28032.000000</td>\n",
|
||
" <td>28032.000000</td>\n",
|
||
" <td>28032.000000</td>\n",
|
||
" <td>28032.000000</td>\n",
|
||
" <td>28032</td>\n",
|
||
" <td>28032</td>\n",
|
||
" <td>28032</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>unique</th>\n",
|
||
" <td>28032</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>top</th>\n",
|
||
" <td>07/08/2018 14:15</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Weekday</td>\n",
|
||
" <td>Monday</td>\n",
|
||
" <td>Light_Load</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>freq</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>19998</td>\n",
|
||
" <td>4087</td>\n",
|
||
" <td>14467</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>mean</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>27.340174</td>\n",
|
||
" <td>13.026801</td>\n",
|
||
" <td>3.875001</td>\n",
|
||
" <td>0.011498</td>\n",
|
||
" <td>80.520145</td>\n",
|
||
" <td>84.369511</td>\n",
|
||
" <td>42761.429795</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>std</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>33.469130</td>\n",
|
||
" <td>16.289348</td>\n",
|
||
" <td>7.445898</td>\n",
|
||
" <td>0.016153</td>\n",
|
||
" <td>18.932825</td>\n",
|
||
" <td>30.462193</td>\n",
|
||
" <td>24944.585138</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>min</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>25%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>3.200000</td>\n",
|
||
" <td>2.300000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>63.227500</td>\n",
|
||
" <td>99.710000</td>\n",
|
||
" <td>20700.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>50%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4.570000</td>\n",
|
||
" <td>5.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>87.870000</td>\n",
|
||
" <td>100.000000</td>\n",
|
||
" <td>43200.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>75%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>51.190000</td>\n",
|
||
" <td>22.750000</td>\n",
|
||
" <td>2.020000</td>\n",
|
||
" <td>0.020000</td>\n",
|
||
" <td>99.000000</td>\n",
|
||
" <td>100.000000</td>\n",
|
||
" <td>64800.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>max</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>157.180000</td>\n",
|
||
" <td>96.910000</td>\n",
|
||
" <td>27.760000</td>\n",
|
||
" <td>0.070000</td>\n",
|
||
" <td>100.000000</td>\n",
|
||
" <td>100.000000</td>\n",
|
||
" <td>85500.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" date Usage_kWh Lagging_Current_Reactive.Power_kVarh \\\n",
|
||
"count 28032 28032.000000 28032.000000 \n",
|
||
"unique 28032 NaN NaN \n",
|
||
"top 07/08/2018 14:15 NaN NaN \n",
|
||
"freq 1 NaN NaN \n",
|
||
"mean NaN 27.340174 13.026801 \n",
|
||
"std NaN 33.469130 16.289348 \n",
|
||
"min NaN 0.000000 0.000000 \n",
|
||
"25% NaN 3.200000 2.300000 \n",
|
||
"50% NaN 4.570000 5.000000 \n",
|
||
"75% NaN 51.190000 22.750000 \n",
|
||
"max NaN 157.180000 96.910000 \n",
|
||
"\n",
|
||
" Leading_Current_Reactive_Power_kVarh CO2(tCO2) \\\n",
|
||
"count 28032.000000 28032.000000 \n",
|
||
"unique NaN NaN \n",
|
||
"top NaN NaN \n",
|
||
"freq NaN NaN \n",
|
||
"mean 3.875001 0.011498 \n",
|
||
"std 7.445898 0.016153 \n",
|
||
"min 0.000000 0.000000 \n",
|
||
"25% 0.000000 0.000000 \n",
|
||
"50% 0.000000 0.000000 \n",
|
||
"75% 2.020000 0.020000 \n",
|
||
"max 27.760000 0.070000 \n",
|
||
"\n",
|
||
" Lagging_Current_Power_Factor Leading_Current_Power_Factor \\\n",
|
||
"count 28032.000000 28032.000000 \n",
|
||
"unique NaN NaN \n",
|
||
"top NaN NaN \n",
|
||
"freq NaN NaN \n",
|
||
"mean 80.520145 84.369511 \n",
|
||
"std 18.932825 30.462193 \n",
|
||
"min 0.000000 0.000000 \n",
|
||
"25% 63.227500 99.710000 \n",
|
||
"50% 87.870000 100.000000 \n",
|
||
"75% 99.000000 100.000000 \n",
|
||
"max 100.000000 100.000000 \n",
|
||
"\n",
|
||
" NSM WeekStatus Day_of_week Load_Type \n",
|
||
"count 28032.000000 28032 28032 28032 \n",
|
||
"unique NaN 2 7 3 \n",
|
||
"top NaN Weekday Monday Light_Load \n",
|
||
"freq NaN 19998 4087 14467 \n",
|
||
"mean 42761.429795 NaN NaN NaN \n",
|
||
"std 24944.585138 NaN NaN NaN \n",
|
||
"min 0.000000 NaN NaN NaN \n",
|
||
"25% 20700.000000 NaN NaN NaN \n",
|
||
"50% 43200.000000 NaN NaN NaN \n",
|
||
"75% 64800.000000 NaN NaN NaN \n",
|
||
"max 85500.000000 NaN NaN NaN "
|
||
]
|
||
},
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"train_data.describe(include='all')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"id": "radical-score",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>date</th>\n",
|
||
" <th>Usage_kWh</th>\n",
|
||
" <th>Lagging_Current_Reactive.Power_kVarh</th>\n",
|
||
" <th>Leading_Current_Reactive_Power_kVarh</th>\n",
|
||
" <th>CO2(tCO2)</th>\n",
|
||
" <th>Lagging_Current_Power_Factor</th>\n",
|
||
" <th>Leading_Current_Power_Factor</th>\n",
|
||
" <th>NSM</th>\n",
|
||
" <th>WeekStatus</th>\n",
|
||
" <th>Day_of_week</th>\n",
|
||
" <th>Load_Type</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>count</th>\n",
|
||
" <td>3504</td>\n",
|
||
" <td>3504.000000</td>\n",
|
||
" <td>3504.000000</td>\n",
|
||
" <td>3504.000000</td>\n",
|
||
" <td>3504.000000</td>\n",
|
||
" <td>3504.000000</td>\n",
|
||
" <td>3504.000000</td>\n",
|
||
" <td>3504.000000</td>\n",
|
||
" <td>3504</td>\n",
|
||
" <td>3504</td>\n",
|
||
" <td>3504</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>unique</th>\n",
|
||
" <td>3504</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>top</th>\n",
|
||
" <td>20/06/2018 13:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Weekday</td>\n",
|
||
" <td>Wednesday</td>\n",
|
||
" <td>Light_Load</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>freq</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2522</td>\n",
|
||
" <td>527</td>\n",
|
||
" <td>1837</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>mean</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>26.355685</td>\n",
|
||
" <td>12.374717</td>\n",
|
||
" <td>3.891093</td>\n",
|
||
" <td>0.011050</td>\n",
|
||
" <td>80.687751</td>\n",
|
||
" <td>84.082794</td>\n",
|
||
" <td>42594.092466</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>std</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>32.519749</td>\n",
|
||
" <td>15.830961</td>\n",
|
||
" <td>7.353028</td>\n",
|
||
" <td>0.015762</td>\n",
|
||
" <td>19.053018</td>\n",
|
||
" <td>30.614144</td>\n",
|
||
" <td>25222.804637</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>min</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2.480000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>40.290000</td>\n",
|
||
" <td>12.540000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>25%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>3.192500</td>\n",
|
||
" <td>2.090000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>63.130000</td>\n",
|
||
" <td>99.562500</td>\n",
|
||
" <td>20700.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>50%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4.500000</td>\n",
|
||
" <td>4.900000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>88.210000</td>\n",
|
||
" <td>100.000000</td>\n",
|
||
" <td>42300.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>75%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>49.570000</td>\n",
|
||
" <td>20.700000</td>\n",
|
||
" <td>2.967500</td>\n",
|
||
" <td>0.020000</td>\n",
|
||
" <td>99.390000</td>\n",
|
||
" <td>100.000000</td>\n",
|
||
" <td>64800.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>max</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>153.140000</td>\n",
|
||
" <td>82.940000</td>\n",
|
||
" <td>27.650000</td>\n",
|
||
" <td>0.070000</td>\n",
|
||
" <td>100.000000</td>\n",
|
||
" <td>100.000000</td>\n",
|
||
" <td>85500.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" date Usage_kWh Lagging_Current_Reactive.Power_kVarh \\\n",
|
||
"count 3504 3504.000000 3504.000000 \n",
|
||
"unique 3504 NaN NaN \n",
|
||
"top 20/06/2018 13:00 NaN NaN \n",
|
||
"freq 1 NaN NaN \n",
|
||
"mean NaN 26.355685 12.374717 \n",
|
||
"std NaN 32.519749 15.830961 \n",
|
||
"min NaN 2.480000 0.000000 \n",
|
||
"25% NaN 3.192500 2.090000 \n",
|
||
"50% NaN 4.500000 4.900000 \n",
|
||
"75% NaN 49.570000 20.700000 \n",
|
||
"max NaN 153.140000 82.940000 \n",
|
||
"\n",
|
||
" Leading_Current_Reactive_Power_kVarh CO2(tCO2) \\\n",
|
||
"count 3504.000000 3504.000000 \n",
|
||
"unique NaN NaN \n",
|
||
"top NaN NaN \n",
|
||
"freq NaN NaN \n",
|
||
"mean 3.891093 0.011050 \n",
|
||
"std 7.353028 0.015762 \n",
|
||
"min 0.000000 0.000000 \n",
|
||
"25% 0.000000 0.000000 \n",
|
||
"50% 0.000000 0.000000 \n",
|
||
"75% 2.967500 0.020000 \n",
|
||
"max 27.650000 0.070000 \n",
|
||
"\n",
|
||
" Lagging_Current_Power_Factor Leading_Current_Power_Factor \\\n",
|
||
"count 3504.000000 3504.000000 \n",
|
||
"unique NaN NaN \n",
|
||
"top NaN NaN \n",
|
||
"freq NaN NaN \n",
|
||
"mean 80.687751 84.082794 \n",
|
||
"std 19.053018 30.614144 \n",
|
||
"min 40.290000 12.540000 \n",
|
||
"25% 63.130000 99.562500 \n",
|
||
"50% 88.210000 100.000000 \n",
|
||
"75% 99.390000 100.000000 \n",
|
||
"max 100.000000 100.000000 \n",
|
||
"\n",
|
||
" NSM WeekStatus Day_of_week Load_Type \n",
|
||
"count 3504.000000 3504 3504 3504 \n",
|
||
"unique NaN 2 7 3 \n",
|
||
"top NaN Weekday Wednesday Light_Load \n",
|
||
"freq NaN 2522 527 1837 \n",
|
||
"mean 42594.092466 NaN NaN NaN \n",
|
||
"std 25222.804637 NaN NaN NaN \n",
|
||
"min 0.000000 NaN NaN NaN \n",
|
||
"25% 20700.000000 NaN NaN NaN \n",
|
||
"50% 42300.000000 NaN NaN NaN \n",
|
||
"75% 64800.000000 NaN NaN NaN \n",
|
||
"max 85500.000000 NaN NaN NaN "
|
||
]
|
||
},
|
||
"execution_count": 8,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"test_data.describe(include='all')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"id": "attempted-lafayette",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>date</th>\n",
|
||
" <th>Usage_kWh</th>\n",
|
||
" <th>Lagging_Current_Reactive.Power_kVarh</th>\n",
|
||
" <th>Leading_Current_Reactive_Power_kVarh</th>\n",
|
||
" <th>CO2(tCO2)</th>\n",
|
||
" <th>Lagging_Current_Power_Factor</th>\n",
|
||
" <th>Leading_Current_Power_Factor</th>\n",
|
||
" <th>NSM</th>\n",
|
||
" <th>WeekStatus</th>\n",
|
||
" <th>Day_of_week</th>\n",
|
||
" <th>Load_Type</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>count</th>\n",
|
||
" <td>3504</td>\n",
|
||
" <td>3504.000000</td>\n",
|
||
" <td>3504.000000</td>\n",
|
||
" <td>3504.000000</td>\n",
|
||
" <td>3504.000000</td>\n",
|
||
" <td>3504.000000</td>\n",
|
||
" <td>3504.000000</td>\n",
|
||
" <td>3504.000000</td>\n",
|
||
" <td>3504</td>\n",
|
||
" <td>3504</td>\n",
|
||
" <td>3504</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>unique</th>\n",
|
||
" <td>3504</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>top</th>\n",
|
||
" <td>16/11/2018 16:45</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Weekday</td>\n",
|
||
" <td>Tuesday</td>\n",
|
||
" <td>Light_Load</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>freq</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2536</td>\n",
|
||
" <td>543</td>\n",
|
||
" <td>1768</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>mean</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>28.791849</td>\n",
|
||
" <td>13.764709</td>\n",
|
||
" <td>3.818382</td>\n",
|
||
" <td>0.012212</td>\n",
|
||
" <td>80.931650</td>\n",
|
||
" <td>84.639817</td>\n",
|
||
" <td>42814.469178</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>std</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>34.115238</td>\n",
|
||
" <td>16.872400</td>\n",
|
||
" <td>7.325016</td>\n",
|
||
" <td>0.016499</td>\n",
|
||
" <td>18.696834</td>\n",
|
||
" <td>30.258743</td>\n",
|
||
" <td>24628.829557</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>min</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2.480000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>38.330000</td>\n",
|
||
" <td>13.050000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>25%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>3.240000</td>\n",
|
||
" <td>2.380000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>64.112500</td>\n",
|
||
" <td>99.730000</td>\n",
|
||
" <td>21600.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>50%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4.720000</td>\n",
|
||
" <td>5.110000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>88.325000</td>\n",
|
||
" <td>100.000000</td>\n",
|
||
" <td>43200.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>75%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>53.227500</td>\n",
|
||
" <td>24.810000</td>\n",
|
||
" <td>1.917500</td>\n",
|
||
" <td>0.020000</td>\n",
|
||
" <td>98.792500</td>\n",
|
||
" <td>100.000000</td>\n",
|
||
" <td>63900.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>max</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>146.880000</td>\n",
|
||
" <td>87.700000</td>\n",
|
||
" <td>27.540000</td>\n",
|
||
" <td>0.070000</td>\n",
|
||
" <td>100.000000</td>\n",
|
||
" <td>100.000000</td>\n",
|
||
" <td>85500.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" date Usage_kWh Lagging_Current_Reactive.Power_kVarh \\\n",
|
||
"count 3504 3504.000000 3504.000000 \n",
|
||
"unique 3504 NaN NaN \n",
|
||
"top 16/11/2018 16:45 NaN NaN \n",
|
||
"freq 1 NaN NaN \n",
|
||
"mean NaN 28.791849 13.764709 \n",
|
||
"std NaN 34.115238 16.872400 \n",
|
||
"min NaN 2.480000 0.000000 \n",
|
||
"25% NaN 3.240000 2.380000 \n",
|
||
"50% NaN 4.720000 5.110000 \n",
|
||
"75% NaN 53.227500 24.810000 \n",
|
||
"max NaN 146.880000 87.700000 \n",
|
||
"\n",
|
||
" Leading_Current_Reactive_Power_kVarh CO2(tCO2) \\\n",
|
||
"count 3504.000000 3504.000000 \n",
|
||
"unique NaN NaN \n",
|
||
"top NaN NaN \n",
|
||
"freq NaN NaN \n",
|
||
"mean 3.818382 0.012212 \n",
|
||
"std 7.325016 0.016499 \n",
|
||
"min 0.000000 0.000000 \n",
|
||
"25% 0.000000 0.000000 \n",
|
||
"50% 0.000000 0.000000 \n",
|
||
"75% 1.917500 0.020000 \n",
|
||
"max 27.540000 0.070000 \n",
|
||
"\n",
|
||
" Lagging_Current_Power_Factor Leading_Current_Power_Factor \\\n",
|
||
"count 3504.000000 3504.000000 \n",
|
||
"unique NaN NaN \n",
|
||
"top NaN NaN \n",
|
||
"freq NaN NaN \n",
|
||
"mean 80.931650 84.639817 \n",
|
||
"std 18.696834 30.258743 \n",
|
||
"min 38.330000 13.050000 \n",
|
||
"25% 64.112500 99.730000 \n",
|
||
"50% 88.325000 100.000000 \n",
|
||
"75% 98.792500 100.000000 \n",
|
||
"max 100.000000 100.000000 \n",
|
||
"\n",
|
||
" NSM WeekStatus Day_of_week Load_Type \n",
|
||
"count 3504.000000 3504 3504 3504 \n",
|
||
"unique NaN 2 7 3 \n",
|
||
"top NaN Weekday Tuesday Light_Load \n",
|
||
"freq NaN 2536 543 1768 \n",
|
||
"mean 42814.469178 NaN NaN NaN \n",
|
||
"std 24628.829557 NaN NaN NaN \n",
|
||
"min 0.000000 NaN NaN NaN \n",
|
||
"25% 21600.000000 NaN NaN NaN \n",
|
||
"50% 43200.000000 NaN NaN NaN \n",
|
||
"75% 63900.000000 NaN NaN NaN \n",
|
||
"max 85500.000000 NaN NaN NaN "
|
||
]
|
||
},
|
||
"execution_count": 9,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"dev_data.describe(include='all')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"id": "banned-scottish",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"test_data.to_csv(\"steel_industry_data_test.csv\", encoding=\"utf-8\", index=False)\n",
|
||
"dev_data.to_csv(\"steel_industry_data_dev.csv\", encoding=\"utf-8\", index=False)\n",
|
||
"train_data.to_csv(\"steel_industry_data_train.csv\", encoding=\"utf-8\", index=False)"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.9.2"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
} |