{
"cells": [
{
"cell_type": "code",
"execution_count": 9,
"id": "expected-payroll",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: kaggle in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (1.5.12)\n",
"Requirement already satisfied: six>=1.10 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from kaggle) (1.15.0)\n",
"Requirement already satisfied: certifi in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from kaggle) (2021.10.8)\n",
"Requirement already satisfied: python-dateutil in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from kaggle) (2.8.1)\n",
"Requirement already satisfied: requests in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from kaggle) (2.27.1)\n",
"Requirement already satisfied: tqdm in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from kaggle) (4.59.0)\n",
"Requirement already satisfied: python-slugify in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from kaggle) (6.1.1)\n",
"Requirement already satisfied: urllib3 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from kaggle) (1.26.9)\n",
"Requirement already satisfied: text-unidecode>=1.3 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from python-slugify->kaggle) (1.3)\n",
"Requirement already satisfied: idna<4,>=2.5 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from requests->kaggle) (3.3)\n",
"Requirement already satisfied: charset-normalizer~=2.0.0 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from requests->kaggle) (2.0.12)\n",
"Requirement already satisfied: pandas in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (1.4.1)\n",
"Requirement already satisfied: pytz>=2020.1 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from pandas) (2022.1)\n",
"Requirement already satisfied: python-dateutil>=2.8.1 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from pandas) (2.8.1)\n",
"Requirement already satisfied: numpy>=1.18.5 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from pandas) (1.20.1)\n",
"Requirement already satisfied: six>=1.5 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from python-dateutil>=2.8.1->pandas) (1.15.0)\n",
"Requirement already satisfied: seaborn in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (0.11.2)\n",
"Requirement already satisfied: pandas>=0.23 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from seaborn) (1.4.1)\n",
"Requirement already satisfied: numpy>=1.15 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from seaborn) (1.20.1)\n",
"Requirement already satisfied: scipy>=1.0 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from seaborn) (1.6.1)\n",
"Requirement already satisfied: matplotlib>=2.2 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from seaborn) (3.5.1)\n",
"Requirement already satisfied: packaging>=20.0 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from matplotlib>=2.2->seaborn) (20.9)\n",
"Requirement already satisfied: pyparsing>=2.2.1 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2.4.7)\n",
"Requirement already satisfied: fonttools>=4.22.0 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from matplotlib>=2.2->seaborn) (4.31.1)\n",
"Requirement already satisfied: python-dateutil>=2.7 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2.8.1)\n",
"Requirement already satisfied: pillow>=6.2.0 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from matplotlib>=2.2->seaborn) (9.0.1)\n",
"Requirement already satisfied: kiwisolver>=1.0.1 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from matplotlib>=2.2->seaborn) (1.4.0)\n",
"Requirement already satisfied: cycler>=0.10 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from matplotlib>=2.2->seaborn) (0.11.0)\n",
"Requirement already satisfied: pytz>=2020.1 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from pandas>=0.23->seaborn) (2022.1)\n",
"Requirement already satisfied: six>=1.5 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from python-dateutil>=2.7->matplotlib>=2.2->seaborn) (1.15.0)\n"
]
}
],
"source": [
"# Use the %pip magic so the packages are installed into the environment of the running kernel\n",
"# (a bare `!pip` can resolve to a different Python installation).\n",
"# scikit-learn is included because a later cell imports sklearn.model_selection.\n",
"%pip install kaggle pandas seaborn scikit-learn"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "genetic-plaintiff",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading steel-industry-energy-consumption.zip to D:\\UAM zajecia\\IUM\\ium_470623\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
" 0%| | 0.00/484k [00:00, ?B/s]\n",
"100%|##########| 484k/484k [00:00<00:00, 3.32MB/s]\n",
"100%|##########| 484k/484k [00:00<00:00, 3.29MB/s]\n"
]
}
],
"source": [
"# Fetch the dataset archive (steel-industry-energy-consumption.zip) into the current working directory.\n",
"# NOTE(review): presumably needs Kaggle API credentials configured for the kaggle CLI - confirm.\n",
"!kaggle datasets download -d csafrit2/steel-industry-energy-consumption"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "compatible-following",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Archive: steel-industry-energy-consumption.zip\n",
" inflating: Steel_industry_data.csv \n"
]
}
],
"source": [
"import zipfile\n",
"\n",
"# Extract with the standard library rather than the external `unzip` binary, which is not\n",
"# available on every platform. Like `unzip -o`, extractall() overwrites existing files.\n",
"with zipfile.ZipFile('steel-industry-energy-consumption.zip') as archive:\n",
"    archive.extractall()\n",
"    print(archive.namelist())"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "interstate-ethnic",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" date | \n",
" Usage_kWh | \n",
" Lagging_Current_Reactive.Power_kVarh | \n",
" Leading_Current_Reactive_Power_kVarh | \n",
" CO2(tCO2) | \n",
" Lagging_Current_Power_Factor | \n",
" Leading_Current_Power_Factor | \n",
" NSM | \n",
" WeekStatus | \n",
" Day_of_week | \n",
" Load_Type | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 01/01/2018 00:15 | \n",
" 3.17 | \n",
" 2.95 | \n",
" 0.00 | \n",
" 0.0 | \n",
" 73.21 | \n",
" 100.00 | \n",
" 900 | \n",
" Weekday | \n",
" Monday | \n",
" Light_Load | \n",
"
\n",
" \n",
" 1 | \n",
" 01/01/2018 00:30 | \n",
" 4.00 | \n",
" 4.46 | \n",
" 0.00 | \n",
" 0.0 | \n",
" 66.77 | \n",
" 100.00 | \n",
" 1800 | \n",
" Weekday | \n",
" Monday | \n",
" Light_Load | \n",
"
\n",
" \n",
" 2 | \n",
" 01/01/2018 00:45 | \n",
" 3.24 | \n",
" 3.28 | \n",
" 0.00 | \n",
" 0.0 | \n",
" 70.28 | \n",
" 100.00 | \n",
" 2700 | \n",
" Weekday | \n",
" Monday | \n",
" Light_Load | \n",
"
\n",
" \n",
" 3 | \n",
" 01/01/2018 01:00 | \n",
" 3.31 | \n",
" 3.56 | \n",
" 0.00 | \n",
" 0.0 | \n",
" 68.09 | \n",
" 100.00 | \n",
" 3600 | \n",
" Weekday | \n",
" Monday | \n",
" Light_Load | \n",
"
\n",
" \n",
" 4 | \n",
" 01/01/2018 01:15 | \n",
" 3.82 | \n",
" 4.50 | \n",
" 0.00 | \n",
" 0.0 | \n",
" 64.72 | \n",
" 100.00 | \n",
" 4500 | \n",
" Weekday | \n",
" Monday | \n",
" Light_Load | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 35035 | \n",
" 31/12/2018 23:00 | \n",
" 3.85 | \n",
" 4.86 | \n",
" 0.00 | \n",
" 0.0 | \n",
" 62.10 | \n",
" 100.00 | \n",
" 82800 | \n",
" Weekday | \n",
" Monday | \n",
" Light_Load | \n",
"
\n",
" \n",
" 35036 | \n",
" 31/12/2018 23:15 | \n",
" 3.74 | \n",
" 3.74 | \n",
" 0.00 | \n",
" 0.0 | \n",
" 70.71 | \n",
" 100.00 | \n",
" 83700 | \n",
" Weekday | \n",
" Monday | \n",
" Light_Load | \n",
"
\n",
" \n",
" 35037 | \n",
" 31/12/2018 23:30 | \n",
" 3.78 | \n",
" 3.17 | \n",
" 0.07 | \n",
" 0.0 | \n",
" 76.62 | \n",
" 99.98 | \n",
" 84600 | \n",
" Weekday | \n",
" Monday | \n",
" Light_Load | \n",
"
\n",
" \n",
" 35038 | \n",
" 31/12/2018 23:45 | \n",
" 3.78 | \n",
" 3.06 | \n",
" 0.11 | \n",
" 0.0 | \n",
" 77.72 | \n",
" 99.96 | \n",
" 85500 | \n",
" Weekday | \n",
" Monday | \n",
" Light_Load | \n",
"
\n",
" \n",
" 35039 | \n",
" 31/12/2018 00:00 | \n",
" 3.67 | \n",
" 3.02 | \n",
" 0.07 | \n",
" 0.0 | \n",
" 77.22 | \n",
" 99.98 | \n",
" 0 | \n",
" Weekday | \n",
" Monday | \n",
" Light_Load | \n",
"
\n",
" \n",
"
\n",
"
35040 rows × 11 columns
\n",
"
"
],
"text/plain": [
" date Usage_kWh Lagging_Current_Reactive.Power_kVarh \\\n",
"0 01/01/2018 00:15 3.17 2.95 \n",
"1 01/01/2018 00:30 4.00 4.46 \n",
"2 01/01/2018 00:45 3.24 3.28 \n",
"3 01/01/2018 01:00 3.31 3.56 \n",
"4 01/01/2018 01:15 3.82 4.50 \n",
"... ... ... ... \n",
"35035 31/12/2018 23:00 3.85 4.86 \n",
"35036 31/12/2018 23:15 3.74 3.74 \n",
"35037 31/12/2018 23:30 3.78 3.17 \n",
"35038 31/12/2018 23:45 3.78 3.06 \n",
"35039 31/12/2018 00:00 3.67 3.02 \n",
"\n",
" Leading_Current_Reactive_Power_kVarh CO2(tCO2) \\\n",
"0 0.00 0.0 \n",
"1 0.00 0.0 \n",
"2 0.00 0.0 \n",
"3 0.00 0.0 \n",
"4 0.00 0.0 \n",
"... ... ... \n",
"35035 0.00 0.0 \n",
"35036 0.00 0.0 \n",
"35037 0.07 0.0 \n",
"35038 0.11 0.0 \n",
"35039 0.07 0.0 \n",
"\n",
" Lagging_Current_Power_Factor Leading_Current_Power_Factor NSM \\\n",
"0 73.21 100.00 900 \n",
"1 66.77 100.00 1800 \n",
"2 70.28 100.00 2700 \n",
"3 68.09 100.00 3600 \n",
"4 64.72 100.00 4500 \n",
"... ... ... ... \n",
"35035 62.10 100.00 82800 \n",
"35036 70.71 100.00 83700 \n",
"35037 76.62 99.98 84600 \n",
"35038 77.72 99.96 85500 \n",
"35039 77.22 99.98 0 \n",
"\n",
" WeekStatus Day_of_week Load_Type \n",
"0 Weekday Monday Light_Load \n",
"1 Weekday Monday Light_Load \n",
"2 Weekday Monday Light_Load \n",
"3 Weekday Monday Light_Load \n",
"4 Weekday Monday Light_Load \n",
"... ... ... ... \n",
"35035 Weekday Monday Light_Load \n",
"35036 Weekday Monday Light_Load \n",
"35037 Weekday Monday Light_Load \n",
"35038 Weekday Monday Light_Load \n",
"35039 Weekday Monday Light_Load \n",
"\n",
"[35040 rows x 11 columns]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"# Read the CSV extracted from the Kaggle archive; the bare name on the last line\n",
"# renders the frame (pandas truncates the display to the first and last rows).\n",
"energy_data = pd.read_csv(filepath_or_buffer='Steel_industry_data.csv')\n",
"energy_data"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "heated-spectacular",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" date | \n",
" Usage_kWh | \n",
" Lagging_Current_Reactive.Power_kVarh | \n",
" Leading_Current_Reactive_Power_kVarh | \n",
" CO2(tCO2) | \n",
" Lagging_Current_Power_Factor | \n",
" Leading_Current_Power_Factor | \n",
" NSM | \n",
" WeekStatus | \n",
" Day_of_week | \n",
" Load_Type | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 35040 | \n",
" 35040.000000 | \n",
" 35040.000000 | \n",
" 35040.000000 | \n",
" 35040.000000 | \n",
" 35040.000000 | \n",
" 35040.000000 | \n",
" 35040.000000 | \n",
" 35040 | \n",
" 35040 | \n",
" 35040 | \n",
"
\n",
" \n",
" unique | \n",
" 35040 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 2 | \n",
" 7 | \n",
" 3 | \n",
"
\n",
" \n",
" top | \n",
" 01/01/2018 00:15 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" Weekday | \n",
" Monday | \n",
" Light_Load | \n",
"
\n",
" \n",
" freq | \n",
" 1 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 25056 | \n",
" 5088 | \n",
" 18072 | \n",
"
\n",
" \n",
" mean | \n",
" NaN | \n",
" 27.386892 | \n",
" 13.035384 | \n",
" 3.870949 | \n",
" 0.011524 | \n",
" 80.578056 | \n",
" 84.367870 | \n",
" 42750.000000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" std | \n",
" NaN | \n",
" 33.444380 | \n",
" 16.306000 | \n",
" 7.424463 | \n",
" 0.016151 | \n",
" 18.921322 | \n",
" 30.456535 | \n",
" 24940.534317 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" min | \n",
" NaN | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 25% | \n",
" NaN | \n",
" 3.200000 | \n",
" 2.300000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 63.320000 | \n",
" 99.700000 | \n",
" 21375.000000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 50% | \n",
" NaN | \n",
" 4.570000 | \n",
" 5.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 87.960000 | \n",
" 100.000000 | \n",
" 42750.000000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 75% | \n",
" NaN | \n",
" 51.237500 | \n",
" 22.640000 | \n",
" 2.090000 | \n",
" 0.020000 | \n",
" 99.022500 | \n",
" 100.000000 | \n",
" 64125.000000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" max | \n",
" NaN | \n",
" 157.180000 | \n",
" 96.910000 | \n",
" 27.760000 | \n",
" 0.070000 | \n",
" 100.000000 | \n",
" 100.000000 | \n",
" 85500.000000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" date Usage_kWh Lagging_Current_Reactive.Power_kVarh \\\n",
"count 35040 35040.000000 35040.000000 \n",
"unique 35040 NaN NaN \n",
"top 01/01/2018 00:15 NaN NaN \n",
"freq 1 NaN NaN \n",
"mean NaN 27.386892 13.035384 \n",
"std NaN 33.444380 16.306000 \n",
"min NaN 0.000000 0.000000 \n",
"25% NaN 3.200000 2.300000 \n",
"50% NaN 4.570000 5.000000 \n",
"75% NaN 51.237500 22.640000 \n",
"max NaN 157.180000 96.910000 \n",
"\n",
" Leading_Current_Reactive_Power_kVarh CO2(tCO2) \\\n",
"count 35040.000000 35040.000000 \n",
"unique NaN NaN \n",
"top NaN NaN \n",
"freq NaN NaN \n",
"mean 3.870949 0.011524 \n",
"std 7.424463 0.016151 \n",
"min 0.000000 0.000000 \n",
"25% 0.000000 0.000000 \n",
"50% 0.000000 0.000000 \n",
"75% 2.090000 0.020000 \n",
"max 27.760000 0.070000 \n",
"\n",
" Lagging_Current_Power_Factor Leading_Current_Power_Factor \\\n",
"count 35040.000000 35040.000000 \n",
"unique NaN NaN \n",
"top NaN NaN \n",
"freq NaN NaN \n",
"mean 80.578056 84.367870 \n",
"std 18.921322 30.456535 \n",
"min 0.000000 0.000000 \n",
"25% 63.320000 99.700000 \n",
"50% 87.960000 100.000000 \n",
"75% 99.022500 100.000000 \n",
"max 100.000000 100.000000 \n",
"\n",
" NSM WeekStatus Day_of_week Load_Type \n",
"count 35040.000000 35040 35040 35040 \n",
"unique NaN 2 7 3 \n",
"top NaN Weekday Monday Light_Load \n",
"freq NaN 25056 5088 18072 \n",
"mean 42750.000000 NaN NaN NaN \n",
"std 24940.534317 NaN NaN NaN \n",
"min 0.000000 NaN NaN NaN \n",
"25% 21375.000000 NaN NaN NaN \n",
"50% 42750.000000 NaN NaN NaN \n",
"75% 64125.000000 NaN NaN NaN \n",
"max 85500.000000 NaN NaN NaN "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Summary statistics for the full dataset. include='all' also covers the non-numeric columns,\n",
"# which report count/unique/top/freq instead of mean/std/quantiles.\n",
"energy_data.describe(include='all')"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "loved-delight",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Training set size:\n",
"(31536, 11)\n",
"Testing set size:\n",
"(1752, 11)\n",
"Dev set size:\n",
"(1752, 11)\n"
]
}
],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"HOLDOUT_ROWS = 3504  # rows taken out of the training data\n",
"DEV_ROWS = 1752      # rows of the hold-out assigned to the dev set; the rest is the test set\n",
"\n",
"# Two-stage random split; the fixed random_state makes both shuffles reproducible.\n",
"# The intermediate hold-out gets its own name instead of temporarily living in test_data.\n",
"train_data, holdout_data = train_test_split(energy_data, test_size=HOLDOUT_ROWS, random_state=1)\n",
"test_data, dev_data = train_test_split(holdout_data, test_size=DEV_ROWS, random_state=1)\n",
"\n",
"# The three subsets must account for every row of the source frame.\n",
"assert len(train_data) + len(test_data) + len(dev_data) == len(energy_data)\n",
"\n",
"for label, subset in (('Training', train_data), ('Testing', test_data), ('Dev', dev_data)):\n",
"    print(f'{label} set size:')\n",
"    print(subset.shape)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "formed-virginia",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" date | \n",
" Usage_kWh | \n",
" Lagging_Current_Reactive.Power_kVarh | \n",
" Leading_Current_Reactive_Power_kVarh | \n",
" CO2(tCO2) | \n",
" Lagging_Current_Power_Factor | \n",
" Leading_Current_Power_Factor | \n",
" NSM | \n",
" WeekStatus | \n",
" Day_of_week | \n",
" Load_Type | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 31536 | \n",
" 31536.000000 | \n",
" 31536.000000 | \n",
" 31536.000000 | \n",
" 31536.000000 | \n",
" 31536.000000 | \n",
" 31536.000000 | \n",
" 31536.000000 | \n",
" 31536 | \n",
" 31536 | \n",
" 31536 | \n",
"
\n",
" \n",
" unique | \n",
" 31536 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 2 | \n",
" 7 | \n",
" 3 | \n",
"
\n",
" \n",
" top | \n",
" 30/01/2018 00:15 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" Weekday | \n",
" Monday | \n",
" Light_Load | \n",
"
\n",
" \n",
" freq | \n",
" 1 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 22514 | \n",
" 4560 | \n",
" 16280 | \n",
"
\n",
" \n",
" mean | \n",
" NaN | \n",
" 27.369449 | \n",
" 13.037946 | \n",
" 3.866059 | \n",
" 0.011513 | \n",
" 80.525058 | \n",
" 84.410086 | \n",
" 42707.363014 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" std | \n",
" NaN | \n",
" 33.473304 | \n",
" 16.302910 | \n",
" 7.434250 | \n",
" 0.016159 | \n",
" 18.929571 | \n",
" 30.436675 | \n",
" 24968.193911 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" min | \n",
" NaN | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 25% | \n",
" NaN | \n",
" 3.200000 | \n",
" 2.330000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 63.200000 | \n",
" 99.720000 | \n",
" 20700.000000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 50% | \n",
" NaN | \n",
" 4.570000 | \n",
" 5.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 87.900000 | \n",
" 100.000000 | \n",
" 42300.000000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 75% | \n",
" NaN | \n",
" 51.230000 | \n",
" 22.650000 | \n",
" 1.980000 | \n",
" 0.020000 | \n",
" 98.970000 | \n",
" 100.000000 | \n",
" 63900.000000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" max | \n",
" NaN | \n",
" 157.180000 | \n",
" 96.910000 | \n",
" 27.760000 | \n",
" 0.070000 | \n",
" 100.000000 | \n",
" 100.000000 | \n",
" 85500.000000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" date Usage_kWh Lagging_Current_Reactive.Power_kVarh \\\n",
"count 31536 31536.000000 31536.000000 \n",
"unique 31536 NaN NaN \n",
"top 30/01/2018 00:15 NaN NaN \n",
"freq 1 NaN NaN \n",
"mean NaN 27.369449 13.037946 \n",
"std NaN 33.473304 16.302910 \n",
"min NaN 0.000000 0.000000 \n",
"25% NaN 3.200000 2.330000 \n",
"50% NaN 4.570000 5.000000 \n",
"75% NaN 51.230000 22.650000 \n",
"max NaN 157.180000 96.910000 \n",
"\n",
" Leading_Current_Reactive_Power_kVarh CO2(tCO2) \\\n",
"count 31536.000000 31536.000000 \n",
"unique NaN NaN \n",
"top NaN NaN \n",
"freq NaN NaN \n",
"mean 3.866059 0.011513 \n",
"std 7.434250 0.016159 \n",
"min 0.000000 0.000000 \n",
"25% 0.000000 0.000000 \n",
"50% 0.000000 0.000000 \n",
"75% 1.980000 0.020000 \n",
"max 27.760000 0.070000 \n",
"\n",
" Lagging_Current_Power_Factor Leading_Current_Power_Factor \\\n",
"count 31536.000000 31536.000000 \n",
"unique NaN NaN \n",
"top NaN NaN \n",
"freq NaN NaN \n",
"mean 80.525058 84.410086 \n",
"std 18.929571 30.436675 \n",
"min 0.000000 0.000000 \n",
"25% 63.200000 99.720000 \n",
"50% 87.900000 100.000000 \n",
"75% 98.970000 100.000000 \n",
"max 100.000000 100.000000 \n",
"\n",
" NSM WeekStatus Day_of_week Load_Type \n",
"count 31536.000000 31536 31536 31536 \n",
"unique NaN 2 7 3 \n",
"top NaN Weekday Monday Light_Load \n",
"freq NaN 22514 4560 16280 \n",
"mean 42707.363014 NaN NaN NaN \n",
"std 24968.193911 NaN NaN NaN \n",
"min 0.000000 NaN NaN NaN \n",
"25% 20700.000000 NaN NaN NaN \n",
"50% 42300.000000 NaN NaN NaN \n",
"75% 63900.000000 NaN NaN NaN \n",
"max 85500.000000 NaN NaN NaN "
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Summary statistics of the training split (numeric and non-numeric columns alike).\n",
"train_data.describe(include='all')"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "radical-score",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" date | \n",
" Usage_kWh | \n",
" Lagging_Current_Reactive.Power_kVarh | \n",
" Leading_Current_Reactive_Power_kVarh | \n",
" CO2(tCO2) | \n",
" Lagging_Current_Power_Factor | \n",
" Leading_Current_Power_Factor | \n",
" NSM | \n",
" WeekStatus | \n",
" Day_of_week | \n",
" Load_Type | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 1752 | \n",
" 1752.000000 | \n",
" 1752.000000 | \n",
" 1752.000000 | \n",
" 1752.000000 | \n",
" 1752.000000 | \n",
" 1752.000000 | \n",
" 1752.000000 | \n",
" 1752 | \n",
" 1752 | \n",
" 1752 | \n",
"
\n",
" \n",
" unique | \n",
" 1752 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 2 | \n",
" 7 | \n",
" 3 | \n",
"
\n",
" \n",
" top | \n",
" 07/05/2018 06:00 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" Weekday | \n",
" Tuesday | \n",
" Light_Load | \n",
"
\n",
" \n",
" freq | \n",
" 1 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 1268 | \n",
" 291 | \n",
" 898 | \n",
"
\n",
" \n",
" mean | \n",
" NaN | \n",
" 27.330982 | \n",
" 12.649024 | \n",
" 3.949281 | \n",
" 0.011530 | \n",
" 81.364526 | \n",
" 83.630702 | \n",
" 43080.821918 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" std | \n",
" NaN | \n",
" 33.484216 | \n",
" 16.185283 | \n",
" 7.298637 | \n",
" 0.016224 | \n",
" 18.758338 | \n",
" 30.801180 | \n",
" 24944.325392 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" min | \n",
" NaN | \n",
" 2.480000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 41.120000 | \n",
" 12.540000 | \n",
" 0.000000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 25% | \n",
" NaN | \n",
" 3.200000 | \n",
" 1.392500 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 64.630000 | \n",
" 99.180000 | \n",
" 21600.000000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 50% | \n",
" NaN | \n",
" 4.570000 | \n",
" 4.930000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 88.955000 | \n",
" 100.000000 | \n",
" 43200.000000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 75% | \n",
" NaN | \n",
" 49.870000 | \n",
" 21.240000 | \n",
" 3.837500 | \n",
" 0.020000 | \n",
" 99.852500 | \n",
" 100.000000 | \n",
" 64800.000000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" max | \n",
" NaN | \n",
" 143.930000 | \n",
" 87.700000 | \n",
" 27.540000 | \n",
" 0.070000 | \n",
" 100.000000 | \n",
" 100.000000 | \n",
" 85500.000000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" date Usage_kWh Lagging_Current_Reactive.Power_kVarh \\\n",
"count 1752 1752.000000 1752.000000 \n",
"unique 1752 NaN NaN \n",
"top 07/05/2018 06:00 NaN NaN \n",
"freq 1 NaN NaN \n",
"mean NaN 27.330982 12.649024 \n",
"std NaN 33.484216 16.185283 \n",
"min NaN 2.480000 0.000000 \n",
"25% NaN 3.200000 1.392500 \n",
"50% NaN 4.570000 4.930000 \n",
"75% NaN 49.870000 21.240000 \n",
"max NaN 143.930000 87.700000 \n",
"\n",
" Leading_Current_Reactive_Power_kVarh CO2(tCO2) \\\n",
"count 1752.000000 1752.000000 \n",
"unique NaN NaN \n",
"top NaN NaN \n",
"freq NaN NaN \n",
"mean 3.949281 0.011530 \n",
"std 7.298637 0.016224 \n",
"min 0.000000 0.000000 \n",
"25% 0.000000 0.000000 \n",
"50% 0.000000 0.000000 \n",
"75% 3.837500 0.020000 \n",
"max 27.540000 0.070000 \n",
"\n",
" Lagging_Current_Power_Factor Leading_Current_Power_Factor \\\n",
"count 1752.000000 1752.000000 \n",
"unique NaN NaN \n",
"top NaN NaN \n",
"freq NaN NaN \n",
"mean 81.364526 83.630702 \n",
"std 18.758338 30.801180 \n",
"min 41.120000 12.540000 \n",
"25% 64.630000 99.180000 \n",
"50% 88.955000 100.000000 \n",
"75% 99.852500 100.000000 \n",
"max 100.000000 100.000000 \n",
"\n",
" NSM WeekStatus Day_of_week Load_Type \n",
"count 1752.000000 1752 1752 1752 \n",
"unique NaN 2 7 3 \n",
"top NaN Weekday Tuesday Light_Load \n",
"freq NaN 1268 291 898 \n",
"mean 43080.821918 NaN NaN NaN \n",
"std 24944.325392 NaN NaN NaN \n",
"min 0.000000 NaN NaN NaN \n",
"25% 21600.000000 NaN NaN NaN \n",
"50% 43200.000000 NaN NaN NaN \n",
"75% 64800.000000 NaN NaN NaN \n",
"max 85500.000000 NaN NaN NaN "
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Summary statistics of the test split (numeric and non-numeric columns alike).\n",
"test_data.describe(include='all')"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "attempted-lafayette",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" date | \n",
" Usage_kWh | \n",
" Lagging_Current_Reactive.Power_kVarh | \n",
" Leading_Current_Reactive_Power_kVarh | \n",
" CO2(tCO2) | \n",
" Lagging_Current_Power_Factor | \n",
" Leading_Current_Power_Factor | \n",
" NSM | \n",
" WeekStatus | \n",
" Day_of_week | \n",
" Load_Type | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 1752 | \n",
" 1752.000000 | \n",
" 1752.000000 | \n",
" 1752.000000 | \n",
" 1752.000000 | \n",
" 1752.000000 | \n",
" 1752.000000 | \n",
" 1752.000000 | \n",
" 1752 | \n",
" 1752 | \n",
" 1752 | \n",
"
\n",
" \n",
" unique | \n",
" 1752 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 2 | \n",
" 7 | \n",
" 3 | \n",
"
\n",
" \n",
" top | \n",
" 02/06/2018 02:00 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" Weekday | \n",
" Monday | \n",
" Light_Load | \n",
"
\n",
" \n",
" freq | \n",
" 1 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 1274 | \n",
" 275 | \n",
" 894 | \n",
"
\n",
" \n",
" mean | \n",
" NaN | \n",
" 27.756787 | \n",
" 13.375628 | \n",
" 3.880634 | \n",
" 0.011729 | \n",
" 80.745548 | \n",
" 84.345154 | \n",
" 43186.643836 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" std | \n",
" NaN | \n",
" 32.895802 | \n",
" 16.482148 | \n",
" 7.376468 | \n",
" 0.015943 | \n",
" 18.927378 | \n",
" 30.475427 | \n",
" 24440.888112 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" min | \n",
" NaN | \n",
" 2.520000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 38.330000 | \n",
" 14.070000 | \n",
" 0.000000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 25% | \n",
" NaN | \n",
" 3.200000 | \n",
" 2.270000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 63.942500 | \n",
" 99.690000 | \n",
" 22500.000000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 50% | \n",
" NaN | \n",
" 4.680000 | \n",
" 5.110000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 87.940000 | \n",
" 100.000000 | \n",
" 43200.000000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 75% | \n",
" NaN | \n",
" 52.187500 | \n",
" 24.050000 | \n",
" 2.177500 | \n",
" 0.020000 | \n",
" 99.030000 | \n",
" 100.000000 | \n",
" 63900.000000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" max | \n",
" NaN | \n",
" 139.030000 | \n",
" 80.750000 | \n",
" 27.580000 | \n",
" 0.060000 | \n",
" 100.000000 | \n",
" 100.000000 | \n",
" 85500.000000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" date Usage_kWh Lagging_Current_Reactive.Power_kVarh \\\n",
"count 1752 1752.000000 1752.000000 \n",
"unique 1752 NaN NaN \n",
"top 02/06/2018 02:00 NaN NaN \n",
"freq 1 NaN NaN \n",
"mean NaN 27.756787 13.375628 \n",
"std NaN 32.895802 16.482148 \n",
"min NaN 2.520000 0.000000 \n",
"25% NaN 3.200000 2.270000 \n",
"50% NaN 4.680000 5.110000 \n",
"75% NaN 52.187500 24.050000 \n",
"max NaN 139.030000 80.750000 \n",
"\n",
" Leading_Current_Reactive_Power_kVarh CO2(tCO2) \\\n",
"count 1752.000000 1752.000000 \n",
"unique NaN NaN \n",
"top NaN NaN \n",
"freq NaN NaN \n",
"mean 3.880634 0.011729 \n",
"std 7.376468 0.015943 \n",
"min 0.000000 0.000000 \n",
"25% 0.000000 0.000000 \n",
"50% 0.000000 0.000000 \n",
"75% 2.177500 0.020000 \n",
"max 27.580000 0.060000 \n",
"\n",
" Lagging_Current_Power_Factor Leading_Current_Power_Factor \\\n",
"count 1752.000000 1752.000000 \n",
"unique NaN NaN \n",
"top NaN NaN \n",
"freq NaN NaN \n",
"mean 80.745548 84.345154 \n",
"std 18.927378 30.475427 \n",
"min 38.330000 14.070000 \n",
"25% 63.942500 99.690000 \n",
"50% 87.940000 100.000000 \n",
"75% 99.030000 100.000000 \n",
"max 100.000000 100.000000 \n",
"\n",
" NSM WeekStatus Day_of_week Load_Type \n",
"count 1752.000000 1752 1752 1752 \n",
"unique NaN 2 7 3 \n",
"top NaN Weekday Monday Light_Load \n",
"freq NaN 1274 275 894 \n",
"mean 43186.643836 NaN NaN NaN \n",
"std 24440.888112 NaN NaN NaN \n",
"min 0.000000 NaN NaN NaN \n",
"25% 22500.000000 NaN NaN NaN \n",
"50% 43200.000000 NaN NaN NaN \n",
"75% 63900.000000 NaN NaN NaN \n",
"max 85500.000000 NaN NaN NaN "
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Summary statistics of the dev split (numeric and non-numeric columns alike).\n",
"dev_data.describe(include='all')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}