{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "expected-payroll", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: kaggle in c:\\users\\cgala\\anaconda3\\lib\\site-packages (1.5.12)\n", "Requirement already satisfied: tqdm in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from kaggle) (4.50.2)\n", "Requirement already satisfied: requests in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from kaggle) (2.24.0)\n", "Requirement already satisfied: certifi in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from kaggle) (2020.6.20)\n", "Requirement already satisfied: six>=1.10 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from kaggle) (1.15.0)\n", "Requirement already satisfied: python-slugify in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from kaggle) (6.1.1)\n", "Requirement already satisfied: urllib3 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from kaggle) (1.25.11)\n", "Requirement already satisfied: python-dateutil in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from kaggle) (2.8.1)\n", "Requirement already satisfied: idna<3,>=2.5 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from requests->kaggle) (2.10)\n", "Requirement already satisfied: chardet<4,>=3.0.2 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from requests->kaggle) (3.0.4)\n", "Requirement already satisfied: text-unidecode>=1.3 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from python-slugify->kaggle) (1.3)\n", "Requirement already satisfied: pandas in c:\\users\\cgala\\anaconda3\\lib\\site-packages (1.1.3)\n", "Requirement already satisfied: python-dateutil>=2.7.3 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from pandas) (2.8.1)\n", "Requirement already satisfied: numpy>=1.15.4 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from pandas) (1.19.2)\n", "Requirement already satisfied: pytz>=2017.2 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from pandas) 
(2020.1)\n", "Requirement already satisfied: six>=1.5 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from python-dateutil>=2.7.3->pandas) (1.15.0)\n", "Requirement already satisfied: seaborn in c:\\users\\cgala\\anaconda3\\lib\\site-packages (0.11.0)\n", "Requirement already satisfied: numpy>=1.15 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from seaborn) (1.19.2)\n", "Requirement already satisfied: scipy>=1.0 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from seaborn) (1.5.2)\n", "Requirement already satisfied: matplotlib>=2.2 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from seaborn) (3.3.2)\n", "Requirement already satisfied: pandas>=0.23 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from seaborn) (1.1.3)\n", "Requirement already satisfied: python-dateutil>=2.1 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2.8.1)\n", "Requirement already satisfied: kiwisolver>=1.0.1 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (1.3.0)\n", "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.3 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2.4.7)\n", "Requirement already satisfied: cycler>=0.10 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n", "Requirement already satisfied: pillow>=6.2.0 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (8.0.1)\n", "Requirement already satisfied: certifi>=2020.06.20 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2020.6.20)\n", "Requirement already satisfied: pytz>=2017.2 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from pandas>=0.23->seaborn) (2020.1)\n", "Requirement already satisfied: six>=1.5 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from python-dateutil>=2.1->matplotlib>=2.2->seaborn) (1.15.0)\n" ] } ], "source": [ "!pip install kaggle\n", "!pip 
install pandas\n", "!pip install seaborn" ] }, { "cell_type": "code", "execution_count": 3, "id": "genetic-plaintiff", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Downloading steel-industry-energy-consumption.zip to D:\\UAM zajecia\\IUM\\ium_470623\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n", " 0%| | 0.00/484k [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dateUsage_kWhLagging_Current_Reactive.Power_kVarhLeading_Current_Reactive_Power_kVarhCO2(tCO2)Lagging_Current_Power_FactorLeading_Current_Power_FactorNSMWeekStatusDay_of_weekLoad_Type
001/01/2018 00:153.172.950.000.073.21100.00900WeekdayMondayLight_Load
101/01/2018 00:304.004.460.000.066.77100.001800WeekdayMondayLight_Load
201/01/2018 00:453.243.280.000.070.28100.002700WeekdayMondayLight_Load
301/01/2018 01:003.313.560.000.068.09100.003600WeekdayMondayLight_Load
401/01/2018 01:153.824.500.000.064.72100.004500WeekdayMondayLight_Load
....................................
3503531/12/2018 23:003.854.860.000.062.10100.0082800WeekdayMondayLight_Load
3503631/12/2018 23:153.743.740.000.070.71100.0083700WeekdayMondayLight_Load
3503731/12/2018 23:303.783.170.070.076.6299.9884600WeekdayMondayLight_Load
3503831/12/2018 23:453.783.060.110.077.7299.9685500WeekdayMondayLight_Load
3503931/12/2018 00:003.673.020.070.077.2299.980WeekdayMondayLight_Load
\n", "

35040 rows × 11 columns

\n", "" ], "text/plain": [ " date Usage_kWh Lagging_Current_Reactive.Power_kVarh \\\n", "0 01/01/2018 00:15 3.17 2.95 \n", "1 01/01/2018 00:30 4.00 4.46 \n", "2 01/01/2018 00:45 3.24 3.28 \n", "3 01/01/2018 01:00 3.31 3.56 \n", "4 01/01/2018 01:15 3.82 4.50 \n", "... ... ... ... \n", "35035 31/12/2018 23:00 3.85 4.86 \n", "35036 31/12/2018 23:15 3.74 3.74 \n", "35037 31/12/2018 23:30 3.78 3.17 \n", "35038 31/12/2018 23:45 3.78 3.06 \n", "35039 31/12/2018 00:00 3.67 3.02 \n", "\n", " Leading_Current_Reactive_Power_kVarh CO2(tCO2) \\\n", "0 0.00 0.0 \n", "1 0.00 0.0 \n", "2 0.00 0.0 \n", "3 0.00 0.0 \n", "4 0.00 0.0 \n", "... ... ... \n", "35035 0.00 0.0 \n", "35036 0.00 0.0 \n", "35037 0.07 0.0 \n", "35038 0.11 0.0 \n", "35039 0.07 0.0 \n", "\n", " Lagging_Current_Power_Factor Leading_Current_Power_Factor NSM \\\n", "0 73.21 100.00 900 \n", "1 66.77 100.00 1800 \n", "2 70.28 100.00 2700 \n", "3 68.09 100.00 3600 \n", "4 64.72 100.00 4500 \n", "... ... ... ... \n", "35035 62.10 100.00 82800 \n", "35036 70.71 100.00 83700 \n", "35037 76.62 99.98 84600 \n", "35038 77.72 99.96 85500 \n", "35039 77.22 99.98 0 \n", "\n", " WeekStatus Day_of_week Load_Type \n", "0 Weekday Monday Light_Load \n", "1 Weekday Monday Light_Load \n", "2 Weekday Monday Light_Load \n", "3 Weekday Monday Light_Load \n", "4 Weekday Monday Light_Load \n", "... ... ... ... \n", "35035 Weekday Monday Light_Load \n", "35036 Weekday Monday Light_Load \n", "35037 Weekday Monday Light_Load \n", "35038 Weekday Monday Light_Load \n", "35039 Weekday Monday Light_Load \n", "\n", "[35040 rows x 11 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "energy_data=pd.read_csv('Steel_industry_data.csv')\n", "energy_data" ] }, { "cell_type": "code", "execution_count": 5, "id": "heated-spectacular", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dateUsage_kWhLagging_Current_Reactive.Power_kVarhLeading_Current_Reactive_Power_kVarhCO2(tCO2)Lagging_Current_Power_FactorLeading_Current_Power_FactorNSMWeekStatusDay_of_weekLoad_Type
count3504035040.00000035040.00000035040.00000035040.00000035040.00000035040.00000035040.000000350403504035040
unique35040NaNNaNNaNNaNNaNNaNNaN273
top01/01/2018 00:15NaNNaNNaNNaNNaNNaNNaNWeekdayMondayLight_Load
freq1NaNNaNNaNNaNNaNNaNNaN25056508818072
meanNaN27.38689213.0353843.8709490.01152480.57805684.36787042750.000000NaNNaNNaN
stdNaN33.44438016.3060007.4244630.01615118.92132230.45653524940.534317NaNNaNNaN
minNaN0.0000000.0000000.0000000.0000000.0000000.0000000.000000NaNNaNNaN
25%NaN3.2000002.3000000.0000000.00000063.32000099.70000021375.000000NaNNaNNaN
50%NaN4.5700005.0000000.0000000.00000087.960000100.00000042750.000000NaNNaNNaN
75%NaN51.23750022.6400002.0900000.02000099.022500100.00000064125.000000NaNNaNNaN
maxNaN157.18000096.91000027.7600000.070000100.000000100.00000085500.000000NaNNaNNaN
\n", "
" ], "text/plain": [ " date Usage_kWh Lagging_Current_Reactive.Power_kVarh \\\n", "count 35040 35040.000000 35040.000000 \n", "unique 35040 NaN NaN \n", "top 01/01/2018 00:15 NaN NaN \n", "freq 1 NaN NaN \n", "mean NaN 27.386892 13.035384 \n", "std NaN 33.444380 16.306000 \n", "min NaN 0.000000 0.000000 \n", "25% NaN 3.200000 2.300000 \n", "50% NaN 4.570000 5.000000 \n", "75% NaN 51.237500 22.640000 \n", "max NaN 157.180000 96.910000 \n", "\n", " Leading_Current_Reactive_Power_kVarh CO2(tCO2) \\\n", "count 35040.000000 35040.000000 \n", "unique NaN NaN \n", "top NaN NaN \n", "freq NaN NaN \n", "mean 3.870949 0.011524 \n", "std 7.424463 0.016151 \n", "min 0.000000 0.000000 \n", "25% 0.000000 0.000000 \n", "50% 0.000000 0.000000 \n", "75% 2.090000 0.020000 \n", "max 27.760000 0.070000 \n", "\n", " Lagging_Current_Power_Factor Leading_Current_Power_Factor \\\n", "count 35040.000000 35040.000000 \n", "unique NaN NaN \n", "top NaN NaN \n", "freq NaN NaN \n", "mean 80.578056 84.367870 \n", "std 18.921322 30.456535 \n", "min 0.000000 0.000000 \n", "25% 63.320000 99.700000 \n", "50% 87.960000 100.000000 \n", "75% 99.022500 100.000000 \n", "max 100.000000 100.000000 \n", "\n", " NSM WeekStatus Day_of_week Load_Type \n", "count 35040.000000 35040 35040 35040 \n", "unique NaN 2 7 3 \n", "top NaN Weekday Monday Light_Load \n", "freq NaN 25056 5088 18072 \n", "mean 42750.000000 NaN NaN NaN \n", "std 24940.534317 NaN NaN NaN \n", "min 0.000000 NaN NaN NaN \n", "25% 21375.000000 NaN NaN NaN \n", "50% 42750.000000 NaN NaN NaN \n", "75% 64125.000000 NaN NaN NaN \n", "max 85500.000000 NaN NaN NaN " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "energy_data.describe(include='all')" ] }, { "cell_type": "code", "execution_count": 6, "id": "loved-delight", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Training set size:\n", "(28032, 11)\n", "Testing set size:\n", "(3504, 11)\n", "Dev set size:\n", 
"(3504, 11)\n" ] } ], "source": [
 "from sklearn.model_selection import train_test_split\n",
 "\n",
 "# Derive the split sizes from the row count instead of hard-coding them:\n",
 "# one fifth of the rows is held out, then halved into test and dev\n",
 "# (7008 -> 3504 + 3504 for the 35040-row frame).\n",
 "holdout_size = len(energy_data) // 5\n",
 "dev_size = holdout_size // 2\n",
 "\n",
 "# The same random_state on both calls keeps the shuffled splits reproducible.\n",
 "train_data, holdout_data = train_test_split(energy_data, test_size=holdout_size, random_state=1)\n",
 "test_data, dev_data = train_test_split(holdout_data, test_size=dev_size, random_state=1)\n",
 "\n",
 "# Sanity check: the three subset sizes must add up to the full row count.\n",
 "assert len(train_data) + len(test_data) + len(dev_data) == len(energy_data)\n",
 "\n",
 "print('Training set size:')\n",
 "print(train_data.shape)\n",
 "print('Testing set size:')\n",
 "print(test_data.shape)\n",
 "print('Dev set size:')\n",
 "print(dev_data.shape)"
 ] }, { "cell_type": "code", "execution_count": 7, "id": "formed-virginia", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dateUsage_kWhLagging_Current_Reactive.Power_kVarhLeading_Current_Reactive_Power_kVarhCO2(tCO2)Lagging_Current_Power_FactorLeading_Current_Power_FactorNSMWeekStatusDay_of_weekLoad_Type
count2803228032.00000028032.00000028032.00000028032.00000028032.00000028032.00000028032.000000280322803228032
unique28032NaNNaNNaNNaNNaNNaNNaN273
top07/08/2018 14:15NaNNaNNaNNaNNaNNaNNaNWeekdayMondayLight_Load
freq1NaNNaNNaNNaNNaNNaNNaN19998408714467
meanNaN27.34017413.0268013.8750010.01149880.52014584.36951142761.429795NaNNaNNaN
stdNaN33.46913016.2893487.4458980.01615318.93282530.46219324944.585138NaNNaNNaN
minNaN0.0000000.0000000.0000000.0000000.0000000.0000000.000000NaNNaNNaN
25%NaN3.2000002.3000000.0000000.00000063.22750099.71000020700.000000NaNNaNNaN
50%NaN4.5700005.0000000.0000000.00000087.870000100.00000043200.000000NaNNaNNaN
75%NaN51.19000022.7500002.0200000.02000099.000000100.00000064800.000000NaNNaNNaN
maxNaN157.18000096.91000027.7600000.070000100.000000100.00000085500.000000NaNNaNNaN
\n", "
" ], "text/plain": [ " date Usage_kWh Lagging_Current_Reactive.Power_kVarh \\\n", "count 28032 28032.000000 28032.000000 \n", "unique 28032 NaN NaN \n", "top 07/08/2018 14:15 NaN NaN \n", "freq 1 NaN NaN \n", "mean NaN 27.340174 13.026801 \n", "std NaN 33.469130 16.289348 \n", "min NaN 0.000000 0.000000 \n", "25% NaN 3.200000 2.300000 \n", "50% NaN 4.570000 5.000000 \n", "75% NaN 51.190000 22.750000 \n", "max NaN 157.180000 96.910000 \n", "\n", " Leading_Current_Reactive_Power_kVarh CO2(tCO2) \\\n", "count 28032.000000 28032.000000 \n", "unique NaN NaN \n", "top NaN NaN \n", "freq NaN NaN \n", "mean 3.875001 0.011498 \n", "std 7.445898 0.016153 \n", "min 0.000000 0.000000 \n", "25% 0.000000 0.000000 \n", "50% 0.000000 0.000000 \n", "75% 2.020000 0.020000 \n", "max 27.760000 0.070000 \n", "\n", " Lagging_Current_Power_Factor Leading_Current_Power_Factor \\\n", "count 28032.000000 28032.000000 \n", "unique NaN NaN \n", "top NaN NaN \n", "freq NaN NaN \n", "mean 80.520145 84.369511 \n", "std 18.932825 30.462193 \n", "min 0.000000 0.000000 \n", "25% 63.227500 99.710000 \n", "50% 87.870000 100.000000 \n", "75% 99.000000 100.000000 \n", "max 100.000000 100.000000 \n", "\n", " NSM WeekStatus Day_of_week Load_Type \n", "count 28032.000000 28032 28032 28032 \n", "unique NaN 2 7 3 \n", "top NaN Weekday Monday Light_Load \n", "freq NaN 19998 4087 14467 \n", "mean 42761.429795 NaN NaN NaN \n", "std 24944.585138 NaN NaN NaN \n", "min 0.000000 NaN NaN NaN \n", "25% 20700.000000 NaN NaN NaN \n", "50% 43200.000000 NaN NaN NaN \n", "75% 64800.000000 NaN NaN NaN \n", "max 85500.000000 NaN NaN NaN " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_data.describe(include='all')" ] }, { "cell_type": "code", "execution_count": 8, "id": "radical-score", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dateUsage_kWhLagging_Current_Reactive.Power_kVarhLeading_Current_Reactive_Power_kVarhCO2(tCO2)Lagging_Current_Power_FactorLeading_Current_Power_FactorNSMWeekStatusDay_of_weekLoad_Type
count35043504.0000003504.0000003504.0000003504.0000003504.0000003504.0000003504.000000350435043504
unique3504NaNNaNNaNNaNNaNNaNNaN273
top20/06/2018 13:00NaNNaNNaNNaNNaNNaNNaNWeekdayWednesdayLight_Load
freq1NaNNaNNaNNaNNaNNaNNaN25225271837
meanNaN26.35568512.3747173.8910930.01105080.68775184.08279442594.092466NaNNaNNaN
stdNaN32.51974915.8309617.3530280.01576219.05301830.61414425222.804637NaNNaNNaN
minNaN2.4800000.0000000.0000000.00000040.29000012.5400000.000000NaNNaNNaN
25%NaN3.1925002.0900000.0000000.00000063.13000099.56250020700.000000NaNNaNNaN
50%NaN4.5000004.9000000.0000000.00000088.210000100.00000042300.000000NaNNaNNaN
75%NaN49.57000020.7000002.9675000.02000099.390000100.00000064800.000000NaNNaNNaN
maxNaN153.14000082.94000027.6500000.070000100.000000100.00000085500.000000NaNNaNNaN
\n", "
" ], "text/plain": [ " date Usage_kWh Lagging_Current_Reactive.Power_kVarh \\\n", "count 3504 3504.000000 3504.000000 \n", "unique 3504 NaN NaN \n", "top 20/06/2018 13:00 NaN NaN \n", "freq 1 NaN NaN \n", "mean NaN 26.355685 12.374717 \n", "std NaN 32.519749 15.830961 \n", "min NaN 2.480000 0.000000 \n", "25% NaN 3.192500 2.090000 \n", "50% NaN 4.500000 4.900000 \n", "75% NaN 49.570000 20.700000 \n", "max NaN 153.140000 82.940000 \n", "\n", " Leading_Current_Reactive_Power_kVarh CO2(tCO2) \\\n", "count 3504.000000 3504.000000 \n", "unique NaN NaN \n", "top NaN NaN \n", "freq NaN NaN \n", "mean 3.891093 0.011050 \n", "std 7.353028 0.015762 \n", "min 0.000000 0.000000 \n", "25% 0.000000 0.000000 \n", "50% 0.000000 0.000000 \n", "75% 2.967500 0.020000 \n", "max 27.650000 0.070000 \n", "\n", " Lagging_Current_Power_Factor Leading_Current_Power_Factor \\\n", "count 3504.000000 3504.000000 \n", "unique NaN NaN \n", "top NaN NaN \n", "freq NaN NaN \n", "mean 80.687751 84.082794 \n", "std 19.053018 30.614144 \n", "min 40.290000 12.540000 \n", "25% 63.130000 99.562500 \n", "50% 88.210000 100.000000 \n", "75% 99.390000 100.000000 \n", "max 100.000000 100.000000 \n", "\n", " NSM WeekStatus Day_of_week Load_Type \n", "count 3504.000000 3504 3504 3504 \n", "unique NaN 2 7 3 \n", "top NaN Weekday Wednesday Light_Load \n", "freq NaN 2522 527 1837 \n", "mean 42594.092466 NaN NaN NaN \n", "std 25222.804637 NaN NaN NaN \n", "min 0.000000 NaN NaN NaN \n", "25% 20700.000000 NaN NaN NaN \n", "50% 42300.000000 NaN NaN NaN \n", "75% 64800.000000 NaN NaN NaN \n", "max 85500.000000 NaN NaN NaN " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test_data.describe(include='all')" ] }, { "cell_type": "code", "execution_count": 9, "id": "attempted-lafayette", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dateUsage_kWhLagging_Current_Reactive.Power_kVarhLeading_Current_Reactive_Power_kVarhCO2(tCO2)Lagging_Current_Power_FactorLeading_Current_Power_FactorNSMWeekStatusDay_of_weekLoad_Type
count35043504.0000003504.0000003504.0000003504.0000003504.0000003504.0000003504.000000350435043504
unique3504NaNNaNNaNNaNNaNNaNNaN273
top16/11/2018 16:45NaNNaNNaNNaNNaNNaNNaNWeekdayTuesdayLight_Load
freq1NaNNaNNaNNaNNaNNaNNaN25365431768
meanNaN28.79184913.7647093.8183820.01221280.93165084.63981742814.469178NaNNaNNaN
stdNaN34.11523816.8724007.3250160.01649918.69683430.25874324628.829557NaNNaNNaN
minNaN2.4800000.0000000.0000000.00000038.33000013.0500000.000000NaNNaNNaN
25%NaN3.2400002.3800000.0000000.00000064.11250099.73000021600.000000NaNNaNNaN
50%NaN4.7200005.1100000.0000000.00000088.325000100.00000043200.000000NaNNaNNaN
75%NaN53.22750024.8100001.9175000.02000098.792500100.00000063900.000000NaNNaNNaN
maxNaN146.88000087.70000027.5400000.070000100.000000100.00000085500.000000NaNNaNNaN
\n", "
" ], "text/plain": [ " date Usage_kWh Lagging_Current_Reactive.Power_kVarh \\\n", "count 3504 3504.000000 3504.000000 \n", "unique 3504 NaN NaN \n", "top 16/11/2018 16:45 NaN NaN \n", "freq 1 NaN NaN \n", "mean NaN 28.791849 13.764709 \n", "std NaN 34.115238 16.872400 \n", "min NaN 2.480000 0.000000 \n", "25% NaN 3.240000 2.380000 \n", "50% NaN 4.720000 5.110000 \n", "75% NaN 53.227500 24.810000 \n", "max NaN 146.880000 87.700000 \n", "\n", " Leading_Current_Reactive_Power_kVarh CO2(tCO2) \\\n", "count 3504.000000 3504.000000 \n", "unique NaN NaN \n", "top NaN NaN \n", "freq NaN NaN \n", "mean 3.818382 0.012212 \n", "std 7.325016 0.016499 \n", "min 0.000000 0.000000 \n", "25% 0.000000 0.000000 \n", "50% 0.000000 0.000000 \n", "75% 1.917500 0.020000 \n", "max 27.540000 0.070000 \n", "\n", " Lagging_Current_Power_Factor Leading_Current_Power_Factor \\\n", "count 3504.000000 3504.000000 \n", "unique NaN NaN \n", "top NaN NaN \n", "freq NaN NaN \n", "mean 80.931650 84.639817 \n", "std 18.696834 30.258743 \n", "min 38.330000 13.050000 \n", "25% 64.112500 99.730000 \n", "50% 88.325000 100.000000 \n", "75% 98.792500 100.000000 \n", "max 100.000000 100.000000 \n", "\n", " NSM WeekStatus Day_of_week Load_Type \n", "count 3504.000000 3504 3504 3504 \n", "unique NaN 2 7 3 \n", "top NaN Weekday Tuesday Light_Load \n", "freq NaN 2536 543 1768 \n", "mean 42814.469178 NaN NaN NaN \n", "std 24628.829557 NaN NaN NaN \n", "min 0.000000 NaN NaN NaN \n", "25% 21600.000000 NaN NaN NaN \n", "50% 43200.000000 NaN NaN NaN \n", "75% 63900.000000 NaN NaN NaN \n", "max 85500.000000 NaN NaN NaN " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dev_data.describe(include='all')" ] }, { "cell_type": "code", "execution_count": 11, "id": "banned-scottish", "metadata": {}, "outputs": [], "source": [ "test_data.to_csv(\"steel_industry_data_test.csv\", encoding=\"utf-8\", index=False)\n", "dev_data.to_csv(\"steel_industry_data_dev.csv\", 
encoding=\"utf-8\", index=False)\n", "train_data.to_csv(\"steel_industry_data_train.csv\", encoding=\"utf-8\", index=False)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.2" } }, "nbformat": 4, "nbformat_minor": 5 }