diff --git a/IUM_dane02.ipynb b/IUM_dane02.ipynb new file mode 100644 index 0000000..462690f --- /dev/null +++ b/IUM_dane02.ipynb @@ -0,0 +1,1469 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 9, + "id": "expected-payroll", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: kaggle in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (1.5.12)\n", + "Requirement already satisfied: six>=1.10 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from kaggle) (1.15.0)\n", + "Requirement already satisfied: certifi in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from kaggle) (2021.10.8)\n", + "Requirement already satisfied: python-dateutil in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from kaggle) (2.8.1)\n", + "Requirement already satisfied: requests in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from kaggle) (2.27.1)\n", + "Requirement already satisfied: tqdm in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from kaggle) (4.59.0)\n", + "Requirement already satisfied: python-slugify in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from kaggle) (6.1.1)\n", + "Requirement already satisfied: urllib3 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from kaggle) (1.26.9)\n", + "Requirement already satisfied: text-unidecode>=1.3 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from python-slugify->kaggle) (1.3)\n", + "Requirement already satisfied: idna<4,>=2.5 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from requests->kaggle) (3.3)\n", + "Requirement already satisfied: charset-normalizer~=2.0.0 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from requests->kaggle) (2.0.12)\n", + "Requirement already satisfied: pandas in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (1.4.1)\n", + "Requirement already satisfied: pytz>=2020.1 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from pandas) (2022.1)\n", + "Requirement already satisfied: python-dateutil>=2.8.1 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from pandas) (2.8.1)\n", + "Requirement already satisfied: numpy>=1.18.5 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from pandas) (1.20.1)\n", + "Requirement already satisfied: six>=1.5 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from python-dateutil>=2.8.1->pandas) (1.15.0)\n", + "Requirement already satisfied: seaborn in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (0.11.2)\n", + "Requirement already satisfied: pandas>=0.23 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from seaborn) (1.4.1)\n", + "Requirement already satisfied: numpy>=1.15 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from seaborn) (1.20.1)\n", + "Requirement already satisfied: scipy>=1.0 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from seaborn) (1.6.1)\n", + "Requirement already satisfied: matplotlib>=2.2 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from seaborn) (3.5.1)\n", + "Requirement already satisfied: packaging>=20.0 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from matplotlib>=2.2->seaborn) (20.9)\n", + "Requirement already satisfied: pyparsing>=2.2.1 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2.4.7)\n", + "Requirement already satisfied: fonttools>=4.22.0 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from matplotlib>=2.2->seaborn) (4.31.1)\n", + "Requirement already satisfied: python-dateutil>=2.7 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2.8.1)\n", + "Requirement already satisfied: pillow>=6.2.0 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from matplotlib>=2.2->seaborn) (9.0.1)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from matplotlib>=2.2->seaborn) (1.4.0)\n", + "Requirement already satisfied: cycler>=0.10 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from matplotlib>=2.2->seaborn) (0.11.0)\n", + "Requirement already satisfied: pytz>=2020.1 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from pandas>=0.23->seaborn) (2022.1)\n", + "Requirement already satisfied: six>=1.5 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from python-dateutil>=2.7->matplotlib>=2.2->seaborn) (1.15.0)\n" + ] + } + ], + "source": [ + "!pip install kaggle\n", + "!pip install pandas\n", + "!pip install seaborn" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "genetic-plaintiff", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloading steel-industry-energy-consumption.zip to D:\\UAM zajecia\\IUM\\ium_470623\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + " 0%| | 0.00/484k [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dateUsage_kWhLagging_Current_Reactive.Power_kVarhLeading_Current_Reactive_Power_kVarhCO2(tCO2)Lagging_Current_Power_FactorLeading_Current_Power_FactorNSMWeekStatusDay_of_weekLoad_Type
001/01/2018 00:153.172.950.000.073.21100.00900WeekdayMondayLight_Load
101/01/2018 00:304.004.460.000.066.77100.001800WeekdayMondayLight_Load
201/01/2018 00:453.243.280.000.070.28100.002700WeekdayMondayLight_Load
301/01/2018 01:003.313.560.000.068.09100.003600WeekdayMondayLight_Load
401/01/2018 01:153.824.500.000.064.72100.004500WeekdayMondayLight_Load
....................................
3503531/12/2018 23:003.854.860.000.062.10100.0082800WeekdayMondayLight_Load
3503631/12/2018 23:153.743.740.000.070.71100.0083700WeekdayMondayLight_Load
3503731/12/2018 23:303.783.170.070.076.6299.9884600WeekdayMondayLight_Load
3503831/12/2018 23:453.783.060.110.077.7299.9685500WeekdayMondayLight_Load
3503931/12/2018 00:003.673.020.070.077.2299.980WeekdayMondayLight_Load
\n", + "

35040 rows × 11 columns

\n", + "" + ], + "text/plain": [ + " date Usage_kWh Lagging_Current_Reactive.Power_kVarh \\\n", + "0 01/01/2018 00:15 3.17 2.95 \n", + "1 01/01/2018 00:30 4.00 4.46 \n", + "2 01/01/2018 00:45 3.24 3.28 \n", + "3 01/01/2018 01:00 3.31 3.56 \n", + "4 01/01/2018 01:15 3.82 4.50 \n", + "... ... ... ... \n", + "35035 31/12/2018 23:00 3.85 4.86 \n", + "35036 31/12/2018 23:15 3.74 3.74 \n", + "35037 31/12/2018 23:30 3.78 3.17 \n", + "35038 31/12/2018 23:45 3.78 3.06 \n", + "35039 31/12/2018 00:00 3.67 3.02 \n", + "\n", + " Leading_Current_Reactive_Power_kVarh CO2(tCO2) \\\n", + "0 0.00 0.0 \n", + "1 0.00 0.0 \n", + "2 0.00 0.0 \n", + "3 0.00 0.0 \n", + "4 0.00 0.0 \n", + "... ... ... \n", + "35035 0.00 0.0 \n", + "35036 0.00 0.0 \n", + "35037 0.07 0.0 \n", + "35038 0.11 0.0 \n", + "35039 0.07 0.0 \n", + "\n", + " Lagging_Current_Power_Factor Leading_Current_Power_Factor NSM \\\n", + "0 73.21 100.00 900 \n", + "1 66.77 100.00 1800 \n", + "2 70.28 100.00 2700 \n", + "3 68.09 100.00 3600 \n", + "4 64.72 100.00 4500 \n", + "... ... ... ... \n", + "35035 62.10 100.00 82800 \n", + "35036 70.71 100.00 83700 \n", + "35037 76.62 99.98 84600 \n", + "35038 77.72 99.96 85500 \n", + "35039 77.22 99.98 0 \n", + "\n", + " WeekStatus Day_of_week Load_Type \n", + "0 Weekday Monday Light_Load \n", + "1 Weekday Monday Light_Load \n", + "2 Weekday Monday Light_Load \n", + "3 Weekday Monday Light_Load \n", + "4 Weekday Monday Light_Load \n", + "... ... ... ... \n", + "35035 Weekday Monday Light_Load \n", + "35036 Weekday Monday Light_Load \n", + "35037 Weekday Monday Light_Load \n", + "35038 Weekday Monday Light_Load \n", + "35039 Weekday Monday Light_Load \n", + "\n", + "[35040 rows x 11 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "energy_data=pd.read_csv('Steel_industry_data.csv')\n", + "energy_data" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "heated-spectacular", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dateUsage_kWhLagging_Current_Reactive.Power_kVarhLeading_Current_Reactive_Power_kVarhCO2(tCO2)Lagging_Current_Power_FactorLeading_Current_Power_FactorNSMWeekStatusDay_of_weekLoad_Type
count3504035040.00000035040.00000035040.00000035040.00000035040.00000035040.00000035040.000000350403504035040
unique35040NaNNaNNaNNaNNaNNaNNaN273
top01/01/2018 00:15NaNNaNNaNNaNNaNNaNNaNWeekdayMondayLight_Load
freq1NaNNaNNaNNaNNaNNaNNaN25056508818072
meanNaN27.38689213.0353843.8709490.01152480.57805684.36787042750.000000NaNNaNNaN
stdNaN33.44438016.3060007.4244630.01615118.92132230.45653524940.534317NaNNaNNaN
minNaN0.0000000.0000000.0000000.0000000.0000000.0000000.000000NaNNaNNaN
25%NaN3.2000002.3000000.0000000.00000063.32000099.70000021375.000000NaNNaNNaN
50%NaN4.5700005.0000000.0000000.00000087.960000100.00000042750.000000NaNNaNNaN
75%NaN51.23750022.6400002.0900000.02000099.022500100.00000064125.000000NaNNaNNaN
maxNaN157.18000096.91000027.7600000.070000100.000000100.00000085500.000000NaNNaNNaN
\n", + "
" + ], + "text/plain": [ + " date Usage_kWh Lagging_Current_Reactive.Power_kVarh \\\n", + "count 35040 35040.000000 35040.000000 \n", + "unique 35040 NaN NaN \n", + "top 01/01/2018 00:15 NaN NaN \n", + "freq 1 NaN NaN \n", + "mean NaN 27.386892 13.035384 \n", + "std NaN 33.444380 16.306000 \n", + "min NaN 0.000000 0.000000 \n", + "25% NaN 3.200000 2.300000 \n", + "50% NaN 4.570000 5.000000 \n", + "75% NaN 51.237500 22.640000 \n", + "max NaN 157.180000 96.910000 \n", + "\n", + " Leading_Current_Reactive_Power_kVarh CO2(tCO2) \\\n", + "count 35040.000000 35040.000000 \n", + "unique NaN NaN \n", + "top NaN NaN \n", + "freq NaN NaN \n", + "mean 3.870949 0.011524 \n", + "std 7.424463 0.016151 \n", + "min 0.000000 0.000000 \n", + "25% 0.000000 0.000000 \n", + "50% 0.000000 0.000000 \n", + "75% 2.090000 0.020000 \n", + "max 27.760000 0.070000 \n", + "\n", + " Lagging_Current_Power_Factor Leading_Current_Power_Factor \\\n", + "count 35040.000000 35040.000000 \n", + "unique NaN NaN \n", + "top NaN NaN \n", + "freq NaN NaN \n", + "mean 80.578056 84.367870 \n", + "std 18.921322 30.456535 \n", + "min 0.000000 0.000000 \n", + "25% 63.320000 99.700000 \n", + "50% 87.960000 100.000000 \n", + "75% 99.022500 100.000000 \n", + "max 100.000000 100.000000 \n", + "\n", + " NSM WeekStatus Day_of_week Load_Type \n", + "count 35040.000000 35040 35040 35040 \n", + "unique NaN 2 7 3 \n", + "top NaN Weekday Monday Light_Load \n", + "freq NaN 25056 5088 18072 \n", + "mean 42750.000000 NaN NaN NaN \n", + "std 24940.534317 NaN NaN NaN \n", + "min 0.000000 NaN NaN NaN \n", + "25% 21375.000000 NaN NaN NaN \n", + "50% 42750.000000 NaN NaN NaN \n", + "75% 64125.000000 NaN NaN NaN \n", + "max 85500.000000 NaN NaN NaN " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "energy_data.describe(include='all')" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "loved-delight", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training set size:\n", + "(31536, 11)\n", + "Testing set size:\n", + "(1752, 11)\n", + "Dev set size:\n", + "(1752, 11)\n" + ] + } + ], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "\n", + "train_data, test_data = train_test_split(energy_data, test_size=3504, random_state=1)\n", + "test_data, dev_data = train_test_split(test_data, test_size=1752, random_state=1)\n", + "print('Training set size:')\n", + "print(train_data.shape)\n", + "print('Testing set size:')\n", + "print(test_data.shape)\n", + "print('Dev set size:')\n", + "print(dev_data.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "formed-virginia", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dateUsage_kWhLagging_Current_Reactive.Power_kVarhLeading_Current_Reactive_Power_kVarhCO2(tCO2)Lagging_Current_Power_FactorLeading_Current_Power_FactorNSMWeekStatusDay_of_weekLoad_Type
count3153631536.00000031536.00000031536.00000031536.00000031536.00000031536.00000031536.000000315363153631536
unique31536NaNNaNNaNNaNNaNNaNNaN273
top30/01/2018 00:15NaNNaNNaNNaNNaNNaNNaNWeekdayMondayLight_Load
freq1NaNNaNNaNNaNNaNNaNNaN22514456016280
meanNaN27.36944913.0379463.8660590.01151380.52505884.41008642707.363014NaNNaNNaN
stdNaN33.47330416.3029107.4342500.01615918.92957130.43667524968.193911NaNNaNNaN
minNaN0.0000000.0000000.0000000.0000000.0000000.0000000.000000NaNNaNNaN
25%NaN3.2000002.3300000.0000000.00000063.20000099.72000020700.000000NaNNaNNaN
50%NaN4.5700005.0000000.0000000.00000087.900000100.00000042300.000000NaNNaNNaN
75%NaN51.23000022.6500001.9800000.02000098.970000100.00000063900.000000NaNNaNNaN
maxNaN157.18000096.91000027.7600000.070000100.000000100.00000085500.000000NaNNaNNaN
\n", + "
" + ], + "text/plain": [ + " date Usage_kWh Lagging_Current_Reactive.Power_kVarh \\\n", + "count 31536 31536.000000 31536.000000 \n", + "unique 31536 NaN NaN \n", + "top 30/01/2018 00:15 NaN NaN \n", + "freq 1 NaN NaN \n", + "mean NaN 27.369449 13.037946 \n", + "std NaN 33.473304 16.302910 \n", + "min NaN 0.000000 0.000000 \n", + "25% NaN 3.200000 2.330000 \n", + "50% NaN 4.570000 5.000000 \n", + "75% NaN 51.230000 22.650000 \n", + "max NaN 157.180000 96.910000 \n", + "\n", + " Leading_Current_Reactive_Power_kVarh CO2(tCO2) \\\n", + "count 31536.000000 31536.000000 \n", + "unique NaN NaN \n", + "top NaN NaN \n", + "freq NaN NaN \n", + "mean 3.866059 0.011513 \n", + "std 7.434250 0.016159 \n", + "min 0.000000 0.000000 \n", + "25% 0.000000 0.000000 \n", + "50% 0.000000 0.000000 \n", + "75% 1.980000 0.020000 \n", + "max 27.760000 0.070000 \n", + "\n", + " Lagging_Current_Power_Factor Leading_Current_Power_Factor \\\n", + "count 31536.000000 31536.000000 \n", + "unique NaN NaN \n", + "top NaN NaN \n", + "freq NaN NaN \n", + "mean 80.525058 84.410086 \n", + "std 18.929571 30.436675 \n", + "min 0.000000 0.000000 \n", + "25% 63.200000 99.720000 \n", + "50% 87.900000 100.000000 \n", + "75% 98.970000 100.000000 \n", + "max 100.000000 100.000000 \n", + "\n", + " NSM WeekStatus Day_of_week Load_Type \n", + "count 31536.000000 31536 31536 31536 \n", + "unique NaN 2 7 3 \n", + "top NaN Weekday Monday Light_Load \n", + "freq NaN 22514 4560 16280 \n", + "mean 42707.363014 NaN NaN NaN \n", + "std 24968.193911 NaN NaN NaN \n", + "min 0.000000 NaN NaN NaN \n", + "25% 20700.000000 NaN NaN NaN \n", + "50% 42300.000000 NaN NaN NaN \n", + "75% 63900.000000 NaN NaN NaN \n", + "max 85500.000000 NaN NaN NaN " + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_data.describe(include='all')" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "radical-score", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dateUsage_kWhLagging_Current_Reactive.Power_kVarhLeading_Current_Reactive_Power_kVarhCO2(tCO2)Lagging_Current_Power_FactorLeading_Current_Power_FactorNSMWeekStatusDay_of_weekLoad_Type
count17521752.0000001752.0000001752.0000001752.0000001752.0000001752.0000001752.000000175217521752
unique1752NaNNaNNaNNaNNaNNaNNaN273
top07/05/2018 06:00NaNNaNNaNNaNNaNNaNNaNWeekdayTuesdayLight_Load
freq1NaNNaNNaNNaNNaNNaNNaN1268291898
meanNaN27.33098212.6490243.9492810.01153081.36452683.63070243080.821918NaNNaNNaN
stdNaN33.48421616.1852837.2986370.01622418.75833830.80118024944.325392NaNNaNNaN
minNaN2.4800000.0000000.0000000.00000041.12000012.5400000.000000NaNNaNNaN
25%NaN3.2000001.3925000.0000000.00000064.63000099.18000021600.000000NaNNaNNaN
50%NaN4.5700004.9300000.0000000.00000088.955000100.00000043200.000000NaNNaNNaN
75%NaN49.87000021.2400003.8375000.02000099.852500100.00000064800.000000NaNNaNNaN
maxNaN143.93000087.70000027.5400000.070000100.000000100.00000085500.000000NaNNaNNaN
\n", + "
" + ], + "text/plain": [ + " date Usage_kWh Lagging_Current_Reactive.Power_kVarh \\\n", + "count 1752 1752.000000 1752.000000 \n", + "unique 1752 NaN NaN \n", + "top 07/05/2018 06:00 NaN NaN \n", + "freq 1 NaN NaN \n", + "mean NaN 27.330982 12.649024 \n", + "std NaN 33.484216 16.185283 \n", + "min NaN 2.480000 0.000000 \n", + "25% NaN 3.200000 1.392500 \n", + "50% NaN 4.570000 4.930000 \n", + "75% NaN 49.870000 21.240000 \n", + "max NaN 143.930000 87.700000 \n", + "\n", + " Leading_Current_Reactive_Power_kVarh CO2(tCO2) \\\n", + "count 1752.000000 1752.000000 \n", + "unique NaN NaN \n", + "top NaN NaN \n", + "freq NaN NaN \n", + "mean 3.949281 0.011530 \n", + "std 7.298637 0.016224 \n", + "min 0.000000 0.000000 \n", + "25% 0.000000 0.000000 \n", + "50% 0.000000 0.000000 \n", + "75% 3.837500 0.020000 \n", + "max 27.540000 0.070000 \n", + "\n", + " Lagging_Current_Power_Factor Leading_Current_Power_Factor \\\n", + "count 1752.000000 1752.000000 \n", + "unique NaN NaN \n", + "top NaN NaN \n", + "freq NaN NaN \n", + "mean 81.364526 83.630702 \n", + "std 18.758338 30.801180 \n", + "min 41.120000 12.540000 \n", + "25% 64.630000 99.180000 \n", + "50% 88.955000 100.000000 \n", + "75% 99.852500 100.000000 \n", + "max 100.000000 100.000000 \n", + "\n", + " NSM WeekStatus Day_of_week Load_Type \n", + "count 1752.000000 1752 1752 1752 \n", + "unique NaN 2 7 3 \n", + "top NaN Weekday Tuesday Light_Load \n", + "freq NaN 1268 291 898 \n", + "mean 43080.821918 NaN NaN NaN \n", + "std 24944.325392 NaN NaN NaN \n", + "min 0.000000 NaN NaN NaN \n", + "25% 21600.000000 NaN NaN NaN \n", + "50% 43200.000000 NaN NaN NaN \n", + "75% 64800.000000 NaN NaN NaN \n", + "max 85500.000000 NaN NaN NaN " + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_data.describe(include='all')" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "attempted-lafayette", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dateUsage_kWhLagging_Current_Reactive.Power_kVarhLeading_Current_Reactive_Power_kVarhCO2(tCO2)Lagging_Current_Power_FactorLeading_Current_Power_FactorNSMWeekStatusDay_of_weekLoad_Type
count17521752.0000001752.0000001752.0000001752.0000001752.0000001752.0000001752.000000175217521752
unique1752NaNNaNNaNNaNNaNNaNNaN273
top02/06/2018 02:00NaNNaNNaNNaNNaNNaNNaNWeekdayMondayLight_Load
freq1NaNNaNNaNNaNNaNNaNNaN1274275894
meanNaN27.75678713.3756283.8806340.01172980.74554884.34515443186.643836NaNNaNNaN
stdNaN32.89580216.4821487.3764680.01594318.92737830.47542724440.888112NaNNaNNaN
minNaN2.5200000.0000000.0000000.00000038.33000014.0700000.000000NaNNaNNaN
25%NaN3.2000002.2700000.0000000.00000063.94250099.69000022500.000000NaNNaNNaN
50%NaN4.6800005.1100000.0000000.00000087.940000100.00000043200.000000NaNNaNNaN
75%NaN52.18750024.0500002.1775000.02000099.030000100.00000063900.000000NaNNaNNaN
maxNaN139.03000080.75000027.5800000.060000100.000000100.00000085500.000000NaNNaNNaN
\n", + "
" + ], + "text/plain": [ + " date Usage_kWh Lagging_Current_Reactive.Power_kVarh \\\n", + "count 1752 1752.000000 1752.000000 \n", + "unique 1752 NaN NaN \n", + "top 02/06/2018 02:00 NaN NaN \n", + "freq 1 NaN NaN \n", + "mean NaN 27.756787 13.375628 \n", + "std NaN 32.895802 16.482148 \n", + "min NaN 2.520000 0.000000 \n", + "25% NaN 3.200000 2.270000 \n", + "50% NaN 4.680000 5.110000 \n", + "75% NaN 52.187500 24.050000 \n", + "max NaN 139.030000 80.750000 \n", + "\n", + " Leading_Current_Reactive_Power_kVarh CO2(tCO2) \\\n", + "count 1752.000000 1752.000000 \n", + "unique NaN NaN \n", + "top NaN NaN \n", + "freq NaN NaN \n", + "mean 3.880634 0.011729 \n", + "std 7.376468 0.015943 \n", + "min 0.000000 0.000000 \n", + "25% 0.000000 0.000000 \n", + "50% 0.000000 0.000000 \n", + "75% 2.177500 0.020000 \n", + "max 27.580000 0.060000 \n", + "\n", + " Lagging_Current_Power_Factor Leading_Current_Power_Factor \\\n", + "count 1752.000000 1752.000000 \n", + "unique NaN NaN \n", + "top NaN NaN \n", + "freq NaN NaN \n", + "mean 80.745548 84.345154 \n", + "std 18.927378 30.475427 \n", + "min 38.330000 14.070000 \n", + "25% 63.942500 99.690000 \n", + "50% 87.940000 100.000000 \n", + "75% 99.030000 100.000000 \n", + "max 100.000000 100.000000 \n", + "\n", + " NSM WeekStatus Day_of_week Load_Type \n", + "count 1752.000000 1752 1752 1752 \n", + "unique NaN 2 7 3 \n", + "top NaN Weekday Monday Light_Load \n", + "freq NaN 1274 275 894 \n", + "mean 43186.643836 NaN NaN NaN \n", + "std 24440.888112 NaN NaN NaN \n", + "min 0.000000 NaN NaN NaN \n", + "25% 22500.000000 NaN NaN NaN \n", + "50% 43200.000000 NaN NaN NaN \n", + "75% 63900.000000 NaN NaN NaN \n", + "max 85500.000000 NaN NaN NaN " + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dev_data.describe(include='all')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/README.md b/README.md index f1985aa..f6e377e 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,4 @@ -# ium_470623 +Inżynieria uczenia maszynowego -Inżynieria uczenia maszynowego \ No newline at end of file +Zbiór danych: +https://www.kaggle.com/datasets/csafrit2/steel-industry-energy-consumption \ No newline at end of file