diff --git a/Dockerfile b/Dockerfile
index 45f20d3..eb57ed5 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,19 +1,22 @@
FROM ubuntu:latest
RUN apt update && apt install -y python3-pip
-RUN apt install -y unzip
-RUN pip install --user kaggle pandas seaborn sklearn
+# Refresh the package index in the same layer as the install so that a cached
+# "apt update" layer can never serve stale package lists to this step.
+RUN apt-get update && apt-get install -y unzip python3
+# The PyPI name "sklearn" is a deprecated alias; install scikit-learn itself.
+RUN pip3 install kaggle pandas seaborn scikit-learn
RUN mkdir ~/.kaggle/
-RUN echo '{"username":"ikami1","key":"c70ff184133bfabb351608b128e76cd2"}' > ~/.kaggle/kaggle.json
+# Kaggle credentials are deliberately not written into an image layer; supply
+# them at run time through the KAGGLE_USERNAME / KAGGLE_KEY environment variables.
WORKDIR /ium
-#COPY ./download_dataset.sh ./
-COPY ./Steel_industry_data.csv ./
-COPY ./process_dataset.py ./
-#COPY ./stats.sh ./
+COPY download_dataset.sh process_dataset.py stats.sh Steel_industry_data.csv ./
+
+RUN chmod a+x download_dataset.sh process_dataset.py stats.sh
#CMD ./download_dataset.sh
-CMD python3 process_dataset.py
+#CMD python3 process_dataset.py
#CMD ./stats.sh
\ No newline at end of file
diff --git a/IUM_dane02.ipynb b/IUM_dane02.ipynb
index 462690f..afefe7d 100644
--- a/IUM_dane02.ipynb
+++ b/IUM_dane02.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 1,
"id": "expected-payroll",
"metadata": {},
"outputs": [
@@ -10,36 +10,35 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Requirement already satisfied: kaggle in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (1.5.12)\n",
- "Requirement already satisfied: six>=1.10 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from kaggle) (1.15.0)\n",
- "Requirement already satisfied: certifi in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from kaggle) (2021.10.8)\n",
- "Requirement already satisfied: python-dateutil in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from kaggle) (2.8.1)\n",
- "Requirement already satisfied: requests in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from kaggle) (2.27.1)\n",
- "Requirement already satisfied: tqdm in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from kaggle) (4.59.0)\n",
- "Requirement already satisfied: python-slugify in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from kaggle) (6.1.1)\n",
- "Requirement already satisfied: urllib3 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from kaggle) (1.26.9)\n",
- "Requirement already satisfied: text-unidecode>=1.3 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from python-slugify->kaggle) (1.3)\n",
- "Requirement already satisfied: idna<4,>=2.5 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from requests->kaggle) (3.3)\n",
- "Requirement already satisfied: charset-normalizer~=2.0.0 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from requests->kaggle) (2.0.12)\n",
- "Requirement already satisfied: pandas in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (1.4.1)\n",
- "Requirement already satisfied: pytz>=2020.1 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from pandas) (2022.1)\n",
- "Requirement already satisfied: python-dateutil>=2.8.1 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from pandas) (2.8.1)\n",
- "Requirement already satisfied: numpy>=1.18.5 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from pandas) (1.20.1)\n",
- "Requirement already satisfied: six>=1.5 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from python-dateutil>=2.8.1->pandas) (1.15.0)\n",
- "Requirement already satisfied: seaborn in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (0.11.2)\n",
- "Requirement already satisfied: pandas>=0.23 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from seaborn) (1.4.1)\n",
- "Requirement already satisfied: numpy>=1.15 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from seaborn) (1.20.1)\n",
- "Requirement already satisfied: scipy>=1.0 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from seaborn) (1.6.1)\n",
- "Requirement already satisfied: matplotlib>=2.2 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from seaborn) (3.5.1)\n",
- "Requirement already satisfied: packaging>=20.0 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from matplotlib>=2.2->seaborn) (20.9)\n",
- "Requirement already satisfied: pyparsing>=2.2.1 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2.4.7)\n",
- "Requirement already satisfied: fonttools>=4.22.0 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from matplotlib>=2.2->seaborn) (4.31.1)\n",
- "Requirement already satisfied: python-dateutil>=2.7 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2.8.1)\n",
- "Requirement already satisfied: pillow>=6.2.0 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from matplotlib>=2.2->seaborn) (9.0.1)\n",
- "Requirement already satisfied: kiwisolver>=1.0.1 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from matplotlib>=2.2->seaborn) (1.4.0)\n",
- "Requirement already satisfied: cycler>=0.10 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from matplotlib>=2.2->seaborn) (0.11.0)\n",
- "Requirement already satisfied: pytz>=2020.1 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from pandas>=0.23->seaborn) (2022.1)\n",
- "Requirement already satisfied: six>=1.5 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from python-dateutil>=2.7->matplotlib>=2.2->seaborn) (1.15.0)\n"
+ "Requirement already satisfied: kaggle in c:\\users\\cgala\\anaconda3\\lib\\site-packages (1.5.12)\n",
+ "Requirement already satisfied: tqdm in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from kaggle) (4.50.2)\n",
+ "Requirement already satisfied: requests in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from kaggle) (2.24.0)\n",
+ "Requirement already satisfied: certifi in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from kaggle) (2020.6.20)\n",
+ "Requirement already satisfied: six>=1.10 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from kaggle) (1.15.0)\n",
+ "Requirement already satisfied: python-slugify in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from kaggle) (6.1.1)\n",
+ "Requirement already satisfied: urllib3 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from kaggle) (1.25.11)\n",
+ "Requirement already satisfied: python-dateutil in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from kaggle) (2.8.1)\n",
+ "Requirement already satisfied: idna<3,>=2.5 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from requests->kaggle) (2.10)\n",
+ "Requirement already satisfied: chardet<4,>=3.0.2 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from requests->kaggle) (3.0.4)\n",
+ "Requirement already satisfied: text-unidecode>=1.3 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from python-slugify->kaggle) (1.3)\n",
+ "Requirement already satisfied: pandas in c:\\users\\cgala\\anaconda3\\lib\\site-packages (1.1.3)\n",
+ "Requirement already satisfied: python-dateutil>=2.7.3 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from pandas) (2.8.1)\n",
+ "Requirement already satisfied: numpy>=1.15.4 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from pandas) (1.19.2)\n",
+ "Requirement already satisfied: pytz>=2017.2 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from pandas) (2020.1)\n",
+ "Requirement already satisfied: six>=1.5 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from python-dateutil>=2.7.3->pandas) (1.15.0)\n",
+ "Requirement already satisfied: seaborn in c:\\users\\cgala\\anaconda3\\lib\\site-packages (0.11.0)\n",
+ "Requirement already satisfied: numpy>=1.15 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from seaborn) (1.19.2)\n",
+ "Requirement already satisfied: scipy>=1.0 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from seaborn) (1.5.2)\n",
+ "Requirement already satisfied: matplotlib>=2.2 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from seaborn) (3.3.2)\n",
+ "Requirement already satisfied: pandas>=0.23 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from seaborn) (1.1.3)\n",
+ "Requirement already satisfied: python-dateutil>=2.1 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2.8.1)\n",
+ "Requirement already satisfied: kiwisolver>=1.0.1 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (1.3.0)\n",
+ "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.3 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2.4.7)\n",
+ "Requirement already satisfied: cycler>=0.10 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n",
+ "Requirement already satisfied: pillow>=6.2.0 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (8.0.1)\n",
+ "Requirement already satisfied: certifi>=2020.06.20 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2020.6.20)\n",
+ "Requirement already satisfied: pytz>=2017.2 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from pandas>=0.23->seaborn) (2020.1)\n",
+ "Requirement already satisfied: six>=1.5 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from python-dateutil>=2.1->matplotlib>=2.2->seaborn) (1.15.0)\n"
]
}
],
@@ -51,7 +50,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 3,
"id": "genetic-plaintiff",
"metadata": {},
"outputs": [
@@ -69,18 +68,18 @@
"text": [
"\n",
" 0%| | 0.00/484k [00:00, ?B/s]\n",
- "100%|##########| 484k/484k [00:00<00:00, 3.32MB/s]\n",
- "100%|##########| 484k/484k [00:00<00:00, 3.29MB/s]\n"
+ "100%|##########| 484k/484k [00:00<00:00, 2.36MB/s]\n",
+ "100%|##########| 484k/484k [00:00<00:00, 2.36MB/s]\n"
]
}
],
"source": [
- "!kaggle datasets download -d csafrit2/steel-industry-energy-consumption"
+ "!kaggle datasets download -d csafrit2/steel-industry-energy-consumption --force"
]
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 3,
"id": "compatible-following",
"metadata": {},
"outputs": [
@@ -628,7 +627,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 6,
"id": "loved-delight",
"metadata": {},
"outputs": [
@@ -637,19 +636,19 @@
"output_type": "stream",
"text": [
"Training set size:\n",
- "(31536, 11)\n",
+ "(28032, 11)\n",
"Testing set size:\n",
- "(1752, 11)\n",
+ "(3504, 11)\n",
"Dev set size:\n",
- "(1752, 11)\n"
+ "(3504, 11)\n"
]
}
],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
- "train_data, test_data = train_test_split(energy_data, test_size=3504, random_state=1)\n",
- "test_data, dev_data = train_test_split(test_data, test_size=1752, random_state=1)\n",
+ "train_data, test_data = train_test_split(energy_data, test_size=7008, random_state=1)\n",
+ "test_data, dev_data = train_test_split(test_data, test_size=3504, random_state=1)\n",
"print('Training set size:')\n",
"print(train_data.shape)\n",
"print('Testing set size:')\n",
@@ -660,7 +659,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 7,
"id": "formed-virginia",
"metadata": {},
"outputs": [
@@ -701,21 +700,21 @@
"
\n",
" \n",
" count | \n",
- " 31536 | \n",
- " 31536.000000 | \n",
- " 31536.000000 | \n",
- " 31536.000000 | \n",
- " 31536.000000 | \n",
- " 31536.000000 | \n",
- " 31536.000000 | \n",
- " 31536.000000 | \n",
- " 31536 | \n",
- " 31536 | \n",
- " 31536 | \n",
+ " 28032 | \n",
+ " 28032.000000 | \n",
+ " 28032.000000 | \n",
+ " 28032.000000 | \n",
+ " 28032.000000 | \n",
+ " 28032.000000 | \n",
+ " 28032.000000 | \n",
+ " 28032.000000 | \n",
+ " 28032 | \n",
+ " 28032 | \n",
+ " 28032 | \n",
"
\n",
" \n",
" unique | \n",
- " 31536 | \n",
+ " 28032 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -729,7 +728,7 @@
"
\n",
" \n",
" top | \n",
- " 30/01/2018 00:15 | \n",
+ " 07/08/2018 14:15 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -751,20 +750,20 @@
" NaN | \n",
" NaN | \n",
" NaN | \n",
- " 22514 | \n",
- " 4560 | \n",
- " 16280 | \n",
+ " 19998 | \n",
+ " 4087 | \n",
+ " 14467 | \n",
"
\n",
" \n",
" mean | \n",
" NaN | \n",
- " 27.369449 | \n",
- " 13.037946 | \n",
- " 3.866059 | \n",
- " 0.011513 | \n",
- " 80.525058 | \n",
- " 84.410086 | \n",
- " 42707.363014 | \n",
+ " 27.340174 | \n",
+ " 13.026801 | \n",
+ " 3.875001 | \n",
+ " 0.011498 | \n",
+ " 80.520145 | \n",
+ " 84.369511 | \n",
+ " 42761.429795 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -772,13 +771,13 @@
"
\n",
" std | \n",
" NaN | \n",
- " 33.473304 | \n",
- " 16.302910 | \n",
- " 7.434250 | \n",
- " 0.016159 | \n",
- " 18.929571 | \n",
- " 30.436675 | \n",
- " 24968.193911 | \n",
+ " 33.469130 | \n",
+ " 16.289348 | \n",
+ " 7.445898 | \n",
+ " 0.016153 | \n",
+ " 18.932825 | \n",
+ " 30.462193 | \n",
+ " 24944.585138 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -801,11 +800,11 @@
" 25% | \n",
" NaN | \n",
" 3.200000 | \n",
- " 2.330000 | \n",
+ " 2.300000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
- " 63.200000 | \n",
- " 99.720000 | \n",
+ " 63.227500 | \n",
+ " 99.710000 | \n",
" 20700.000000 | \n",
" NaN | \n",
" NaN | \n",
@@ -818,9 +817,9 @@
" 5.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
- " 87.900000 | \n",
+ " 87.870000 | \n",
" 100.000000 | \n",
- " 42300.000000 | \n",
+ " 43200.000000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -828,13 +827,13 @@
"
\n",
" 75% | \n",
" NaN | \n",
- " 51.230000 | \n",
- " 22.650000 | \n",
- " 1.980000 | \n",
+ " 51.190000 | \n",
+ " 22.750000 | \n",
+ " 2.020000 | \n",
" 0.020000 | \n",
- " 98.970000 | \n",
+ " 99.000000 | \n",
" 100.000000 | \n",
- " 63900.000000 | \n",
+ " 64800.000000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -859,59 +858,59 @@
],
"text/plain": [
" date Usage_kWh Lagging_Current_Reactive.Power_kVarh \\\n",
- "count 31536 31536.000000 31536.000000 \n",
- "unique 31536 NaN NaN \n",
- "top 30/01/2018 00:15 NaN NaN \n",
+ "count 28032 28032.000000 28032.000000 \n",
+ "unique 28032 NaN NaN \n",
+ "top 07/08/2018 14:15 NaN NaN \n",
"freq 1 NaN NaN \n",
- "mean NaN 27.369449 13.037946 \n",
- "std NaN 33.473304 16.302910 \n",
+ "mean NaN 27.340174 13.026801 \n",
+ "std NaN 33.469130 16.289348 \n",
"min NaN 0.000000 0.000000 \n",
- "25% NaN 3.200000 2.330000 \n",
+ "25% NaN 3.200000 2.300000 \n",
"50% NaN 4.570000 5.000000 \n",
- "75% NaN 51.230000 22.650000 \n",
+ "75% NaN 51.190000 22.750000 \n",
"max NaN 157.180000 96.910000 \n",
"\n",
" Leading_Current_Reactive_Power_kVarh CO2(tCO2) \\\n",
- "count 31536.000000 31536.000000 \n",
+ "count 28032.000000 28032.000000 \n",
"unique NaN NaN \n",
"top NaN NaN \n",
"freq NaN NaN \n",
- "mean 3.866059 0.011513 \n",
- "std 7.434250 0.016159 \n",
+ "mean 3.875001 0.011498 \n",
+ "std 7.445898 0.016153 \n",
"min 0.000000 0.000000 \n",
"25% 0.000000 0.000000 \n",
"50% 0.000000 0.000000 \n",
- "75% 1.980000 0.020000 \n",
+ "75% 2.020000 0.020000 \n",
"max 27.760000 0.070000 \n",
"\n",
" Lagging_Current_Power_Factor Leading_Current_Power_Factor \\\n",
- "count 31536.000000 31536.000000 \n",
+ "count 28032.000000 28032.000000 \n",
"unique NaN NaN \n",
"top NaN NaN \n",
"freq NaN NaN \n",
- "mean 80.525058 84.410086 \n",
- "std 18.929571 30.436675 \n",
+ "mean 80.520145 84.369511 \n",
+ "std 18.932825 30.462193 \n",
"min 0.000000 0.000000 \n",
- "25% 63.200000 99.720000 \n",
- "50% 87.900000 100.000000 \n",
- "75% 98.970000 100.000000 \n",
+ "25% 63.227500 99.710000 \n",
+ "50% 87.870000 100.000000 \n",
+ "75% 99.000000 100.000000 \n",
"max 100.000000 100.000000 \n",
"\n",
" NSM WeekStatus Day_of_week Load_Type \n",
- "count 31536.000000 31536 31536 31536 \n",
+ "count 28032.000000 28032 28032 28032 \n",
"unique NaN 2 7 3 \n",
"top NaN Weekday Monday Light_Load \n",
- "freq NaN 22514 4560 16280 \n",
- "mean 42707.363014 NaN NaN NaN \n",
- "std 24968.193911 NaN NaN NaN \n",
+ "freq NaN 19998 4087 14467 \n",
+ "mean 42761.429795 NaN NaN NaN \n",
+ "std 24944.585138 NaN NaN NaN \n",
"min 0.000000 NaN NaN NaN \n",
"25% 20700.000000 NaN NaN NaN \n",
- "50% 42300.000000 NaN NaN NaN \n",
- "75% 63900.000000 NaN NaN NaN \n",
+ "50% 43200.000000 NaN NaN NaN \n",
+ "75% 64800.000000 NaN NaN NaN \n",
"max 85500.000000 NaN NaN NaN "
]
},
- "execution_count": 12,
+ "execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -922,7 +921,7 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 8,
"id": "radical-score",
"metadata": {},
"outputs": [
@@ -963,21 +962,21 @@
"
\n",
" \n",
" count | \n",
- " 1752 | \n",
- " 1752.000000 | \n",
- " 1752.000000 | \n",
- " 1752.000000 | \n",
- " 1752.000000 | \n",
- " 1752.000000 | \n",
- " 1752.000000 | \n",
- " 1752.000000 | \n",
- " 1752 | \n",
- " 1752 | \n",
- " 1752 | \n",
+ " 3504 | \n",
+ " 3504.000000 | \n",
+ " 3504.000000 | \n",
+ " 3504.000000 | \n",
+ " 3504.000000 | \n",
+ " 3504.000000 | \n",
+ " 3504.000000 | \n",
+ " 3504.000000 | \n",
+ " 3504 | \n",
+ " 3504 | \n",
+ " 3504 | \n",
"
\n",
" \n",
" unique | \n",
- " 1752 | \n",
+ " 3504 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -991,7 +990,7 @@
"
\n",
" \n",
" top | \n",
- " 07/05/2018 06:00 | \n",
+ " 20/06/2018 13:00 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -1000,7 +999,7 @@
" NaN | \n",
" NaN | \n",
" Weekday | \n",
- " Tuesday | \n",
+ " Wednesday | \n",
" Light_Load | \n",
"
\n",
" \n",
@@ -1013,20 +1012,20 @@
" NaN | \n",
" NaN | \n",
" NaN | \n",
- " 1268 | \n",
- " 291 | \n",
- " 898 | \n",
+ " 2522 | \n",
+ " 527 | \n",
+ " 1837 | \n",
"
\n",
" \n",
" mean | \n",
" NaN | \n",
- " 27.330982 | \n",
- " 12.649024 | \n",
- " 3.949281 | \n",
- " 0.011530 | \n",
- " 81.364526 | \n",
- " 83.630702 | \n",
- " 43080.821918 | \n",
+ " 26.355685 | \n",
+ " 12.374717 | \n",
+ " 3.891093 | \n",
+ " 0.011050 | \n",
+ " 80.687751 | \n",
+ " 84.082794 | \n",
+ " 42594.092466 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -1034,13 +1033,13 @@
"
\n",
" std | \n",
" NaN | \n",
- " 33.484216 | \n",
- " 16.185283 | \n",
- " 7.298637 | \n",
- " 0.016224 | \n",
- " 18.758338 | \n",
- " 30.801180 | \n",
- " 24944.325392 | \n",
+ " 32.519749 | \n",
+ " 15.830961 | \n",
+ " 7.353028 | \n",
+ " 0.015762 | \n",
+ " 19.053018 | \n",
+ " 30.614144 | \n",
+ " 25222.804637 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -1052,7 +1051,7 @@
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
- " 41.120000 | \n",
+ " 40.290000 | \n",
" 12.540000 | \n",
" 0.000000 | \n",
" NaN | \n",
@@ -1062,13 +1061,13 @@
"
\n",
" 25% | \n",
" NaN | \n",
- " 3.200000 | \n",
- " 1.392500 | \n",
+ " 3.192500 | \n",
+ " 2.090000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
- " 64.630000 | \n",
- " 99.180000 | \n",
- " 21600.000000 | \n",
+ " 63.130000 | \n",
+ " 99.562500 | \n",
+ " 20700.000000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -1076,13 +1075,13 @@
"
\n",
" 50% | \n",
" NaN | \n",
- " 4.570000 | \n",
- " 4.930000 | \n",
+ " 4.500000 | \n",
+ " 4.900000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
- " 88.955000 | \n",
+ " 88.210000 | \n",
" 100.000000 | \n",
- " 43200.000000 | \n",
+ " 42300.000000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -1090,11 +1089,11 @@
"
\n",
" 75% | \n",
" NaN | \n",
- " 49.870000 | \n",
- " 21.240000 | \n",
- " 3.837500 | \n",
+ " 49.570000 | \n",
+ " 20.700000 | \n",
+ " 2.967500 | \n",
" 0.020000 | \n",
- " 99.852500 | \n",
+ " 99.390000 | \n",
" 100.000000 | \n",
" 64800.000000 | \n",
" NaN | \n",
@@ -1104,9 +1103,9 @@
"
\n",
" max | \n",
" NaN | \n",
- " 143.930000 | \n",
- " 87.700000 | \n",
- " 27.540000 | \n",
+ " 153.140000 | \n",
+ " 82.940000 | \n",
+ " 27.650000 | \n",
" 0.070000 | \n",
" 100.000000 | \n",
" 100.000000 | \n",
@@ -1121,59 +1120,59 @@
],
"text/plain": [
" date Usage_kWh Lagging_Current_Reactive.Power_kVarh \\\n",
- "count 1752 1752.000000 1752.000000 \n",
- "unique 1752 NaN NaN \n",
- "top 07/05/2018 06:00 NaN NaN \n",
+ "count 3504 3504.000000 3504.000000 \n",
+ "unique 3504 NaN NaN \n",
+ "top 20/06/2018 13:00 NaN NaN \n",
"freq 1 NaN NaN \n",
- "mean NaN 27.330982 12.649024 \n",
- "std NaN 33.484216 16.185283 \n",
+ "mean NaN 26.355685 12.374717 \n",
+ "std NaN 32.519749 15.830961 \n",
"min NaN 2.480000 0.000000 \n",
- "25% NaN 3.200000 1.392500 \n",
- "50% NaN 4.570000 4.930000 \n",
- "75% NaN 49.870000 21.240000 \n",
- "max NaN 143.930000 87.700000 \n",
+ "25% NaN 3.192500 2.090000 \n",
+ "50% NaN 4.500000 4.900000 \n",
+ "75% NaN 49.570000 20.700000 \n",
+ "max NaN 153.140000 82.940000 \n",
"\n",
" Leading_Current_Reactive_Power_kVarh CO2(tCO2) \\\n",
- "count 1752.000000 1752.000000 \n",
+ "count 3504.000000 3504.000000 \n",
"unique NaN NaN \n",
"top NaN NaN \n",
"freq NaN NaN \n",
- "mean 3.949281 0.011530 \n",
- "std 7.298637 0.016224 \n",
+ "mean 3.891093 0.011050 \n",
+ "std 7.353028 0.015762 \n",
"min 0.000000 0.000000 \n",
"25% 0.000000 0.000000 \n",
"50% 0.000000 0.000000 \n",
- "75% 3.837500 0.020000 \n",
- "max 27.540000 0.070000 \n",
+ "75% 2.967500 0.020000 \n",
+ "max 27.650000 0.070000 \n",
"\n",
" Lagging_Current_Power_Factor Leading_Current_Power_Factor \\\n",
- "count 1752.000000 1752.000000 \n",
+ "count 3504.000000 3504.000000 \n",
"unique NaN NaN \n",
"top NaN NaN \n",
"freq NaN NaN \n",
- "mean 81.364526 83.630702 \n",
- "std 18.758338 30.801180 \n",
- "min 41.120000 12.540000 \n",
- "25% 64.630000 99.180000 \n",
- "50% 88.955000 100.000000 \n",
- "75% 99.852500 100.000000 \n",
+ "mean 80.687751 84.082794 \n",
+ "std 19.053018 30.614144 \n",
+ "min 40.290000 12.540000 \n",
+ "25% 63.130000 99.562500 \n",
+ "50% 88.210000 100.000000 \n",
+ "75% 99.390000 100.000000 \n",
"max 100.000000 100.000000 \n",
"\n",
" NSM WeekStatus Day_of_week Load_Type \n",
- "count 1752.000000 1752 1752 1752 \n",
+ "count 3504.000000 3504 3504 3504 \n",
"unique NaN 2 7 3 \n",
- "top NaN Weekday Tuesday Light_Load \n",
- "freq NaN 1268 291 898 \n",
- "mean 43080.821918 NaN NaN NaN \n",
- "std 24944.325392 NaN NaN NaN \n",
+ "top NaN Weekday Wednesday Light_Load \n",
+ "freq NaN 2522 527 1837 \n",
+ "mean 42594.092466 NaN NaN NaN \n",
+ "std 25222.804637 NaN NaN NaN \n",
"min 0.000000 NaN NaN NaN \n",
- "25% 21600.000000 NaN NaN NaN \n",
- "50% 43200.000000 NaN NaN NaN \n",
+ "25% 20700.000000 NaN NaN NaN \n",
+ "50% 42300.000000 NaN NaN NaN \n",
"75% 64800.000000 NaN NaN NaN \n",
"max 85500.000000 NaN NaN NaN "
]
},
- "execution_count": 13,
+ "execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@@ -1184,7 +1183,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 9,
"id": "attempted-lafayette",
"metadata": {},
"outputs": [
@@ -1225,21 +1224,21 @@
"
\n",
" \n",
" count | \n",
- " 1752 | \n",
- " 1752.000000 | \n",
- " 1752.000000 | \n",
- " 1752.000000 | \n",
- " 1752.000000 | \n",
- " 1752.000000 | \n",
- " 1752.000000 | \n",
- " 1752.000000 | \n",
- " 1752 | \n",
- " 1752 | \n",
- " 1752 | \n",
+ " 3504 | \n",
+ " 3504.000000 | \n",
+ " 3504.000000 | \n",
+ " 3504.000000 | \n",
+ " 3504.000000 | \n",
+ " 3504.000000 | \n",
+ " 3504.000000 | \n",
+ " 3504.000000 | \n",
+ " 3504 | \n",
+ " 3504 | \n",
+ " 3504 | \n",
"
\n",
" \n",
" unique | \n",
- " 1752 | \n",
+ " 3504 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -1253,7 +1252,7 @@
"
\n",
" \n",
" top | \n",
- " 02/06/2018 02:00 | \n",
+ " 16/11/2018 16:45 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -1262,7 +1261,7 @@
" NaN | \n",
" NaN | \n",
" Weekday | \n",
- " Monday | \n",
+ " Tuesday | \n",
" Light_Load | \n",
"
\n",
" \n",
@@ -1275,20 +1274,20 @@
" NaN | \n",
" NaN | \n",
" NaN | \n",
- " 1274 | \n",
- " 275 | \n",
- " 894 | \n",
+ " 2536 | \n",
+ " 543 | \n",
+ " 1768 | \n",
"
\n",
" \n",
" mean | \n",
" NaN | \n",
- " 27.756787 | \n",
- " 13.375628 | \n",
- " 3.880634 | \n",
- " 0.011729 | \n",
- " 80.745548 | \n",
- " 84.345154 | \n",
- " 43186.643836 | \n",
+ " 28.791849 | \n",
+ " 13.764709 | \n",
+ " 3.818382 | \n",
+ " 0.012212 | \n",
+ " 80.931650 | \n",
+ " 84.639817 | \n",
+ " 42814.469178 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -1296,13 +1295,13 @@
"
\n",
" std | \n",
" NaN | \n",
- " 32.895802 | \n",
- " 16.482148 | \n",
- " 7.376468 | \n",
- " 0.015943 | \n",
- " 18.927378 | \n",
- " 30.475427 | \n",
- " 24440.888112 | \n",
+ " 34.115238 | \n",
+ " 16.872400 | \n",
+ " 7.325016 | \n",
+ " 0.016499 | \n",
+ " 18.696834 | \n",
+ " 30.258743 | \n",
+ " 24628.829557 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -1310,12 +1309,12 @@
"
\n",
" min | \n",
" NaN | \n",
- " 2.520000 | \n",
+ " 2.480000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 38.330000 | \n",
- " 14.070000 | \n",
+ " 13.050000 | \n",
" 0.000000 | \n",
" NaN | \n",
" NaN | \n",
@@ -1324,13 +1323,13 @@
"
\n",
" 25% | \n",
" NaN | \n",
- " 3.200000 | \n",
- " 2.270000 | \n",
+ " 3.240000 | \n",
+ " 2.380000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
- " 63.942500 | \n",
- " 99.690000 | \n",
- " 22500.000000 | \n",
+ " 64.112500 | \n",
+ " 99.730000 | \n",
+ " 21600.000000 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -1338,11 +1337,11 @@
"
\n",
" 50% | \n",
" NaN | \n",
- " 4.680000 | \n",
+ " 4.720000 | \n",
" 5.110000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
- " 87.940000 | \n",
+ " 88.325000 | \n",
" 100.000000 | \n",
" 43200.000000 | \n",
" NaN | \n",
@@ -1352,11 +1351,11 @@
"
\n",
" 75% | \n",
" NaN | \n",
- " 52.187500 | \n",
- " 24.050000 | \n",
- " 2.177500 | \n",
+ " 53.227500 | \n",
+ " 24.810000 | \n",
+ " 1.917500 | \n",
" 0.020000 | \n",
- " 99.030000 | \n",
+ " 98.792500 | \n",
" 100.000000 | \n",
" 63900.000000 | \n",
" NaN | \n",
@@ -1366,10 +1365,10 @@
"
\n",
" max | \n",
" NaN | \n",
- " 139.030000 | \n",
- " 80.750000 | \n",
- " 27.580000 | \n",
- " 0.060000 | \n",
+ " 146.880000 | \n",
+ " 87.700000 | \n",
+ " 27.540000 | \n",
+ " 0.070000 | \n",
" 100.000000 | \n",
" 100.000000 | \n",
" 85500.000000 | \n",
@@ -1383,59 +1382,59 @@
],
"text/plain": [
" date Usage_kWh Lagging_Current_Reactive.Power_kVarh \\\n",
- "count 1752 1752.000000 1752.000000 \n",
- "unique 1752 NaN NaN \n",
- "top 02/06/2018 02:00 NaN NaN \n",
+ "count 3504 3504.000000 3504.000000 \n",
+ "unique 3504 NaN NaN \n",
+ "top 16/11/2018 16:45 NaN NaN \n",
"freq 1 NaN NaN \n",
- "mean NaN 27.756787 13.375628 \n",
- "std NaN 32.895802 16.482148 \n",
- "min NaN 2.520000 0.000000 \n",
- "25% NaN 3.200000 2.270000 \n",
- "50% NaN 4.680000 5.110000 \n",
- "75% NaN 52.187500 24.050000 \n",
- "max NaN 139.030000 80.750000 \n",
+ "mean NaN 28.791849 13.764709 \n",
+ "std NaN 34.115238 16.872400 \n",
+ "min NaN 2.480000 0.000000 \n",
+ "25% NaN 3.240000 2.380000 \n",
+ "50% NaN 4.720000 5.110000 \n",
+ "75% NaN 53.227500 24.810000 \n",
+ "max NaN 146.880000 87.700000 \n",
"\n",
" Leading_Current_Reactive_Power_kVarh CO2(tCO2) \\\n",
- "count 1752.000000 1752.000000 \n",
+ "count 3504.000000 3504.000000 \n",
"unique NaN NaN \n",
"top NaN NaN \n",
"freq NaN NaN \n",
- "mean 3.880634 0.011729 \n",
- "std 7.376468 0.015943 \n",
+ "mean 3.818382 0.012212 \n",
+ "std 7.325016 0.016499 \n",
"min 0.000000 0.000000 \n",
"25% 0.000000 0.000000 \n",
"50% 0.000000 0.000000 \n",
- "75% 2.177500 0.020000 \n",
- "max 27.580000 0.060000 \n",
+ "75% 1.917500 0.020000 \n",
+ "max 27.540000 0.070000 \n",
"\n",
" Lagging_Current_Power_Factor Leading_Current_Power_Factor \\\n",
- "count 1752.000000 1752.000000 \n",
+ "count 3504.000000 3504.000000 \n",
"unique NaN NaN \n",
"top NaN NaN \n",
"freq NaN NaN \n",
- "mean 80.745548 84.345154 \n",
- "std 18.927378 30.475427 \n",
- "min 38.330000 14.070000 \n",
- "25% 63.942500 99.690000 \n",
- "50% 87.940000 100.000000 \n",
- "75% 99.030000 100.000000 \n",
+ "mean 80.931650 84.639817 \n",
+ "std 18.696834 30.258743 \n",
+ "min 38.330000 13.050000 \n",
+ "25% 64.112500 99.730000 \n",
+ "50% 88.325000 100.000000 \n",
+ "75% 98.792500 100.000000 \n",
"max 100.000000 100.000000 \n",
"\n",
" NSM WeekStatus Day_of_week Load_Type \n",
- "count 1752.000000 1752 1752 1752 \n",
+ "count 3504.000000 3504 3504 3504 \n",
"unique NaN 2 7 3 \n",
- "top NaN Weekday Monday Light_Load \n",
- "freq NaN 1274 275 894 \n",
- "mean 43186.643836 NaN NaN NaN \n",
- "std 24440.888112 NaN NaN NaN \n",
+ "top NaN Weekday Tuesday Light_Load \n",
+ "freq NaN 2536 543 1768 \n",
+ "mean 42814.469178 NaN NaN NaN \n",
+ "std 24628.829557 NaN NaN NaN \n",
"min 0.000000 NaN NaN NaN \n",
- "25% 22500.000000 NaN NaN NaN \n",
+ "25% 21600.000000 NaN NaN NaN \n",
"50% 43200.000000 NaN NaN NaN \n",
"75% 63900.000000 NaN NaN NaN \n",
"max 85500.000000 NaN NaN NaN "
]
},
- "execution_count": 14,
+ "execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@@ -1443,6 +1442,18 @@
"source": [
"dev_data.describe(include='all')"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "banned-scottish",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "test_data.to_csv(\"steel_industry_data_test.csv\", encoding=\"utf-8\", index=False)\n",
+ "dev_data.to_csv(\"steel_industry_data_dev.csv\", encoding=\"utf-8\", index=False)\n",
+ "train_data.to_csv(\"steel_industry_data_train.csv\", encoding=\"utf-8\", index=False)"
+ ]
}
],
"metadata": {
@@ -1466,4 +1477,4 @@
},
"nbformat": 4,
"nbformat_minor": 5
-}
+}
\ No newline at end of file
diff --git a/Jenkinsfile_stats b/Jenkinsfile_stats
index b76e432..781ea90 100644
--- a/Jenkinsfile_stats
+++ b/Jenkinsfile_stats
@@ -1,5 +1,7 @@
pipeline {
- agent any
+ agent {
+ docker { image 'ikami1/ium:v1' }
+ }
parameters {
buildSelector(
defaultSelector: lastSuccessful(),
diff --git a/download_dataset.sh b/download_dataset.sh
index a6db0e7..dc332dc 100644
--- a/download_dataset.sh
+++ b/download_dataset.sh
@@ -1,2 +1,7 @@
-kaggle datasets download -d csafrit2/steel-industry-energy-consumption --force
-unzip -o -j steel-industry-energy-consumption.zip
\ No newline at end of file
+#!/bin/sh
+# Fetch the steel-industry dataset archive from Kaggle and unpack it into the
+# current directory. Abort on the first failing command so that a failed
+# download is never followed by unpacking a stale archive.
+set -e
+kaggle datasets download -d csafrit2/steel-industry-energy-consumption
+unzip -o steel-industry-energy-consumption.zip
\ No newline at end of file
diff --git a/jenkinsfile b/jenkinsfile
index fcdee6e..88dada2 100644
--- a/jenkinsfile
+++ b/jenkinsfile
@@ -1,5 +1,7 @@
pipeline {
- agent any
+ agent {
+ dockerfile true
+ }
parameters {
string(
defaultValue: 'ikami1',
@@ -8,7 +10,7 @@ pipeline {
trim: false
)
password(
- defaultValue: 'c70ff184133bfabb351608b128e76cd2',
+ defaultValue: '',
description: 'Kaggle token taken from kaggle.json file, as described in https://github.com/Kaggle/kaggle-api#api-credentials',
name: 'KAGGLE_KEY'
)
@@ -38,7 +40,8 @@ pipeline {
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}",
"KAGGLE_KEY=${params.KAGGLE_KEY}",
"CUTOFF=${params.CUTOFF}"]) {
- sh "./download.sh"
+ sh "./download_dataset.sh"
+ sh "python3 process_dataset.py"
archiveArtifacts artifacts: "steel_industry_data_test.csv, steel_industry_data_dev.csv, steel_industry_data_train.csv"
}
}