Zad 04.Docker - Konteneryzacja

This commit is contained in:
Cezary Gałązkiewicz 2022-04-11 00:40:48 +02:00
parent 924738d4b3
commit 92a55cfc38
5 changed files with 292 additions and 277 deletions

View File

@ -1,19 +1,18 @@
# Image for the steel-industry energy-consumption exercise: Python 3, the
# Kaggle CLI and the data-science packages, with the project scripts and the
# dataset CSV placed in /ium.

# Pinned release instead of :latest so rebuilds are reproducible. Newer Ubuntu
# releases also mark the system Python as externally managed (PEP 668), which
# makes a plain system-wide `pip3 install` fail.
FROM ubuntu:22.04

# Update and install in ONE layer so a cached, stale package index is never
# paired with a fresh install; the apt lists are removed in that same layer
# because deleting them in a later layer would not shrink the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        python3 \
        python3-pip \
        unzip \
    && rm -rf /var/lib/apt/lists/*

# "scikit-learn" is the real distribution name; the "sklearn" alias on PyPI is
# deprecated. --no-cache-dir keeps pip's download cache out of the layer.
RUN pip3 install --no-cache-dir \
        kaggle \
        pandas \
        scikit-learn \
        seaborn

# SECURITY: no kaggle.json is written into the image. Anything created by a
# RUN instruction stays readable in the image layers and in `docker history`.
# Supply the credentials at run time through the KAGGLE_USERNAME and
# KAGGLE_KEY environment variables instead, e.g.
#   docker run -e KAGGLE_USERNAME=... -e KAGGLE_KEY=... <image>
# The API key that used to be hard-coded here must be treated as leaked and
# should be regenerated in the Kaggle account settings.

WORKDIR /ium

COPY download_dataset.sh process_dataset.py stats.sh Steel_industry_data.csv ./
RUN chmod a+x download_dataset.sh process_dataset.py

# Exec form: python3 is started directly (no /bin/sh wrapper) and therefore
# receives the stop signal from `docker stop`.
# Alternative entry points kept for reference:
#CMD ["./download_dataset.sh"]
#CMD ["./stats.sh"]
CMD ["python3", "process_dataset.py"]

View File

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 1,
"id": "expected-payroll",
"metadata": {},
"outputs": [
@ -10,36 +10,35 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: kaggle in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (1.5.12)\n",
"Requirement already satisfied: six>=1.10 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from kaggle) (1.15.0)\n",
"Requirement already satisfied: certifi in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from kaggle) (2021.10.8)\n",
"Requirement already satisfied: python-dateutil in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from kaggle) (2.8.1)\n",
"Requirement already satisfied: requests in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from kaggle) (2.27.1)\n",
"Requirement already satisfied: tqdm in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from kaggle) (4.59.0)\n",
"Requirement already satisfied: python-slugify in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from kaggle) (6.1.1)\n",
"Requirement already satisfied: urllib3 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from kaggle) (1.26.9)\n",
"Requirement already satisfied: text-unidecode>=1.3 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from python-slugify->kaggle) (1.3)\n",
"Requirement already satisfied: idna<4,>=2.5 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from requests->kaggle) (3.3)\n",
"Requirement already satisfied: charset-normalizer~=2.0.0 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from requests->kaggle) (2.0.12)\n",
"Requirement already satisfied: pandas in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (1.4.1)\n",
"Requirement already satisfied: pytz>=2020.1 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from pandas) (2022.1)\n",
"Requirement already satisfied: python-dateutil>=2.8.1 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from pandas) (2.8.1)\n",
"Requirement already satisfied: numpy>=1.18.5 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from pandas) (1.20.1)\n",
"Requirement already satisfied: six>=1.5 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from python-dateutil>=2.8.1->pandas) (1.15.0)\n",
"Requirement already satisfied: seaborn in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (0.11.2)\n",
"Requirement already satisfied: pandas>=0.23 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from seaborn) (1.4.1)\n",
"Requirement already satisfied: numpy>=1.15 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from seaborn) (1.20.1)\n",
"Requirement already satisfied: scipy>=1.0 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from seaborn) (1.6.1)\n",
"Requirement already satisfied: matplotlib>=2.2 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from seaborn) (3.5.1)\n",
"Requirement already satisfied: packaging>=20.0 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from matplotlib>=2.2->seaborn) (20.9)\n",
"Requirement already satisfied: pyparsing>=2.2.1 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2.4.7)\n",
"Requirement already satisfied: fonttools>=4.22.0 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from matplotlib>=2.2->seaborn) (4.31.1)\n",
"Requirement already satisfied: python-dateutil>=2.7 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2.8.1)\n",
"Requirement already satisfied: pillow>=6.2.0 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from matplotlib>=2.2->seaborn) (9.0.1)\n",
"Requirement already satisfied: kiwisolver>=1.0.1 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from matplotlib>=2.2->seaborn) (1.4.0)\n",
"Requirement already satisfied: cycler>=0.10 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from matplotlib>=2.2->seaborn) (0.11.0)\n",
"Requirement already satisfied: pytz>=2020.1 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from pandas>=0.23->seaborn) (2022.1)\n",
"Requirement already satisfied: six>=1.5 in c:\\users\\cgala\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from python-dateutil>=2.7->matplotlib>=2.2->seaborn) (1.15.0)\n"
"Requirement already satisfied: kaggle in c:\\users\\cgala\\anaconda3\\lib\\site-packages (1.5.12)\n",
"Requirement already satisfied: tqdm in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from kaggle) (4.50.2)\n",
"Requirement already satisfied: requests in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from kaggle) (2.24.0)\n",
"Requirement already satisfied: certifi in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from kaggle) (2020.6.20)\n",
"Requirement already satisfied: six>=1.10 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from kaggle) (1.15.0)\n",
"Requirement already satisfied: python-slugify in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from kaggle) (6.1.1)\n",
"Requirement already satisfied: urllib3 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from kaggle) (1.25.11)\n",
"Requirement already satisfied: python-dateutil in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from kaggle) (2.8.1)\n",
"Requirement already satisfied: idna<3,>=2.5 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from requests->kaggle) (2.10)\n",
"Requirement already satisfied: chardet<4,>=3.0.2 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from requests->kaggle) (3.0.4)\n",
"Requirement already satisfied: text-unidecode>=1.3 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from python-slugify->kaggle) (1.3)\n",
"Requirement already satisfied: pandas in c:\\users\\cgala\\anaconda3\\lib\\site-packages (1.1.3)\n",
"Requirement already satisfied: python-dateutil>=2.7.3 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from pandas) (2.8.1)\n",
"Requirement already satisfied: numpy>=1.15.4 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from pandas) (1.19.2)\n",
"Requirement already satisfied: pytz>=2017.2 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from pandas) (2020.1)\n",
"Requirement already satisfied: six>=1.5 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from python-dateutil>=2.7.3->pandas) (1.15.0)\n",
"Requirement already satisfied: seaborn in c:\\users\\cgala\\anaconda3\\lib\\site-packages (0.11.0)\n",
"Requirement already satisfied: numpy>=1.15 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from seaborn) (1.19.2)\n",
"Requirement already satisfied: scipy>=1.0 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from seaborn) (1.5.2)\n",
"Requirement already satisfied: matplotlib>=2.2 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from seaborn) (3.3.2)\n",
"Requirement already satisfied: pandas>=0.23 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from seaborn) (1.1.3)\n",
"Requirement already satisfied: python-dateutil>=2.1 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2.8.1)\n",
"Requirement already satisfied: kiwisolver>=1.0.1 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (1.3.0)\n",
"Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.3 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2.4.7)\n",
"Requirement already satisfied: cycler>=0.10 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n",
"Requirement already satisfied: pillow>=6.2.0 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (8.0.1)\n",
"Requirement already satisfied: certifi>=2020.06.20 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2020.6.20)\n",
"Requirement already satisfied: pytz>=2017.2 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from pandas>=0.23->seaborn) (2020.1)\n",
"Requirement already satisfied: six>=1.5 in c:\\users\\cgala\\anaconda3\\lib\\site-packages (from python-dateutil>=2.1->matplotlib>=2.2->seaborn) (1.15.0)\n"
]
}
],
@ -51,7 +50,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 3,
"id": "genetic-plaintiff",
"metadata": {},
"outputs": [
@ -69,18 +68,18 @@
"text": [
"\n",
" 0%| | 0.00/484k [00:00<?, ?B/s]\n",
"100%|##########| 484k/484k [00:00<00:00, 3.32MB/s]\n",
"100%|##########| 484k/484k [00:00<00:00, 3.29MB/s]\n"
"100%|##########| 484k/484k [00:00<00:00, 2.36MB/s]\n",
"100%|##########| 484k/484k [00:00<00:00, 2.36MB/s]\n"
]
}
],
"source": [
"!kaggle datasets download -d csafrit2/steel-industry-energy-consumption"
"!kaggle datasets download -d csafrit2/steel-industry-energy-consumption --force"
]
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"id": "compatible-following",
"metadata": {},
"outputs": [
@ -628,7 +627,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 6,
"id": "loved-delight",
"metadata": {},
"outputs": [
@ -637,19 +636,19 @@
"output_type": "stream",
"text": [
"Training set size:\n",
"(31536, 11)\n",
"(28032, 11)\n",
"Testing set size:\n",
"(1752, 11)\n",
"(3504, 11)\n",
"Dev set size:\n",
"(1752, 11)\n"
"(3504, 11)\n"
]
}
],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"train_data, test_data = train_test_split(energy_data, test_size=3504, random_state=1)\n",
"test_data, dev_data = train_test_split(test_data, test_size=1752, random_state=1)\n",
"train_data, test_data = train_test_split(energy_data, test_size=7008, random_state=1)\n",
"test_data, dev_data = train_test_split(test_data, test_size=3504, random_state=1)\n",
"print('Training set size:')\n",
"print(train_data.shape)\n",
"print('Testing set size:')\n",
@ -660,7 +659,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 7,
"id": "formed-virginia",
"metadata": {},
"outputs": [
@ -701,21 +700,21 @@
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>31536</td>\n",
" <td>31536.000000</td>\n",
" <td>31536.000000</td>\n",
" <td>31536.000000</td>\n",
" <td>31536.000000</td>\n",
" <td>31536.000000</td>\n",
" <td>31536.000000</td>\n",
" <td>31536.000000</td>\n",
" <td>31536</td>\n",
" <td>31536</td>\n",
" <td>31536</td>\n",
" <td>28032</td>\n",
" <td>28032.000000</td>\n",
" <td>28032.000000</td>\n",
" <td>28032.000000</td>\n",
" <td>28032.000000</td>\n",
" <td>28032.000000</td>\n",
" <td>28032.000000</td>\n",
" <td>28032.000000</td>\n",
" <td>28032</td>\n",
" <td>28032</td>\n",
" <td>28032</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
" <td>31536</td>\n",
" <td>28032</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
@ -729,7 +728,7 @@
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
" <td>30/01/2018 00:15</td>\n",
" <td>07/08/2018 14:15</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
@ -751,20 +750,20 @@
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>22514</td>\n",
" <td>4560</td>\n",
" <td>16280</td>\n",
" <td>19998</td>\n",
" <td>4087</td>\n",
" <td>14467</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>NaN</td>\n",
" <td>27.369449</td>\n",
" <td>13.037946</td>\n",
" <td>3.866059</td>\n",
" <td>0.011513</td>\n",
" <td>80.525058</td>\n",
" <td>84.410086</td>\n",
" <td>42707.363014</td>\n",
" <td>27.340174</td>\n",
" <td>13.026801</td>\n",
" <td>3.875001</td>\n",
" <td>0.011498</td>\n",
" <td>80.520145</td>\n",
" <td>84.369511</td>\n",
" <td>42761.429795</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
@ -772,13 +771,13 @@
" <tr>\n",
" <th>std</th>\n",
" <td>NaN</td>\n",
" <td>33.473304</td>\n",
" <td>16.302910</td>\n",
" <td>7.434250</td>\n",
" <td>0.016159</td>\n",
" <td>18.929571</td>\n",
" <td>30.436675</td>\n",
" <td>24968.193911</td>\n",
" <td>33.469130</td>\n",
" <td>16.289348</td>\n",
" <td>7.445898</td>\n",
" <td>0.016153</td>\n",
" <td>18.932825</td>\n",
" <td>30.462193</td>\n",
" <td>24944.585138</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
@ -801,11 +800,11 @@
" <th>25%</th>\n",
" <td>NaN</td>\n",
" <td>3.200000</td>\n",
" <td>2.330000</td>\n",
" <td>2.300000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>63.200000</td>\n",
" <td>99.720000</td>\n",
" <td>63.227500</td>\n",
" <td>99.710000</td>\n",
" <td>20700.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
@ -818,9 +817,9 @@
" <td>5.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>87.900000</td>\n",
" <td>87.870000</td>\n",
" <td>100.000000</td>\n",
" <td>42300.000000</td>\n",
" <td>43200.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
@ -828,13 +827,13 @@
" <tr>\n",
" <th>75%</th>\n",
" <td>NaN</td>\n",
" <td>51.230000</td>\n",
" <td>22.650000</td>\n",
" <td>1.980000</td>\n",
" <td>51.190000</td>\n",
" <td>22.750000</td>\n",
" <td>2.020000</td>\n",
" <td>0.020000</td>\n",
" <td>98.970000</td>\n",
" <td>99.000000</td>\n",
" <td>100.000000</td>\n",
" <td>63900.000000</td>\n",
" <td>64800.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
@ -859,59 +858,59 @@
],
"text/plain": [
" date Usage_kWh Lagging_Current_Reactive.Power_kVarh \\\n",
"count 31536 31536.000000 31536.000000 \n",
"unique 31536 NaN NaN \n",
"top 30/01/2018 00:15 NaN NaN \n",
"count 28032 28032.000000 28032.000000 \n",
"unique 28032 NaN NaN \n",
"top 07/08/2018 14:15 NaN NaN \n",
"freq 1 NaN NaN \n",
"mean NaN 27.369449 13.037946 \n",
"std NaN 33.473304 16.302910 \n",
"mean NaN 27.340174 13.026801 \n",
"std NaN 33.469130 16.289348 \n",
"min NaN 0.000000 0.000000 \n",
"25% NaN 3.200000 2.330000 \n",
"25% NaN 3.200000 2.300000 \n",
"50% NaN 4.570000 5.000000 \n",
"75% NaN 51.230000 22.650000 \n",
"75% NaN 51.190000 22.750000 \n",
"max NaN 157.180000 96.910000 \n",
"\n",
" Leading_Current_Reactive_Power_kVarh CO2(tCO2) \\\n",
"count 31536.000000 31536.000000 \n",
"count 28032.000000 28032.000000 \n",
"unique NaN NaN \n",
"top NaN NaN \n",
"freq NaN NaN \n",
"mean 3.866059 0.011513 \n",
"std 7.434250 0.016159 \n",
"mean 3.875001 0.011498 \n",
"std 7.445898 0.016153 \n",
"min 0.000000 0.000000 \n",
"25% 0.000000 0.000000 \n",
"50% 0.000000 0.000000 \n",
"75% 1.980000 0.020000 \n",
"75% 2.020000 0.020000 \n",
"max 27.760000 0.070000 \n",
"\n",
" Lagging_Current_Power_Factor Leading_Current_Power_Factor \\\n",
"count 31536.000000 31536.000000 \n",
"count 28032.000000 28032.000000 \n",
"unique NaN NaN \n",
"top NaN NaN \n",
"freq NaN NaN \n",
"mean 80.525058 84.410086 \n",
"std 18.929571 30.436675 \n",
"mean 80.520145 84.369511 \n",
"std 18.932825 30.462193 \n",
"min 0.000000 0.000000 \n",
"25% 63.200000 99.720000 \n",
"50% 87.900000 100.000000 \n",
"75% 98.970000 100.000000 \n",
"25% 63.227500 99.710000 \n",
"50% 87.870000 100.000000 \n",
"75% 99.000000 100.000000 \n",
"max 100.000000 100.000000 \n",
"\n",
" NSM WeekStatus Day_of_week Load_Type \n",
"count 31536.000000 31536 31536 31536 \n",
"count 28032.000000 28032 28032 28032 \n",
"unique NaN 2 7 3 \n",
"top NaN Weekday Monday Light_Load \n",
"freq NaN 22514 4560 16280 \n",
"mean 42707.363014 NaN NaN NaN \n",
"std 24968.193911 NaN NaN NaN \n",
"freq NaN 19998 4087 14467 \n",
"mean 42761.429795 NaN NaN NaN \n",
"std 24944.585138 NaN NaN NaN \n",
"min 0.000000 NaN NaN NaN \n",
"25% 20700.000000 NaN NaN NaN \n",
"50% 42300.000000 NaN NaN NaN \n",
"75% 63900.000000 NaN NaN NaN \n",
"50% 43200.000000 NaN NaN NaN \n",
"75% 64800.000000 NaN NaN NaN \n",
"max 85500.000000 NaN NaN NaN "
]
},
"execution_count": 12,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@ -922,7 +921,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 8,
"id": "radical-score",
"metadata": {},
"outputs": [
@ -963,21 +962,21 @@
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1752</td>\n",
" <td>1752.000000</td>\n",
" <td>1752.000000</td>\n",
" <td>1752.000000</td>\n",
" <td>1752.000000</td>\n",
" <td>1752.000000</td>\n",
" <td>1752.000000</td>\n",
" <td>1752.000000</td>\n",
" <td>1752</td>\n",
" <td>1752</td>\n",
" <td>1752</td>\n",
" <td>3504</td>\n",
" <td>3504.000000</td>\n",
" <td>3504.000000</td>\n",
" <td>3504.000000</td>\n",
" <td>3504.000000</td>\n",
" <td>3504.000000</td>\n",
" <td>3504.000000</td>\n",
" <td>3504.000000</td>\n",
" <td>3504</td>\n",
" <td>3504</td>\n",
" <td>3504</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
" <td>1752</td>\n",
" <td>3504</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
@ -991,7 +990,7 @@
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
" <td>07/05/2018 06:00</td>\n",
" <td>20/06/2018 13:00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
@ -1000,7 +999,7 @@
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Weekday</td>\n",
" <td>Tuesday</td>\n",
" <td>Wednesday</td>\n",
" <td>Light_Load</td>\n",
" </tr>\n",
" <tr>\n",
@ -1013,20 +1012,20 @@
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1268</td>\n",
" <td>291</td>\n",
" <td>898</td>\n",
" <td>2522</td>\n",
" <td>527</td>\n",
" <td>1837</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>NaN</td>\n",
" <td>27.330982</td>\n",
" <td>12.649024</td>\n",
" <td>3.949281</td>\n",
" <td>0.011530</td>\n",
" <td>81.364526</td>\n",
" <td>83.630702</td>\n",
" <td>43080.821918</td>\n",
" <td>26.355685</td>\n",
" <td>12.374717</td>\n",
" <td>3.891093</td>\n",
" <td>0.011050</td>\n",
" <td>80.687751</td>\n",
" <td>84.082794</td>\n",
" <td>42594.092466</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
@ -1034,13 +1033,13 @@
" <tr>\n",
" <th>std</th>\n",
" <td>NaN</td>\n",
" <td>33.484216</td>\n",
" <td>16.185283</td>\n",
" <td>7.298637</td>\n",
" <td>0.016224</td>\n",
" <td>18.758338</td>\n",
" <td>30.801180</td>\n",
" <td>24944.325392</td>\n",
" <td>32.519749</td>\n",
" <td>15.830961</td>\n",
" <td>7.353028</td>\n",
" <td>0.015762</td>\n",
" <td>19.053018</td>\n",
" <td>30.614144</td>\n",
" <td>25222.804637</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
@ -1052,7 +1051,7 @@
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>41.120000</td>\n",
" <td>40.290000</td>\n",
" <td>12.540000</td>\n",
" <td>0.000000</td>\n",
" <td>NaN</td>\n",
@ -1062,13 +1061,13 @@
" <tr>\n",
" <th>25%</th>\n",
" <td>NaN</td>\n",
" <td>3.200000</td>\n",
" <td>1.392500</td>\n",
" <td>3.192500</td>\n",
" <td>2.090000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>64.630000</td>\n",
" <td>99.180000</td>\n",
" <td>21600.000000</td>\n",
" <td>63.130000</td>\n",
" <td>99.562500</td>\n",
" <td>20700.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
@ -1076,13 +1075,13 @@
" <tr>\n",
" <th>50%</th>\n",
" <td>NaN</td>\n",
" <td>4.570000</td>\n",
" <td>4.930000</td>\n",
" <td>4.500000</td>\n",
" <td>4.900000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>88.955000</td>\n",
" <td>88.210000</td>\n",
" <td>100.000000</td>\n",
" <td>43200.000000</td>\n",
" <td>42300.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
@ -1090,11 +1089,11 @@
" <tr>\n",
" <th>75%</th>\n",
" <td>NaN</td>\n",
" <td>49.870000</td>\n",
" <td>21.240000</td>\n",
" <td>3.837500</td>\n",
" <td>49.570000</td>\n",
" <td>20.700000</td>\n",
" <td>2.967500</td>\n",
" <td>0.020000</td>\n",
" <td>99.852500</td>\n",
" <td>99.390000</td>\n",
" <td>100.000000</td>\n",
" <td>64800.000000</td>\n",
" <td>NaN</td>\n",
@ -1104,9 +1103,9 @@
" <tr>\n",
" <th>max</th>\n",
" <td>NaN</td>\n",
" <td>143.930000</td>\n",
" <td>87.700000</td>\n",
" <td>27.540000</td>\n",
" <td>153.140000</td>\n",
" <td>82.940000</td>\n",
" <td>27.650000</td>\n",
" <td>0.070000</td>\n",
" <td>100.000000</td>\n",
" <td>100.000000</td>\n",
@ -1121,59 +1120,59 @@
],
"text/plain": [
" date Usage_kWh Lagging_Current_Reactive.Power_kVarh \\\n",
"count 1752 1752.000000 1752.000000 \n",
"unique 1752 NaN NaN \n",
"top 07/05/2018 06:00 NaN NaN \n",
"count 3504 3504.000000 3504.000000 \n",
"unique 3504 NaN NaN \n",
"top 20/06/2018 13:00 NaN NaN \n",
"freq 1 NaN NaN \n",
"mean NaN 27.330982 12.649024 \n",
"std NaN 33.484216 16.185283 \n",
"mean NaN 26.355685 12.374717 \n",
"std NaN 32.519749 15.830961 \n",
"min NaN 2.480000 0.000000 \n",
"25% NaN 3.200000 1.392500 \n",
"50% NaN 4.570000 4.930000 \n",
"75% NaN 49.870000 21.240000 \n",
"max NaN 143.930000 87.700000 \n",
"25% NaN 3.192500 2.090000 \n",
"50% NaN 4.500000 4.900000 \n",
"75% NaN 49.570000 20.700000 \n",
"max NaN 153.140000 82.940000 \n",
"\n",
" Leading_Current_Reactive_Power_kVarh CO2(tCO2) \\\n",
"count 1752.000000 1752.000000 \n",
"count 3504.000000 3504.000000 \n",
"unique NaN NaN \n",
"top NaN NaN \n",
"freq NaN NaN \n",
"mean 3.949281 0.011530 \n",
"std 7.298637 0.016224 \n",
"mean 3.891093 0.011050 \n",
"std 7.353028 0.015762 \n",
"min 0.000000 0.000000 \n",
"25% 0.000000 0.000000 \n",
"50% 0.000000 0.000000 \n",
"75% 3.837500 0.020000 \n",
"max 27.540000 0.070000 \n",
"75% 2.967500 0.020000 \n",
"max 27.650000 0.070000 \n",
"\n",
" Lagging_Current_Power_Factor Leading_Current_Power_Factor \\\n",
"count 1752.000000 1752.000000 \n",
"count 3504.000000 3504.000000 \n",
"unique NaN NaN \n",
"top NaN NaN \n",
"freq NaN NaN \n",
"mean 81.364526 83.630702 \n",
"std 18.758338 30.801180 \n",
"min 41.120000 12.540000 \n",
"25% 64.630000 99.180000 \n",
"50% 88.955000 100.000000 \n",
"75% 99.852500 100.000000 \n",
"mean 80.687751 84.082794 \n",
"std 19.053018 30.614144 \n",
"min 40.290000 12.540000 \n",
"25% 63.130000 99.562500 \n",
"50% 88.210000 100.000000 \n",
"75% 99.390000 100.000000 \n",
"max 100.000000 100.000000 \n",
"\n",
" NSM WeekStatus Day_of_week Load_Type \n",
"count 1752.000000 1752 1752 1752 \n",
"count 3504.000000 3504 3504 3504 \n",
"unique NaN 2 7 3 \n",
"top NaN Weekday Tuesday Light_Load \n",
"freq NaN 1268 291 898 \n",
"mean 43080.821918 NaN NaN NaN \n",
"std 24944.325392 NaN NaN NaN \n",
"top NaN Weekday Wednesday Light_Load \n",
"freq NaN 2522 527 1837 \n",
"mean 42594.092466 NaN NaN NaN \n",
"std 25222.804637 NaN NaN NaN \n",
"min 0.000000 NaN NaN NaN \n",
"25% 21600.000000 NaN NaN NaN \n",
"50% 43200.000000 NaN NaN NaN \n",
"25% 20700.000000 NaN NaN NaN \n",
"50% 42300.000000 NaN NaN NaN \n",
"75% 64800.000000 NaN NaN NaN \n",
"max 85500.000000 NaN NaN NaN "
]
},
"execution_count": 13,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@ -1184,7 +1183,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 9,
"id": "attempted-lafayette",
"metadata": {},
"outputs": [
@ -1225,21 +1224,21 @@
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1752</td>\n",
" <td>1752.000000</td>\n",
" <td>1752.000000</td>\n",
" <td>1752.000000</td>\n",
" <td>1752.000000</td>\n",
" <td>1752.000000</td>\n",
" <td>1752.000000</td>\n",
" <td>1752.000000</td>\n",
" <td>1752</td>\n",
" <td>1752</td>\n",
" <td>1752</td>\n",
" <td>3504</td>\n",
" <td>3504.000000</td>\n",
" <td>3504.000000</td>\n",
" <td>3504.000000</td>\n",
" <td>3504.000000</td>\n",
" <td>3504.000000</td>\n",
" <td>3504.000000</td>\n",
" <td>3504.000000</td>\n",
" <td>3504</td>\n",
" <td>3504</td>\n",
" <td>3504</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
" <td>1752</td>\n",
" <td>3504</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
@ -1253,7 +1252,7 @@
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
" <td>02/06/2018 02:00</td>\n",
" <td>16/11/2018 16:45</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
@ -1262,7 +1261,7 @@
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Weekday</td>\n",
" <td>Monday</td>\n",
" <td>Tuesday</td>\n",
" <td>Light_Load</td>\n",
" </tr>\n",
" <tr>\n",
@ -1275,20 +1274,20 @@
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1274</td>\n",
" <td>275</td>\n",
" <td>894</td>\n",
" <td>2536</td>\n",
" <td>543</td>\n",
" <td>1768</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>NaN</td>\n",
" <td>27.756787</td>\n",
" <td>13.375628</td>\n",
" <td>3.880634</td>\n",
" <td>0.011729</td>\n",
" <td>80.745548</td>\n",
" <td>84.345154</td>\n",
" <td>43186.643836</td>\n",
" <td>28.791849</td>\n",
" <td>13.764709</td>\n",
" <td>3.818382</td>\n",
" <td>0.012212</td>\n",
" <td>80.931650</td>\n",
" <td>84.639817</td>\n",
" <td>42814.469178</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
@ -1296,13 +1295,13 @@
" <tr>\n",
" <th>std</th>\n",
" <td>NaN</td>\n",
" <td>32.895802</td>\n",
" <td>16.482148</td>\n",
" <td>7.376468</td>\n",
" <td>0.015943</td>\n",
" <td>18.927378</td>\n",
" <td>30.475427</td>\n",
" <td>24440.888112</td>\n",
" <td>34.115238</td>\n",
" <td>16.872400</td>\n",
" <td>7.325016</td>\n",
" <td>0.016499</td>\n",
" <td>18.696834</td>\n",
" <td>30.258743</td>\n",
" <td>24628.829557</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
@ -1310,12 +1309,12 @@
" <tr>\n",
" <th>min</th>\n",
" <td>NaN</td>\n",
" <td>2.520000</td>\n",
" <td>2.480000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>38.330000</td>\n",
" <td>14.070000</td>\n",
" <td>13.050000</td>\n",
" <td>0.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
@ -1324,13 +1323,13 @@
" <tr>\n",
" <th>25%</th>\n",
" <td>NaN</td>\n",
" <td>3.200000</td>\n",
" <td>2.270000</td>\n",
" <td>3.240000</td>\n",
" <td>2.380000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>63.942500</td>\n",
" <td>99.690000</td>\n",
" <td>22500.000000</td>\n",
" <td>64.112500</td>\n",
" <td>99.730000</td>\n",
" <td>21600.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
@ -1338,11 +1337,11 @@
" <tr>\n",
" <th>50%</th>\n",
" <td>NaN</td>\n",
" <td>4.680000</td>\n",
" <td>4.720000</td>\n",
" <td>5.110000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>87.940000</td>\n",
" <td>88.325000</td>\n",
" <td>100.000000</td>\n",
" <td>43200.000000</td>\n",
" <td>NaN</td>\n",
@ -1352,11 +1351,11 @@
" <tr>\n",
" <th>75%</th>\n",
" <td>NaN</td>\n",
" <td>52.187500</td>\n",
" <td>24.050000</td>\n",
" <td>2.177500</td>\n",
" <td>53.227500</td>\n",
" <td>24.810000</td>\n",
" <td>1.917500</td>\n",
" <td>0.020000</td>\n",
" <td>99.030000</td>\n",
" <td>98.792500</td>\n",
" <td>100.000000</td>\n",
" <td>63900.000000</td>\n",
" <td>NaN</td>\n",
@ -1366,10 +1365,10 @@
" <tr>\n",
" <th>max</th>\n",
" <td>NaN</td>\n",
" <td>139.030000</td>\n",
" <td>80.750000</td>\n",
" <td>27.580000</td>\n",
" <td>0.060000</td>\n",
" <td>146.880000</td>\n",
" <td>87.700000</td>\n",
" <td>27.540000</td>\n",
" <td>0.070000</td>\n",
" <td>100.000000</td>\n",
" <td>100.000000</td>\n",
" <td>85500.000000</td>\n",
@ -1383,59 +1382,59 @@
],
"text/plain": [
" date Usage_kWh Lagging_Current_Reactive.Power_kVarh \\\n",
"count 1752 1752.000000 1752.000000 \n",
"unique 1752 NaN NaN \n",
"top 02/06/2018 02:00 NaN NaN \n",
"count 3504 3504.000000 3504.000000 \n",
"unique 3504 NaN NaN \n",
"top 16/11/2018 16:45 NaN NaN \n",
"freq 1 NaN NaN \n",
"mean NaN 27.756787 13.375628 \n",
"std NaN 32.895802 16.482148 \n",
"min NaN 2.520000 0.000000 \n",
"25% NaN 3.200000 2.270000 \n",
"50% NaN 4.680000 5.110000 \n",
"75% NaN 52.187500 24.050000 \n",
"max NaN 139.030000 80.750000 \n",
"mean NaN 28.791849 13.764709 \n",
"std NaN 34.115238 16.872400 \n",
"min NaN 2.480000 0.000000 \n",
"25% NaN 3.240000 2.380000 \n",
"50% NaN 4.720000 5.110000 \n",
"75% NaN 53.227500 24.810000 \n",
"max NaN 146.880000 87.700000 \n",
"\n",
" Leading_Current_Reactive_Power_kVarh CO2(tCO2) \\\n",
"count 1752.000000 1752.000000 \n",
"count 3504.000000 3504.000000 \n",
"unique NaN NaN \n",
"top NaN NaN \n",
"freq NaN NaN \n",
"mean 3.880634 0.011729 \n",
"std 7.376468 0.015943 \n",
"mean 3.818382 0.012212 \n",
"std 7.325016 0.016499 \n",
"min 0.000000 0.000000 \n",
"25% 0.000000 0.000000 \n",
"50% 0.000000 0.000000 \n",
"75% 2.177500 0.020000 \n",
"max 27.580000 0.060000 \n",
"75% 1.917500 0.020000 \n",
"max 27.540000 0.070000 \n",
"\n",
" Lagging_Current_Power_Factor Leading_Current_Power_Factor \\\n",
"count 1752.000000 1752.000000 \n",
"count 3504.000000 3504.000000 \n",
"unique NaN NaN \n",
"top NaN NaN \n",
"freq NaN NaN \n",
"mean 80.745548 84.345154 \n",
"std 18.927378 30.475427 \n",
"min 38.330000 14.070000 \n",
"25% 63.942500 99.690000 \n",
"50% 87.940000 100.000000 \n",
"75% 99.030000 100.000000 \n",
"mean 80.931650 84.639817 \n",
"std 18.696834 30.258743 \n",
"min 38.330000 13.050000 \n",
"25% 64.112500 99.730000 \n",
"50% 88.325000 100.000000 \n",
"75% 98.792500 100.000000 \n",
"max 100.000000 100.000000 \n",
"\n",
" NSM WeekStatus Day_of_week Load_Type \n",
"count 1752.000000 1752 1752 1752 \n",
"count 3504.000000 3504 3504 3504 \n",
"unique NaN 2 7 3 \n",
"top NaN Weekday Monday Light_Load \n",
"freq NaN 1274 275 894 \n",
"mean 43186.643836 NaN NaN NaN \n",
"std 24440.888112 NaN NaN NaN \n",
"top NaN Weekday Tuesday Light_Load \n",
"freq NaN 2536 543 1768 \n",
"mean 42814.469178 NaN NaN NaN \n",
"std 24628.829557 NaN NaN NaN \n",
"min 0.000000 NaN NaN NaN \n",
"25% 22500.000000 NaN NaN NaN \n",
"25% 21600.000000 NaN NaN NaN \n",
"50% 43200.000000 NaN NaN NaN \n",
"75% 63900.000000 NaN NaN NaN \n",
"max 85500.000000 NaN NaN NaN "
]
},
"execution_count": 14,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@ -1443,6 +1442,18 @@
"source": [
"dev_data.describe(include='all')"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "banned-scottish",
"metadata": {},
"outputs": [],
"source": [
"test_data.to_csv(\"steel_industry_data_test.csv\", encoding=\"utf-8\", index=False)\n",
"dev_data.to_csv(\"steel_industry_data_dev.csv\", encoding=\"utf-8\", index=False)\n",
"train_data.to_csv(\"steel_industry_data_train.csv\", encoding=\"utf-8\", index=False)"
]
}
],
"metadata": {

View File

@ -1,5 +1,7 @@
pipeline {
agent any
agent {
docker { image 'ikami1/ium:v1' }
}
parameters {
buildSelector(
defaultSelector: lastSuccessful(),

View File

@ -1,2 +1,2 @@
#!/bin/sh
# Download the steel-industry energy-consumption dataset from Kaggle and
# unpack it into the current directory.
#
# The kaggle CLI needs credentials; the Jenkins pipeline that calls this
# script exports KAGGLE_USERNAME and KAGGLE_KEY before running it.

# Stop at the first failing command (and on unset variables). Without this the
# script's exit status is only that of the final unzip, so a failed download
# could go unnoticed while an archive left over from an earlier run is
# unpacked instead.
set -eu

kaggle datasets download -d csafrit2/steel-industry-energy-consumption

# -o: overwrite files from a previous run without prompting, so the script
# never blocks waiting for input in a non-interactive CI job.
unzip -o steel-industry-energy-consumption.zip

View File

@ -1,5 +1,7 @@
pipeline {
agent any
agent {
dockerfile true
}
parameters {
string(
defaultValue: 'ikami1',
@ -8,7 +10,7 @@ pipeline {
trim: false
)
password(
defaultValue: 'c70ff184133bfabb351608b128e76cd2',
defaultValue: '',
description: 'Kaggle token taken from kaggle.json file, as described in https://github.com/Kaggle/kaggle-api#api-credentials',
name: 'KAGGLE_KEY'
)
@ -38,7 +40,8 @@ pipeline {
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}",
"KAGGLE_KEY=${params.KAGGLE_KEY}",
"CUTOFF=${params.CUTOFF}"]) {
sh "./download.sh"
sh "./download_dataset.sh"
sh "python3 process_dataset.py"
archiveArtifacts artifacts: "steel_industry_data_test.csv, steel_industry_data_dev.csv, steel_industry_data_train.csv"
}
}