ium_464914/IUM_2.ipynb

1259 lines
449 KiB
Plaintext
Raw Normal View History

2024-03-17 18:28:15 +01:00
{
"cells": [
{
"cell_type": "code",
2024-04-13 18:12:31 +02:00
"execution_count": 1,
2024-03-17 18:28:15 +01:00
"metadata": {
2024-04-13 18:12:31 +02:00
"scrolled": true,
"pycharm": {
"is_executing": true
},
"ExecuteTime": {
"end_time": "2024-04-13T16:07:39.757038200Z",
"start_time": "2024-04-13T16:07:01.476848900Z"
}
2024-03-17 18:28:15 +01:00
},
2024-04-13 18:12:31 +02:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: kaggle in \\\\files\\students\\s464914\\.appdata\\python\\python310\\site-packages (1.6.6)\n",
"Requirement already satisfied: six>=1.10 in c:\\software\\python3\\lib\\site-packages (from kaggle) (1.16.0)\n",
"Requirement already satisfied: certifi in c:\\software\\python3\\lib\\site-packages (from kaggle) (2023.7.22)\n",
"Requirement already satisfied: python-dateutil in c:\\software\\python3\\lib\\site-packages (from kaggle) (2.8.2)\n",
"Requirement already satisfied: requests in c:\\software\\python3\\lib\\site-packages (from kaggle) (2.31.0)\n",
"Requirement already satisfied: tqdm in c:\\software\\python3\\lib\\site-packages (from kaggle) (4.66.1)\n",
"Requirement already satisfied: python-slugify in \\\\files\\students\\s464914\\.appdata\\python\\python310\\site-packages (from kaggle) (8.0.4)\n",
"Requirement already satisfied: urllib3 in c:\\software\\python3\\lib\\site-packages (from kaggle) (1.26.16)\n",
"Requirement already satisfied: bleach in c:\\software\\python3\\lib\\site-packages (from kaggle) (6.0.0)\n",
"Requirement already satisfied: webencodings in c:\\software\\python3\\lib\\site-packages (from bleach->kaggle) (0.5.1)\n",
"Requirement already satisfied: text-unidecode>=1.3 in \\\\files\\students\\s464914\\.appdata\\python\\python310\\site-packages (from python-slugify->kaggle) (1.3)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in c:\\software\\python3\\lib\\site-packages (from requests->kaggle) (3.2.0)\n",
"Requirement already satisfied: idna<4,>=2.5 in c:\\software\\python3\\lib\\site-packages (from requests->kaggle) (3.4)\n",
"Requirement already satisfied: colorama in c:\\software\\python3\\lib\\site-packages (from tqdm->kaggle) (0.4.6)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"[notice] A new release of pip is available: 23.0.1 -> 24.0\n",
"[notice] To update, run: python3.exe -m pip install --upgrade pip\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: pandas in c:\\software\\python3\\lib\\site-packages (2.0.3)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in c:\\software\\python3\\lib\\site-packages (from pandas) (2.8.2)\n",
"Requirement already satisfied: pytz>=2020.1 in c:\\software\\python3\\lib\\site-packages (from pandas) (2023.3)\n",
"Requirement already satisfied: tzdata>=2022.1 in c:\\software\\python3\\lib\\site-packages (from pandas) (2023.3)\n",
"Requirement already satisfied: numpy>=1.21.0 in c:\\software\\python3\\lib\\site-packages (from pandas) (1.24.3)\n",
"Requirement already satisfied: six>=1.5 in c:\\software\\python3\\lib\\site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"[notice] A new release of pip is available: 23.0.1 -> 24.0\n",
"[notice] To update, run: python3.exe -m pip install --upgrade pip\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: scikit-learn in c:\\software\\python3\\lib\\site-packages (1.3.0)\n",
"Requirement already satisfied: numpy>=1.17.3 in c:\\software\\python3\\lib\\site-packages (from scikit-learn) (1.24.3)\n",
"Requirement already satisfied: scipy>=1.5.0 in c:\\software\\python3\\lib\\site-packages (from scikit-learn) (1.11.2)\n",
"Requirement already satisfied: joblib>=1.1.1 in c:\\software\\python3\\lib\\site-packages (from scikit-learn) (1.3.2)\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in c:\\software\\python3\\lib\\site-packages (from scikit-learn) (3.2.0)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"[notice] A new release of pip is available: 23.0.1 -> 24.0\n",
"[notice] To update, run: python3.exe -m pip install --upgrade pip\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: matplotlib in c:\\software\\python3\\lib\\site-packages (3.7.2)\n",
"Requirement already satisfied: contourpy>=1.0.1 in c:\\software\\python3\\lib\\site-packages (from matplotlib) (1.1.0)\n",
"Requirement already satisfied: cycler>=0.10 in c:\\software\\python3\\lib\\site-packages (from matplotlib) (0.11.0)\n",
"Requirement already satisfied: fonttools>=4.22.0 in c:\\software\\python3\\lib\\site-packages (from matplotlib) (4.42.0)\n",
"Requirement already satisfied: kiwisolver>=1.0.1 in c:\\software\\python3\\lib\\site-packages (from matplotlib) (1.4.4)\n",
"Requirement already satisfied: numpy>=1.20 in c:\\software\\python3\\lib\\site-packages (from matplotlib) (1.24.3)\n",
"Requirement already satisfied: packaging>=20.0 in c:\\software\\python3\\lib\\site-packages (from matplotlib) (23.1)\n",
"Requirement already satisfied: pillow>=6.2.0 in c:\\software\\python3\\lib\\site-packages (from matplotlib) (10.0.0)\n",
"Requirement already satisfied: pyparsing<3.1,>=2.3.1 in c:\\software\\python3\\lib\\site-packages (from matplotlib) (3.0.9)\n",
"Requirement already satisfied: python-dateutil>=2.7 in c:\\software\\python3\\lib\\site-packages (from matplotlib) (2.8.2)\n",
"Requirement already satisfied: six>=1.5 in c:\\software\\python3\\lib\\site-packages (from python-dateutil>=2.7->matplotlib) (1.16.0)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"[notice] A new release of pip is available: 23.0.1 -> 24.0\n",
"[notice] To update, run: python3.exe -m pip install --upgrade pip\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: geopandas in \\\\files\\students\\s464914\\.appdata\\python\\python310\\site-packages (0.14.3)\n",
"Requirement already satisfied: fiona>=1.8.21 in \\\\files\\students\\s464914\\.appdata\\python\\python310\\site-packages (from geopandas) (1.9.6)\n",
"Requirement already satisfied: packaging in c:\\software\\python3\\lib\\site-packages (from geopandas) (23.1)\n",
"Requirement already satisfied: pandas>=1.4.0 in c:\\software\\python3\\lib\\site-packages (from geopandas) (2.0.3)\n",
"Requirement already satisfied: pyproj>=3.3.0 in \\\\files\\students\\s464914\\.appdata\\python\\python310\\site-packages (from geopandas) (3.6.1)\n",
"Requirement already satisfied: shapely>=1.8.0 in \\\\files\\students\\s464914\\.appdata\\python\\python310\\site-packages (from geopandas) (2.0.3)\n",
"Requirement already satisfied: attrs>=19.2.0 in c:\\software\\python3\\lib\\site-packages (from fiona>=1.8.21->geopandas) (23.1.0)\n",
"Requirement already satisfied: certifi in c:\\software\\python3\\lib\\site-packages (from fiona>=1.8.21->geopandas) (2023.7.22)\n",
"Requirement already satisfied: click~=8.0 in c:\\software\\python3\\lib\\site-packages (from fiona>=1.8.21->geopandas) (8.1.7)\n",
"Requirement already satisfied: click-plugins>=1.0 in \\\\files\\students\\s464914\\.appdata\\python\\python310\\site-packages (from fiona>=1.8.21->geopandas) (1.1.1)\n",
"Requirement already satisfied: cligj>=0.5 in \\\\files\\students\\s464914\\.appdata\\python\\python310\\site-packages (from fiona>=1.8.21->geopandas) (0.7.2)\n",
"Requirement already satisfied: six in c:\\software\\python3\\lib\\site-packages (from fiona>=1.8.21->geopandas) (1.16.0)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in c:\\software\\python3\\lib\\site-packages (from pandas>=1.4.0->geopandas) (2.8.2)\n",
"Requirement already satisfied: pytz>=2020.1 in c:\\software\\python3\\lib\\site-packages (from pandas>=1.4.0->geopandas) (2023.3)\n",
"Requirement already satisfied: tzdata>=2022.1 in c:\\software\\python3\\lib\\site-packages (from pandas>=1.4.0->geopandas) (2023.3)\n",
"Requirement already satisfied: numpy>=1.21.0 in c:\\software\\python3\\lib\\site-packages (from pandas>=1.4.0->geopandas) (1.24.3)\n",
"Requirement already satisfied: colorama in c:\\software\\python3\\lib\\site-packages (from click~=8.0->fiona>=1.8.21->geopandas) (0.4.6)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"[notice] A new release of pip is available: 23.0.1 -> 24.0\n",
"[notice] To update, run: python3.exe -m pip install --upgrade pip\n"
]
}
],
2024-03-17 18:28:15 +01:00
"source": [
"%pip install --user kaggle \n",
"%pip install --user pandas\n",
"%pip install --user scikit-learn\n",
"%pip install --user matplotlib\n",
"%pip install --user geopandas"
]
},
{
"cell_type": "code",
2024-04-13 17:39:48 +02:00
"execution_count": 2,
2024-03-17 18:42:28 +01:00
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-17T17:38:40.019389600Z",
"start_time": "2024-03-17T17:38:36.535384600Z"
}
},
2024-03-17 18:28:15 +01:00
"outputs": [],
"source": [
"import matplotlib.pyplot as plt \n",
"import pandas as pd"
]
},
{
"cell_type": "code",
2024-04-13 18:12:31 +02:00
"execution_count": 4,
"metadata": {
"pycharm": {
"is_executing": true
},
"ExecuteTime": {
"end_time": "2024-04-13T16:11:03.249257400Z",
"start_time": "2024-04-13T16:10:58.745694100Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading forest-cover-type-dataset.zip to J:\\PycharmProjects\\ium_464914\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
" 0%| | 0.00/11.2M [00:00<?, ?B/s]\n",
" 9%|8 | 1.00M/11.2M [00:00<00:06, 1.56MB/s]\n",
" 18%|#7 | 2.00M/11.2M [00:00<00:03, 3.10MB/s]\n",
" 36%|###5 | 4.00M/11.2M [00:00<00:01, 6.25MB/s]\n",
" 54%|#####3 | 6.00M/11.2M [00:01<00:00, 9.19MB/s]\n",
" 81%|######## | 9.00M/11.2M [00:01<00:00, 13.0MB/s]\n",
"100%|##########| 11.2M/11.2M [00:01<00:00, 9.30MB/s]\n"
]
}
],
2024-03-17 18:28:15 +01:00
"source": [
2024-04-13 17:39:48 +02:00
"!kaggle datasets download -d uciml/forest-cover-type-dataset"
2024-03-17 18:28:15 +01:00
]
},
{
"cell_type": "code",
2024-04-13 18:12:31 +02:00
"execution_count": 5,
2024-03-17 18:42:28 +01:00
"metadata": {
"ExecuteTime": {
2024-04-13 18:12:31 +02:00
"end_time": "2024-04-13T16:11:41.214712500Z",
"start_time": "2024-04-13T16:11:37.462860300Z"
2024-03-17 18:42:28 +01:00
}
},
2024-04-13 18:12:31 +02:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Archive: forest-cover-type-dataset.zip\n",
" inflating: covtype.csv \n"
]
}
],
2024-03-17 18:28:15 +01:00
"source": [
2024-04-13 18:12:31 +02:00
"!unzip -o forest-cover-type-dataset.zip "
2024-03-17 18:28:15 +01:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
2024-04-13 17:39:48 +02:00
"<h4>Zbiór</h4>"
2024-03-17 18:28:15 +01:00
]
},
{
"cell_type": "code",
2024-04-13 17:39:48 +02:00
"execution_count": 30,
2024-03-17 18:42:28 +01:00
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-17T17:38:46.756471100Z",
"start_time": "2024-03-17T17:38:46.335674200Z"
}
},
2024-03-17 18:28:15 +01:00
"outputs": [
{
"data": {
2024-04-13 17:39:48 +02:00
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Elevation</th>\n",
" <th>Aspect</th>\n",
" <th>Slope</th>\n",
" <th>Horizontal_Distance_To_Hydrology</th>\n",
" <th>Vertical_Distance_To_Hydrology</th>\n",
" <th>Horizontal_Distance_To_Roadways</th>\n",
" <th>Hillshade_9am</th>\n",
" <th>Hillshade_Noon</th>\n",
" <th>Hillshade_3pm</th>\n",
" <th>Horizontal_Distance_To_Fire_Points</th>\n",
" <th>...</th>\n",
" <th>Soil_Type32</th>\n",
" <th>Soil_Type33</th>\n",
" <th>Soil_Type34</th>\n",
" <th>Soil_Type35</th>\n",
" <th>Soil_Type36</th>\n",
" <th>Soil_Type37</th>\n",
" <th>Soil_Type38</th>\n",
" <th>Soil_Type39</th>\n",
" <th>Soil_Type40</th>\n",
" <th>Cover_Type</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>318054</th>\n",
" <td>2517</td>\n",
" <td>271</td>\n",
" <td>12</td>\n",
" <td>272</td>\n",
" <td>84</td>\n",
" <td>484</td>\n",
" <td>189</td>\n",
" <td>244</td>\n",
" <td>193</td>\n",
" <td>162</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30504</th>\n",
" <td>2959</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>180</td>\n",
" <td>20</td>\n",
" <td>5960</td>\n",
" <td>217</td>\n",
" <td>236</td>\n",
" <td>156</td>\n",
" <td>3960</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>349520</th>\n",
" <td>3093</td>\n",
" <td>54</td>\n",
" <td>19</td>\n",
" <td>42</td>\n",
" <td>-3</td>\n",
" <td>797</td>\n",
" <td>227</td>\n",
" <td>196</td>\n",
" <td>94</td>\n",
" <td>1318</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>365645</th>\n",
" <td>2502</td>\n",
" <td>330</td>\n",
" <td>17</td>\n",
" <td>150</td>\n",
" <td>52</td>\n",
" <td>738</td>\n",
" <td>177</td>\n",
" <td>216</td>\n",
" <td>178</td>\n",
" <td>510</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>131114</th>\n",
" <td>2962</td>\n",
" <td>4</td>\n",
" <td>13</td>\n",
" <td>95</td>\n",
" <td>7</td>\n",
" <td>4270</td>\n",
" <td>202</td>\n",
" <td>214</td>\n",
" <td>148</td>\n",
" <td>1999</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>385769</th>\n",
" <td>3181</td>\n",
" <td>119</td>\n",
" <td>5</td>\n",
" <td>170</td>\n",
" <td>-1</td>\n",
" <td>2416</td>\n",
" <td>228</td>\n",
" <td>235</td>\n",
" <td>141</td>\n",
" <td>999</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>161626</th>\n",
" <td>2950</td>\n",
" <td>270</td>\n",
" <td>4</td>\n",
" <td>108</td>\n",
" <td>15</td>\n",
" <td>2053</td>\n",
" <td>210</td>\n",
" <td>241</td>\n",
" <td>170</td>\n",
" <td>2037</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>394880</th>\n",
" <td>3051</td>\n",
" <td>155</td>\n",
" <td>22</td>\n",
" <td>390</td>\n",
" <td>70</td>\n",
" <td>1871</td>\n",
" <td>239</td>\n",
" <td>236</td>\n",
" <td>114</td>\n",
" <td>1510</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>389492</th>\n",
" <td>3024</td>\n",
" <td>191</td>\n",
" <td>16</td>\n",
" <td>785</td>\n",
" <td>110</td>\n",
" <td>3000</td>\n",
" <td>218</td>\n",
" <td>251</td>\n",
" <td>162</td>\n",
" <td>1961</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>52507</th>\n",
" <td>2714</td>\n",
" <td>349</td>\n",
" <td>18</td>\n",
" <td>67</td>\n",
" <td>20</td>\n",
" <td>1599</td>\n",
" <td>184</td>\n",
" <td>207</td>\n",
" <td>160</td>\n",
" <td>3234</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>10 rows × 55 columns</p>\n",
"</div>"
],
"text/plain": [
" Elevation Aspect Slope Horizontal_Distance_To_Hydrology \\\n",
"318054 2517 271 12 272 \n",
"30504 2959 0 1 180 \n",
"349520 3093 54 19 42 \n",
"365645 2502 330 17 150 \n",
"131114 2962 4 13 95 \n",
"385769 3181 119 5 170 \n",
"161626 2950 270 4 108 \n",
"394880 3051 155 22 390 \n",
"389492 3024 191 16 785 \n",
"52507 2714 349 18 67 \n",
"\n",
" Vertical_Distance_To_Hydrology Horizontal_Distance_To_Roadways \\\n",
"318054 84 484 \n",
"30504 20 5960 \n",
"349520 -3 797 \n",
"365645 52 738 \n",
"131114 7 4270 \n",
"385769 -1 2416 \n",
"161626 15 2053 \n",
"394880 70 1871 \n",
"389492 110 3000 \n",
"52507 20 1599 \n",
"\n",
" Hillshade_9am Hillshade_Noon Hillshade_3pm \\\n",
"318054 189 244 193 \n",
"30504 217 236 156 \n",
"349520 227 196 94 \n",
"365645 177 216 178 \n",
"131114 202 214 148 \n",
"385769 228 235 141 \n",
"161626 210 241 170 \n",
"394880 239 236 114 \n",
"389492 218 251 162 \n",
"52507 184 207 160 \n",
"\n",
" Horizontal_Distance_To_Fire_Points ... Soil_Type32 Soil_Type33 \\\n",
"318054 162 ... 0 0 \n",
"30504 3960 ... 0 0 \n",
"349520 1318 ... 0 0 \n",
"365645 510 ... 0 0 \n",
"131114 1999 ... 0 0 \n",
"385769 999 ... 0 0 \n",
"161626 2037 ... 0 0 \n",
"394880 1510 ... 0 0 \n",
"389492 1961 ... 0 1 \n",
"52507 3234 ... 0 0 \n",
"\n",
" Soil_Type34 Soil_Type35 Soil_Type36 Soil_Type37 Soil_Type38 \\\n",
"318054 0 0 0 0 0 \n",
"30504 0 0 0 0 0 \n",
"349520 0 0 0 0 0 \n",
"365645 0 0 0 0 0 \n",
"131114 0 0 0 0 0 \n",
"385769 0 0 0 0 0 \n",
"161626 0 0 0 0 0 \n",
"394880 0 0 0 0 0 \n",
"389492 0 0 0 0 0 \n",
"52507 0 0 0 0 0 \n",
"\n",
" Soil_Type39 Soil_Type40 Cover_Type \n",
"318054 0 0 2 \n",
"30504 0 0 2 \n",
"349520 0 0 1 \n",
"365645 0 0 6 \n",
"131114 0 0 2 \n",
"385769 0 0 1 \n",
"161626 0 0 2 \n",
"394880 0 0 1 \n",
"389492 0 0 2 \n",
"52507 0 0 2 \n",
"\n",
"[10 rows x 55 columns]"
]
2024-03-17 18:28:15 +01:00
},
2024-04-13 17:39:48 +02:00
"execution_count": 30,
2024-03-17 18:28:15 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2024-04-13 17:39:48 +02:00
"data = pd.read_csv(\"covtype.csv\")\n",
"data = data.sample(frac = 1)\n",
2024-03-17 18:28:15 +01:00
"data.head(10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Podział na podzbiory"
]
},
{
"cell_type": "code",
2024-04-13 17:39:48 +02:00
"execution_count": 4,
2024-03-17 18:42:28 +01:00
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-17T17:38:53.929532900Z",
"start_time": "2024-03-17T17:38:51.607851900Z"
}
},
2024-03-17 18:28:15 +01:00
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
2024-04-13 17:39:48 +02:00
"forest_train, forest_test = train_test_split(data, test_size=0.2, random_state=1)\n",
"forest_train, forest_val = train_test_split(forest_train, test_size=0.25, random_state=1)"
2024-03-17 18:28:15 +01:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<h4>Statystyki</h4>"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Wielkości zbiorów"
]
},
{
"cell_type": "code",
2024-04-13 17:39:48 +02:00
"execution_count": 5,
2024-03-17 18:42:28 +01:00
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-17T17:38:58.202546800Z",
"start_time": "2024-03-17T17:38:58.143643600Z"
}
},
2024-03-17 18:28:15 +01:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-04-13 17:39:48 +02:00
"wielkość zbioru: (581012, 55)\n",
"wielkość zbioru treningowego: (348606, 55)\n",
"wielkość zbioru testującego: (116203, 55)\n",
"wielkość zbioru walidacyjnego: (116203, 55)\n"
2024-03-17 18:28:15 +01:00
]
}
],
"source": [
"print(f'wielkość zbioru: {data.shape}')\n",
2024-04-13 17:39:48 +02:00
"print(f'wielkość zbioru treningowego: {forest_train.shape}')\n",
"print(f'wielkość zbioru testującego: {forest_test.shape}')\n",
"print(f'wielkość zbioru walidacyjnego: {forest_val.shape}')"
2024-03-17 18:28:15 +01:00
]
},
{
"cell_type": "code",
2024-03-17 18:42:28 +01:00
"execution_count": 9,
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-17T17:39:01.338802400Z",
"start_time": "2024-03-17T17:39:01.252677700Z"
}
},
2024-03-17 18:28:15 +01:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
2024-04-13 17:39:48 +02:00
"RangeIndex: 581012 entries, 0 to 581011\n",
"Data columns (total 55 columns):\n",
" # Column Non-Null Count Dtype\n",
"--- ------ -------------- -----\n",
" 0 Elevation 581012 non-null int64\n",
" 1 Aspect 581012 non-null int64\n",
" 2 Slope 581012 non-null int64\n",
" 3 Horizontal_Distance_To_Hydrology 581012 non-null int64\n",
" 4 Vertical_Distance_To_Hydrology 581012 non-null int64\n",
" 5 Horizontal_Distance_To_Roadways 581012 non-null int64\n",
" 6 Hillshade_9am 581012 non-null int64\n",
" 7 Hillshade_Noon 581012 non-null int64\n",
" 8 Hillshade_3pm 581012 non-null int64\n",
" 9 Horizontal_Distance_To_Fire_Points 581012 non-null int64\n",
" 10 Wilderness_Area1 581012 non-null int64\n",
" 11 Wilderness_Area2 581012 non-null int64\n",
" 12 Wilderness_Area3 581012 non-null int64\n",
" 13 Wilderness_Area4 581012 non-null int64\n",
" 14 Soil_Type1 581012 non-null int64\n",
" 15 Soil_Type2 581012 non-null int64\n",
" 16 Soil_Type3 581012 non-null int64\n",
" 17 Soil_Type4 581012 non-null int64\n",
" 18 Soil_Type5 581012 non-null int64\n",
" 19 Soil_Type6 581012 non-null int64\n",
" 20 Soil_Type7 581012 non-null int64\n",
" 21 Soil_Type8 581012 non-null int64\n",
" 22 Soil_Type9 581012 non-null int64\n",
" 23 Soil_Type10 581012 non-null int64\n",
" 24 Soil_Type11 581012 non-null int64\n",
" 25 Soil_Type12 581012 non-null int64\n",
" 26 Soil_Type13 581012 non-null int64\n",
" 27 Soil_Type14 581012 non-null int64\n",
" 28 Soil_Type15 581012 non-null int64\n",
" 29 Soil_Type16 581012 non-null int64\n",
" 30 Soil_Type17 581012 non-null int64\n",
" 31 Soil_Type18 581012 non-null int64\n",
" 32 Soil_Type19 581012 non-null int64\n",
" 33 Soil_Type20 581012 non-null int64\n",
" 34 Soil_Type21 581012 non-null int64\n",
" 35 Soil_Type22 581012 non-null int64\n",
" 36 Soil_Type23 581012 non-null int64\n",
" 37 Soil_Type24 581012 non-null int64\n",
" 38 Soil_Type25 581012 non-null int64\n",
" 39 Soil_Type26 581012 non-null int64\n",
" 40 Soil_Type27 581012 non-null int64\n",
" 41 Soil_Type28 581012 non-null int64\n",
" 42 Soil_Type29 581012 non-null int64\n",
" 43 Soil_Type30 581012 non-null int64\n",
" 44 Soil_Type31 581012 non-null int64\n",
" 45 Soil_Type32 581012 non-null int64\n",
" 46 Soil_Type33 581012 non-null int64\n",
" 47 Soil_Type34 581012 non-null int64\n",
" 48 Soil_Type35 581012 non-null int64\n",
" 49 Soil_Type36 581012 non-null int64\n",
" 50 Soil_Type37 581012 non-null int64\n",
" 51 Soil_Type38 581012 non-null int64\n",
" 52 Soil_Type39 581012 non-null int64\n",
" 53 Soil_Type40 581012 non-null int64\n",
" 54 Cover_Type 581012 non-null int64\n",
"dtypes: int64(55)\n",
"memory usage: 243.8 MB\n"
2024-03-17 18:28:15 +01:00
]
}
],
"source": [
"data.info()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
2024-04-13 17:39:48 +02:00
"Nachylenie\n",
"\n"
2024-03-17 18:28:15 +01:00
]
},
{
"cell_type": "code",
2024-03-17 18:42:28 +01:00
"execution_count": 10,
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-17T17:39:04.561664800Z",
"start_time": "2024-03-17T17:39:04.530325200Z"
}
},
2024-03-17 18:28:15 +01:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-04-13 17:39:48 +02:00
"Średnie nachylenie: 14.103703537964792\n",
"Maksymalne nachylenie: 66\n",
"Minimalne nachylenie: 0\n"
2024-03-17 18:28:15 +01:00
]
}
],
"source": [
2024-04-13 17:39:48 +02:00
"print(f'Średnie nachylenie: {data[\"Slope\"].mean()}')\n",
"print(f'Maksymalne nachylenie: {data[\"Slope\"].max()}')\n",
"print(f'Minimalne nachylenie: {data[\"Slope\"].min()}')"
2024-03-17 18:28:15 +01:00
]
},
{
"cell_type": "code",
2024-03-17 18:42:28 +01:00
"execution_count": 12,
2024-03-17 18:28:15 +01:00
"metadata": {},
"outputs": [
{
"data": {
2024-04-13 17:39:48 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAACWwAAA9nCAYAAABopOliAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/H5lhTAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzde5SWZb038O8wDDMcnEFEBgwGMZXExGPl5I7MEyqalaVliVnKxmOJlds0M32RDlvNElHezDRlu9V0l4fyDJiipkmedplmoAmYpxlBGGBm3j9czMsEjCjPPA8wn89az+I+/O77+t3DtGzdfJ/rKmttbW0NAAAAAAAAAAAAna5bqRsAAAAAAAAAAADoKgS2AAAAAAAAAAAAikRgCwAAAAAAAAAAoEgEtgAAAAAAAAAAAIpEYAsAAAAAAAAAAKBIBLYAAAAAAAAAAACKRGALAAAAAAAAAACgSAS2AAAAAAAAAAAAikRgCwAAAAAAAAAAoEgEtgCADdKee+6ZsrKy7LnnnqVuZb3hZwIAAAAAAADrP4EtAKDopk+fnrKysnf1+frXv17qtgEAAACAApoxY0a7d4APPPBAqVsCACgKgS0AgPXYl7/85ZSVlWXLLbcsdSsAAAAAUFBXXnllu/2rrrqqRJ1sWFYE3M4+++xStwIAvEfdS90AANC1HXfccTn++OPfsa5///5F6GbDNn369FK3AAAAAABrZfHixbnhhhuSJH369MnChQtz3XXX5aKLLkplZWWJuwMA6FwCWwBASQ0YMCAf/OAHS90GAAAAAFBEN910U958880kyU9+8pN85Stfyeuvv56bb745n/3sZ0vcHQBA57IkIgAAAAAAAFBUK5Y/HDlyZI4++ugMHz683XEAgI2ZwBYAsFGbP39+zjjjjOy2227p169fKisrM2TIkBx22GG56667VnvNV77ylZSVlaVnz55t3/LryPDhw1NWVpYPf/jD7Y63tLTknnvuyTe+8Y3sscce6d+/fyoqKtK3b9/stNNO+cY3vpG5c+eu9p5nn312ysrKcuWVVyZJ5syZk7KyslU+K9tzzz1TVlaWPffcs8N+f//73+fII4/MlltumaqqqvTt2zc777xzzjzzzPzzn/9c43XTp09vG3fF8ovXXXdd9t5772y++ebp2bNnhg8fnm9961t57bXX3uGnBgAAAEBXNW/evLZ3c1/60pfa/fm73/2uw3dUSfLSSy/lP/7jP7LLLrukpqYmFRUVqa2tzQ477JAvfOEL+cUvfpHGxsZVrlvxbuvss89Oktx111355Cc/mUGDBqWqqipbbbVVTjzxxPzjH/9Yq+f44x//mPHjx2f48OHp06dPevfuneHDh+e4447LM888s1b3ePLJJ3PSSSdlhx12yKabbpqKiooMHDgw++yzT374wx9m3rx5bbVbbrllu3eC3/ve91Z5X/jlL395rcYFAEpLYAsA2Ghdc8012XrrrXPeeefl0Ucfzeuvv56lS5fmxRdfzPXXX5999903xxxzTJYvX97uui9+8YtJkiVLluTGG2/scIxHHnmk7eXLiutWOOecc7L33nvn/PPPzwMPPJBXX301y5cvT0NDQ/70pz/l/PPPz3bbbZebbrqpgE+9Zi0tLTnxxBPzsY99LFdffXXmzJmTpqamNDQ0ZPbs2Zk4cWK22Wab3HnnnWt1ryOPPDKHH3547rnnnrzyyitZsmRJnnnmmfzoRz/KRz7ykcyfP78ITwUAAADAhuaaa65Jc3NzunXrliOOOCLJ2+/WysrKsmzZsvzXf/3XGq+97777st122+UHP/hBHnvssTQ2Nmb58uV5+eWX8+STT+baa6/N0UcfnZkzZ3bYw/e+973su+++ufnmmzN//vw0NTXl+eefz+TJk7P99tvnvvvuW+O1LS0tmTBhQnbbbbdcdtlleeaZZ7Jo0aK89dZbeeaZZ3LppZdm++23z9SpU9d4j+bm5kyYMCEjR47MxRdfnCeffDJvvPFGli9fngULFuTuu+/OaaedltNPP/0dfpoAwIaoe6kbAADoDNddd12OPPLItLa2tn0zbsSIEdl8883z97//PZdffnluu+22XH755amurs4FF1zQdu0nPvGJbLHFFnnppZdyzTXX5KijjlrjONOmTUuSlJeX5/Of/3y7c8uXL8+gQYPy6U9/OvX19dlqq61SVVWVF154IQ888EAuueSSLFy4MEcccUT++Mc/Zrvttmu79vjjj89nP/vZnHnmmfn1r3+dLbbYIrfffvs6/Uz+4z/+I5MnT06SDBs2LKeddlp22WWXLFq0KL/5zW9y8cUXp6GhIQcddFAefvjh7Ljjjmu813e+85088MAD+dSnPpWxY8dm6NChWbBgQSZPnpxbb701zz77bE455ZQOX64BAAAA0DX98pe/TPL2jPHve9/7krz9vuqjH/1o7r///lx11VU5+eSTV7muqakpn//859PY2JhNNtkkxx13XD7xiU9kwIABWbp0aZ5//vk88MAD7/gFyVtvvTWPPPJI22zxI0eOTENDQ66//vr83//7f9vekT355JMZMmTIKtefdNJJueSSS5Iko0aNype//OVstdVW6dWrV/70pz/lxz/+cZ566qn8+7//ewYOHJhPfvKTq9xj3Lhx+fnPf54kGTRoUE488cR89KMfTU1NTf75z3/m4Ycfzg033NDumjvuuCNLly7NDjvskCQ57rjjcvzxx7er2XTTTTt8dgBg/VDW2traWuomAICuZfr06fnEJz6RZPUvFVZn+PDhqaioaNvfc889M2PGjHz84x9vW55vhVdeeSVbb711Ghoa8pWvfCWXXXZZundfNad+xhln5Lzzzku3bt3y9NNPZ/jw4W3nTj311FxwwQUpLy/PP/7xj9TW1q5yfUtLS4YMGZKXXnop++233yqBqr///e953/ve167vlb344ovZfffd849//CNf+tKX2l5UrezLX/5yrrzyygwdOjR///vfO/oRdfgzeeKJJ7LTTjulpaUlH/zgB3Pfffelb9++7Wp+97vfZcyYMWlpacmHP/zhPPTQQ+3Or/z3liT/5//8n5xxxhntalpbW7P//vvnjjvuSPfu3fPSSy9l880377BvAAAAALqO2bNnZ+edd06S/PznP8/RRx/ddu7SSy/NcccdlyR56qmnMmLEiHbX3nPPPdl7772TJDfffHMOOuig1Y6xfPnyvPXWW6murm53fOXlBHfZZZfMmDEjffr0aVfzy1/+MmPHjk2SfO5zn8t1113X7vydd96Z/fbbL0nys5/9LF/96ldXGX/JkiUZM2ZM7rnnngwdOjTPPvtsu/eTv/nNb3LIIYckSerr63Pbbbet8q5uhRdeeGGV0NiK5/jud7/btrwjALBhsSQiAFBSU6ZMyQ477PCOn3/84x/v6p4NDQ153/vel0suuWS1Ya3k7WnP3/e+96WlpSVXXXVVu3Mrljdsbm7Otddeu9rr77333rz00kvt6le25ZZbrjGslSSDBw/ON7/5zSRvv6TpzBz9lClT0tLSkuTtF0mrewG0//775ytf+UqS5OGHH84f/vCHNd5v1113zbe//e1VjpeVlWXChAlJ3n4xNmvWrAJ0DwAAAMDGYsV7uJ49e+bQQw9td+6www5Ljx492tWtbP78+W3bo0aNWuMY3bt3XyWs9a+mTp26SlgrSY488sgccMABSZKbbrqp3ZhJ8v3vfz9Jcuihh642rJUkVVVVufjii5Mkc+bMyb333rvae/Tq1Ss33HDDGsNaSVY7wxcAsOET2AIANjq/+c1vkiQHHXRQKisr11jXvXv31NfXJ8kqwaJddtklH/jAB5L8/2UP/9WK4z179synP/3pd+yrsbExzz//fJ566qk8+eSTefLJJ9OrV6925zrLXXfdlSTZfvvt85GPfGSNdccee+wq16zOEUcc0e4biSvbdddd27b/9re/vdtWAQAAANhILV++vO2d2sEHH7xKqKpfv3458MADkyTXXHNN2xcQVxg0aFDb9hVXXPGe+9hhhx3avcP6Vyu+1Lh8+fJ2M9k3Nja27X/2s5/tcIztttsu/fv3T9L
"text/plain": [
"<Figure size 3000x5000 with 10 Axes>"
]
2024-03-17 18:28:15 +01:00
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
2024-04-13 17:39:48 +02:00
"import seaborn as sns\n",
"features = data.loc[:,'Elevation':'Horizontal_Distance_To_Fire_Points']\n",
"\n",
"plt.figure(figsize=(30, 50))\n",
"for i,col in enumerate(features.columns.values):\n",
" plt.subplot(5,2,i+1)\n",
" sns.boxplot(x=data['Cover_Type'], y=col, data=data)\n",
" plt.title(col, fontsize=20)\n",
" \n",
2024-03-17 18:28:15 +01:00
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
2024-04-13 17:39:48 +02:00
"Normalizacja"
2024-03-17 18:28:15 +01:00
]
},
{
"cell_type": "code",
2024-04-13 17:39:48 +02:00
"execution_count": 31,
2024-03-17 18:28:15 +01:00
"metadata": {},
"outputs": [
{
"data": {
2024-04-13 17:39:48 +02:00
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Elevation</th>\n",
" <th>Aspect</th>\n",
" <th>Slope</th>\n",
" <th>Horizontal_Distance_To_Hydrology</th>\n",
" <th>Vertical_Distance_To_Hydrology</th>\n",
" <th>Horizontal_Distance_To_Roadways</th>\n",
" <th>Hillshade_9am</th>\n",
" <th>Hillshade_Noon</th>\n",
" <th>Hillshade_3pm</th>\n",
" <th>Horizontal_Distance_To_Fire_Points</th>\n",
" <th>...</th>\n",
" <th>Soil_Type32</th>\n",
" <th>Soil_Type33</th>\n",
" <th>Soil_Type34</th>\n",
" <th>Soil_Type35</th>\n",
" <th>Soil_Type36</th>\n",
" <th>Soil_Type37</th>\n",
" <th>Soil_Type38</th>\n",
" <th>Soil_Type39</th>\n",
" <th>Soil_Type40</th>\n",
" <th>Cover_Type</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>318054</th>\n",
" <td>-1.579964</td>\n",
" <td>1.030645</td>\n",
" <td>-0.280934</td>\n",
" <td>0.012100</td>\n",
" <td>0.644670</td>\n",
" <td>-1.196821</td>\n",
" <td>-0.864631</td>\n",
" <td>1.046164</td>\n",
" <td>1.318678</td>\n",
" <td>-1.373130</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30504</th>\n",
" <td>-0.001305</td>\n",
" <td>-1.390866</td>\n",
" <td>-1.749905</td>\n",
" <td>-0.420741</td>\n",
" <td>-0.453191</td>\n",
" <td>2.315116</td>\n",
" <td>0.181321</td>\n",
" <td>0.641484</td>\n",
" <td>0.351977</td>\n",
" <td>1.495029</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>349520</th>\n",
" <td>0.477293</td>\n",
" <td>-0.908351</td>\n",
" <td>0.653865</td>\n",
" <td>-1.070003</td>\n",
" <td>-0.847735</td>\n",
" <td>-0.996083</td>\n",
" <td>0.554876</td>\n",
" <td>-1.381919</td>\n",
" <td>-1.267901</td>\n",
" <td>-0.500147</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>365645</th>\n",
" <td>-1.633538</td>\n",
" <td>1.557837</td>\n",
" <td>0.386780</td>\n",
" <td>-0.561885</td>\n",
" <td>0.095739</td>\n",
" <td>-1.033922</td>\n",
" <td>-1.312896</td>\n",
" <td>-0.370218</td>\n",
" <td>0.926772</td>\n",
" <td>-1.110329</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>131114</th>\n",
" <td>0.009410</td>\n",
" <td>-1.355124</td>\n",
" <td>-0.147392</td>\n",
" <td>-0.820649</td>\n",
" <td>-0.676194</td>\n",
" <td>1.231264</td>\n",
" <td>-0.379010</td>\n",
" <td>-0.471388</td>\n",
" <td>0.142960</td>\n",
" <td>0.014128</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>385769</th>\n",
" <td>0.791596</td>\n",
" <td>-0.327546</td>\n",
" <td>-1.215734</td>\n",
" <td>-0.467789</td>\n",
" <td>-0.813427</td>\n",
" <td>0.042234</td>\n",
" <td>0.592231</td>\n",
" <td>0.590899</td>\n",
" <td>-0.039929</td>\n",
" <td>-0.741048</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>161626</th>\n",
" <td>-0.033449</td>\n",
" <td>1.021709</td>\n",
" <td>-1.349277</td>\n",
" <td>-0.759486</td>\n",
" <td>-0.538961</td>\n",
" <td>-0.190570</td>\n",
" <td>-0.080167</td>\n",
" <td>0.894409</td>\n",
" <td>0.717756</td>\n",
" <td>0.042825</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>394880</th>\n",
" <td>0.327285</td>\n",
" <td>-0.005869</td>\n",
" <td>1.054494</td>\n",
" <td>0.567265</td>\n",
" <td>0.404513</td>\n",
" <td>-0.307292</td>\n",
" <td>1.003141</td>\n",
" <td>0.641484</td>\n",
" <td>-0.745360</td>\n",
" <td>-0.355153</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>389492</th>\n",
" <td>0.230851</td>\n",
" <td>0.315808</td>\n",
" <td>0.253237</td>\n",
" <td>2.425659</td>\n",
" <td>1.090676</td>\n",
" <td>0.416772</td>\n",
" <td>0.218677</td>\n",
" <td>1.400260</td>\n",
" <td>0.508739</td>\n",
" <td>-0.014568</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>52507</th>\n",
" <td>-0.876353</td>\n",
" <td>1.727611</td>\n",
" <td>0.520322</td>\n",
" <td>-0.952383</td>\n",
" <td>-0.453191</td>\n",
" <td>-0.481735</td>\n",
" <td>-1.051408</td>\n",
" <td>-0.825483</td>\n",
" <td>0.456485</td>\n",
" <td>0.946771</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>10 rows × 55 columns</p>\n",
"</div>"
],
"text/plain": [
" Elevation Aspect Slope Horizontal_Distance_To_Hydrology \\\n",
"318054 -1.579964 1.030645 -0.280934 0.012100 \n",
"30504 -0.001305 -1.390866 -1.749905 -0.420741 \n",
"349520 0.477293 -0.908351 0.653865 -1.070003 \n",
"365645 -1.633538 1.557837 0.386780 -0.561885 \n",
"131114 0.009410 -1.355124 -0.147392 -0.820649 \n",
"385769 0.791596 -0.327546 -1.215734 -0.467789 \n",
"161626 -0.033449 1.021709 -1.349277 -0.759486 \n",
"394880 0.327285 -0.005869 1.054494 0.567265 \n",
"389492 0.230851 0.315808 0.253237 2.425659 \n",
"52507 -0.876353 1.727611 0.520322 -0.952383 \n",
"\n",
" Vertical_Distance_To_Hydrology Horizontal_Distance_To_Roadways \\\n",
"318054 0.644670 -1.196821 \n",
"30504 -0.453191 2.315116 \n",
"349520 -0.847735 -0.996083 \n",
"365645 0.095739 -1.033922 \n",
"131114 -0.676194 1.231264 \n",
"385769 -0.813427 0.042234 \n",
"161626 -0.538961 -0.190570 \n",
"394880 0.404513 -0.307292 \n",
"389492 1.090676 0.416772 \n",
"52507 -0.453191 -0.481735 \n",
"\n",
" Hillshade_9am Hillshade_Noon Hillshade_3pm \\\n",
"318054 -0.864631 1.046164 1.318678 \n",
"30504 0.181321 0.641484 0.351977 \n",
"349520 0.554876 -1.381919 -1.267901 \n",
"365645 -1.312896 -0.370218 0.926772 \n",
"131114 -0.379010 -0.471388 0.142960 \n",
"385769 0.592231 0.590899 -0.039929 \n",
"161626 -0.080167 0.894409 0.717756 \n",
"394880 1.003141 0.641484 -0.745360 \n",
"389492 0.218677 1.400260 0.508739 \n",
"52507 -1.051408 -0.825483 0.456485 \n",
"\n",
" Horizontal_Distance_To_Fire_Points ... Soil_Type32 Soil_Type33 \\\n",
"318054 -1.373130 ... 0 0 \n",
"30504 1.495029 ... 0 0 \n",
"349520 -0.500147 ... 0 0 \n",
"365645 -1.110329 ... 0 0 \n",
"131114 0.014128 ... 0 0 \n",
"385769 -0.741048 ... 0 0 \n",
"161626 0.042825 ... 0 0 \n",
"394880 -0.355153 ... 0 0 \n",
"389492 -0.014568 ... 0 1 \n",
"52507 0.946771 ... 0 0 \n",
"\n",
" Soil_Type34 Soil_Type35 Soil_Type36 Soil_Type37 Soil_Type38 \\\n",
"318054 0 0 0 0 0 \n",
"30504 0 0 0 0 0 \n",
"349520 0 0 0 0 0 \n",
"365645 0 0 0 0 0 \n",
"131114 0 0 0 0 0 \n",
"385769 0 0 0 0 0 \n",
"161626 0 0 0 0 0 \n",
"394880 0 0 0 0 0 \n",
"389492 0 0 0 0 0 \n",
"52507 0 0 0 0 0 \n",
"\n",
" Soil_Type39 Soil_Type40 Cover_Type \n",
"318054 0 0 2 \n",
"30504 0 0 2 \n",
"349520 0 0 1 \n",
"365645 0 0 6 \n",
"131114 0 0 2 \n",
"385769 0 0 1 \n",
"161626 0 0 2 \n",
"394880 0 0 1 \n",
"389492 0 0 2 \n",
"52507 0 0 2 \n",
"\n",
"[10 rows x 55 columns]"
]
2024-03-17 18:28:15 +01:00
},
2024-04-13 17:39:48 +02:00
"execution_count": 31,
2024-03-17 18:28:15 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.preprocessing import StandardScaler\n",
"scaler = StandardScaler()\n",
"\n",
2024-04-13 17:39:48 +02:00
"columns_to_normalize = data.columns[~data.columns.str.startswith('Soil_Type')]\n",
"columns_to_normalize = columns_to_normalize.to_list()\n",
"columns_to_normalize.remove('Cover_Type')\n",
"data[columns_to_normalize] = scaler.fit_transform(data[columns_to_normalize])\n",
2024-03-17 18:28:15 +01:00
"\n",
2024-04-13 17:39:48 +02:00
"data.head(10)"
2024-03-17 18:28:15 +01:00
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}