ium_464914/IUM_2.ipynb

872 lines
641 KiB
Plaintext
Raw Normal View History

2024-03-17 18:28:15 +01:00
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"%pip install --user kaggle \n",
"%pip install --user pandas\n",
"%pip install --user scikit-learn\n",
"%pip install --user matplotlib\n",
"%pip install --user geopandas"
]
},
{
"cell_type": "code",
2024-03-17 18:42:28 +01:00
"execution_count": 5,
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-17T17:38:40.019389600Z",
"start_time": "2024-03-17T17:38:36.535384600Z"
}
},
2024-03-17 18:28:15 +01:00
"outputs": [],
"source": [
"import matplotlib.pyplot as plt \n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!kaggle datasets download -d nasa/meteorite-landings"
]
},
{
"cell_type": "code",
2024-03-17 18:42:28 +01:00
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-17T17:38:23.310983800Z",
"start_time": "2024-03-17T17:38:22.919245800Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Archive: meteorite-landings.zip\n",
" inflating: data/meteorite-landings.csv \n"
]
}
],
2024-03-17 18:28:15 +01:00
"source": [
"!unzip -o meteorite-landings.zip -d data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<h4>Wczytanie zbioru</h4>"
]
},
{
"cell_type": "code",
2024-03-17 18:42:28 +01:00
"execution_count": 6,
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-17T17:38:46.756471100Z",
"start_time": "2024-03-17T17:38:46.335674200Z"
}
},
2024-03-17 18:28:15 +01:00
"outputs": [
{
"data": {
2024-03-17 18:42:28 +01:00
"text/plain": " name id nametype recclass mass fall year \\\n0 Aachen 1 Valid L5 21.0 Fell 1880.0 \n1 Aarhus 2 Valid H6 720.0 Fell 1951.0 \n2 Abee 6 Valid EH4 107000.0 Fell 1952.0 \n3 Acapulco 10 Valid Acapulcoite 1914.0 Fell 1976.0 \n4 Achiras 370 Valid L6 780.0 Fell 1902.0 \n5 Adhi Kot 379 Valid EH4 4239.0 Fell 1919.0 \n6 Adzhi-Bogdo (stone) 390 Valid LL3-6 910.0 Fell 1949.0 \n7 Agen 392 Valid H5 30000.0 Fell 1814.0 \n8 Aguada 398 Valid L6 1620.0 Fell 1930.0 \n9 Aguila Blanca 417 Valid L 1440.0 Fell 1920.0 \n\n reclat reclong GeoLocation \n0 50.77500 6.08333 (50.775000, 6.083330) \n1 56.18333 10.23333 (56.183330, 10.233330) \n2 54.21667 -113.00000 (54.216670, -113.000000) \n3 16.88333 -99.90000 (16.883330, -99.900000) \n4 -33.16667 -64.95000 (-33.166670, -64.950000) \n5 32.10000 71.80000 (32.100000, 71.800000) \n6 44.83333 95.16667 (44.833330, 95.166670) \n7 44.21667 0.61667 (44.216670, 0.616670) \n8 -31.60000 -65.23333 (-31.600000, -65.233330) \n9 -30.86667 -64.55000 (-30.866670, -64.550000) ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>name</th>\n <th>id</th>\n <th>nametype</th>\n <th>recclass</th>\n <th>mass</th>\n <th>fall</th>\n <th>year</th>\n <th>reclat</th>\n <th>reclong</th>\n <th>GeoLocation</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Aachen</td>\n <td>1</td>\n <td>Valid</td>\n <td>L5</td>\n <td>21.0</td>\n <td>Fell</td>\n <td>1880.0</td>\n <td>50.77500</td>\n <td>6.08333</td>\n <td>(50.775000, 6.083330)</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Aarhus</td>\n <td>2</td>\n <td>Valid</td>\n <td>H6</td>\n <td>720.0</td>\n <td>Fell</td>\n <td>1951.0</td>\n <td>56.18333</td>\n <td>10.23333</td>\n <td>(56.183330, 10.233330)</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Abee</td>\n <td>6</td>\n <td>Valid</td>\n <td>EH4</td>\n <td>107000.0</td>\n <td>Fell</td>\n <td>1952.0</td>\n <td>54.21667</td>\n <td>-113.00000</td>\n <td>(54.216670, -113.000000)</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Acapulco</td>\n <td>10</td>\n <td>Valid</td>\n <td>Acapulcoite</td>\n <td>1914.0</td>\n <td>Fell</td>\n <td>1976.0</td>\n <td>16.88333</td>\n <td>-99.90000</td>\n <td>(16.883330, -99.900000)</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Achiras</td>\n <td>370</td>\n <td>Valid</td>\n <td>L6</td>\n <td>780.0</td>\n <td>Fell</td>\n <td>1902.0</td>\n <td>-33.16667</td>\n <td>-64.95000</td>\n <td>(-33.166670, -64.950000)</td>\n </tr>\n <tr>\n <th>5</th>\n <td>Adhi Kot</td>\n <td>379</td>\n <td>Valid</td>\n <td>EH4</td>\n <td>4239.0</td>\n <td>Fell</td>\n <td>1919.0</td>\n <td>32.10000</td>\n <td>71.80000</td>\n <td>(32.100000, 71.800000)</td>\n </tr>\n <tr>\n <th>6</th>\n <td>Adzhi-Bogdo (stone)</td>\n <td>390</td>\n <td>Valid</td>\n <td>LL3-6</td>\n <td>910.0</td>\n <td>Fell</td>\n 
<td>1949.0</td>\n <td>44.83333</td>\n <td>95.16667</td>\n <td>(44.833330, 95.166670)</td>\n </tr>\n <tr>\n <th>7</th>\n <td>Agen</td>\n <td>392</td>\n <td>Valid</td>\n <td>H5</td>\n <td>30000.0</td>\n <td>Fell</td>\n <td>1814.0</td>\n <td>44.21667</td>\n <td>0.61667</td>\n <td>(44.216670, 0.616670)</td>\n </tr>\n <tr>\n <th>8</th>\n <td>Aguada</td>\n <td>398</td>\n <td>Valid</td>\n <td>L6</td>\n <td>1620.0</td>\n <td>Fell</td>\n <td>1930.0</td>\n <td>-31.60000</td>\n <td>-65.23333</td>\n <td>(-31.600000, -65.233330)</td>\n </tr>\n <tr>\n <th>9</th>\n <td>Aguila Blanca</td>\n <td>417</td>\n <td>Valid</td>\n <td>L</td>\n <td>1440.0</td>\n <td>Fell</td>\n <td>1920.0</td>\n <td>-30.86667</td>\n <td>-64.55000</td>\n <td>(-30.866670, -64.550000)</td>\n </tr>\n </tbody>\n</table>\n</div>"
2024-03-17 18:28:15 +01:00
},
2024-03-17 18:42:28 +01:00
"execution_count": 6,
2024-03-17 18:28:15 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = pd.read_csv(\"data/meteorite-landings.csv\")\n",
"data.head(10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Podział na podzbiory"
]
},
{
"cell_type": "code",
2024-03-17 18:42:28 +01:00
"execution_count": 7,
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-17T17:38:53.929532900Z",
"start_time": "2024-03-17T17:38:51.607851900Z"
}
},
2024-03-17 18:28:15 +01:00
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"meteorite_train, meteorite_test = train_test_split(data, test_size=0.2, random_state=1)\n",
"meteorite_train, meteorite_val = train_test_split(meteorite_train, test_size=0.25, random_state=1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<h4>Statystyki</h4>"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Wielkości zbiorów"
]
},
{
"cell_type": "code",
2024-03-17 18:42:28 +01:00
"execution_count": 8,
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-17T17:38:58.202546800Z",
"start_time": "2024-03-17T17:38:58.143643600Z"
}
},
2024-03-17 18:28:15 +01:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"wielkość zbioru: (45716, 10)\n",
"wielkość zbioru treningowego: (27429, 10)\n",
"wielkość zbioru testującego: (9144, 10)\n",
"wielkość zbioru walidacyjnego: (9143, 10)\n"
]
}
],
"source": [
"print(f'wielkość zbioru: {data.shape}')\n",
"print(f'wielkość zbioru treningowego: {meteorite_train.shape}')\n",
"print(f'wielkość zbioru testującego: {meteorite_test.shape}')\n",
"print(f'wielkość zbioru walidacyjnego: {meteorite_val.shape}')"
]
},
{
"cell_type": "code",
2024-03-17 18:42:28 +01:00
"execution_count": 9,
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-17T17:39:01.338802400Z",
"start_time": "2024-03-17T17:39:01.252677700Z"
}
},
2024-03-17 18:28:15 +01:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 45716 entries, 0 to 45715\n",
"Data columns (total 10 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 name 45716 non-null object \n",
" 1 id 45716 non-null int64 \n",
" 2 nametype 45716 non-null object \n",
" 3 recclass 45716 non-null object \n",
" 4 mass 45585 non-null float64\n",
" 5 fall 45716 non-null object \n",
" 6 year 45428 non-null float64\n",
" 7 reclat 38401 non-null float64\n",
" 8 reclong 38401 non-null float64\n",
" 9 GeoLocation 38401 non-null object \n",
"dtypes: float64(4), int64(1), object(5)\n",
"memory usage: 3.5+ MB\n"
]
}
],
"source": [
"data.info()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Masa meteorytu"
]
},
{
"cell_type": "code",
2024-03-17 18:42:28 +01:00
"execution_count": 10,
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-17T17:39:04.561664800Z",
"start_time": "2024-03-17T17:39:04.530325200Z"
}
},
2024-03-17 18:28:15 +01:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Średnia masa: 13278.078548601516\n",
"Maksymalna masa: 60000000.0\n",
"Minimalna masa: 0.0\n"
]
}
],
"source": [
"print(f'Średnia masa: {data[\"mass\"].mean()}')\n",
"print(f'Maksymalna masa: {data[\"mass\"].max()}')\n",
"print(f'Minimalna masa: {data[\"mass\"].min()}')"
]
},
{
"cell_type": "code",
2024-03-17 18:42:28 +01:00
"execution_count": 11,
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-17T17:39:08.291844400Z",
"start_time": "2024-03-17T17:39:06.361242500Z"
}
},
2024-03-17 18:28:15 +01:00
"outputs": [
{
"data": {
2024-03-17 18:42:28 +01:00
"text/plain": "<Figure size 1000x600 with 1 Axes>",
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1EAAAIjCAYAAADiGJHUAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAABDsUlEQVR4nO3dfVgU9eL//9cCAt4g3qAgCnhvkoqpaKaWph1Ds7Tyrkw0s0+nJTWy89XOJ2/KtJujWZ1NuzlK2THNUuuTeZNoWR07oobdoKZF4i2IpggqKszvD3/u1QoYs6CzsM/Hde11uTOzM6+ZXZSXM/Nem2EYhgAAAAAApeJjdQAAAAAAqEgoUQAAAABgAiUKAAAAAEygRAEAAACACZQoAAAAADCBEgUAAAAAJlCiAAAAAMAEShQAAAAAmECJAgAAAAATKFEAUArTpk2TzWa7Jtvq2bOnevbs6Xz+xRdfyGaz6cMPP7wm2x81apQaN258TbblrtzcXD300EMKCwuTzWbThAkTrI5kuWv5GQUAb0eJAuB1kpKSZLPZnI/AwECFh4erb9++evXVV3Xq1Kly2c6hQ4c0bdo0paamlsv6ypMnZyuNmTNnKikpSX/961+1aNEiPfDAAyUu27hxY9lsNvXp06fY+W+99Zbzs7B161bTWdLS0jRt2jT99ttvpl97tc2cOVMrV64s9/Ve7WMKAJ6OEgXAaz3zzDNatGiR5s2bp8cee0ySNGHCBLVt21bff/+9y7L/+7//qzNnzpha/6FDhzR9+nTTRWXdunVat26dqdeYdaVsb731lnbv3n1Vt19WGzZs0I033qipU6dqxIgR6tix4xWXDwwM1MaNG3XkyJEi8/79738rMDDQ7SxpaWmaPn265SWquM/o1SpR0tU9pgDg6ShRALxWXFycRowYodGjR2vy5Mlau3at1q9fr6ysLN15550uv5D6+fld9V8KT58+LUny9/eXv7//Vd3WlVSpUkUBAQGWbb80srKyVKtWrVIv361bN9WoUUNLly51mX7gwAF99dVX6t+/fzknvHby8vIkXZvP6B9V5mMKAH+GEgUAf3Drrbfq6aef1r59+/Tee+85pxd3v8nnn3+u7t27q1atWqpRo4ZatWqlp556StLF+5hiY2MlSaNHj3Ze2pSUlCTp4n1Pbdq00bZt23TzzTerWrVqztdefk/UJQUFBXrqqacUFham6tWr684779T+/ftdlmncuLFGjRpV5LV/XOefZSvunqi8vDw98cQTioiIUEBAgFq1aqV//OMfMgzDZTmbzaaEhAStXLlSbdq0UUBAgK6//nqtWbOm+AN+maysLI0ZM0ahoaEKDAxUTEyM3nnnHef8S/eHpaena9WqVc7sf3YWKDAwUHfffbcWL17sMv39999X7dq11bdv32Jft2vXLt17772qU6eOAgMD1alTJ33yySfO+UlJSRo8eLAkqVevXs48X3zxhXOZ1atXq0ePHqpevbqCgoLUv39//fTTT0W2tWHDBudytWrV0l133aWdO3e6LHPpc5iWlqb77rtPtWvXVvfu3V3mXWKz2ZSXl6d33nnHmeuPn42DBw/qwQcfVGhoqPN9WrBgwRWP4x+5c0y///57jRo1Sk2bNlVgYKDCwsL04IMP6tixYy7LnTp1ShMmTFDjxo0VEBCg+vXr67bbbtP27dudy+zZs0f33HOPwsLCFBgYqEaNGmnYsGE6efKky7ree+89dezYUVWrVlWdOnU0bNiwIj83pV0XAFziZ3UAAPA0DzzwgJ566imtW7dOY8eOLXaZn376SXfccYfatWunZ555RgEBAdq7d6+++eYbSVLr1q31zDPPaMqUKXr44YfVo0cPSdJNN93kXMexY8cUFxenYcOGacSIEQoNDb1irueee042m03/7//9P2VlZWnu3Lnq06ePUlNTVbVq1VLvX2my/ZFhGLrzzju1ceNGjRkzRu3bt9fatWv15JNP6uDBg3
r55Zddlv/666+1fPlyPfroowoKCtKrr76qe+65RxkZGapbt26Juc6cOaOePXtq7969SkhIUJMmTbRs2TKNGjVKJ06c0Pjx49W6dWstWrRIjz/+uBo1aqQnnnhCklSvXr0/3e/77rtPf/nLX/TLL7+oWbNmkqTFixfr3nvvVZUqVYos/9NPP6lbt25q2LChJk2apOrVq+uDDz7QwIED9dFHH2nQoEG6+eabNW7cOL366qt66qmn1Lp1a+cxlqRFixYpPj5effv21QsvvKDTp09r3rx56t69u7777jtnWV2/fr3i4uLUtGlTTZs2TWfOnNFrr72mbt26afv27UVK7eDBg9WiRQvNnDmzSJG9ZNGiRXrooYfUuXNnPfzww5Lk3O/MzEzdeOONztJbr149rV69WmPGjFFOTk6pB+owe0w///xz/frrrxo9erTCwsL0008/6c0339RPP/2kb7/91lkCH3nkEX344YdKSEhQdHS0jh07pq+//lo7d+5Uhw4ddO7cOfXt21f5+fl67LHHFBYWpoMHD+rTTz/ViRMnFBwcLOniz8zTTz+tIUOG6KGHHtLRo0f12muv6eabb9Z3332nWrVqlXpdAODCAAAvs3DhQkOSkZKSUuIywcHBxg033OB8PnXqVOOPf2W+/PLLhiTj6NGjJa4jJSXFkGQsXLiwyLxbbrnFkGTMnz+/2Hm33HKL8/nGjRsNSUbDhg2NnJwc5/QPPvjAkGS88sorzmlRUVFGfHz8n67zStni4+ONqKgo5/OVK1cakowZM2a4LHfvvfcaNpvN2Lt3r3OaJMPf399l2o4dOwxJxmuvvVZkW380d+5cQ5Lx3nvvOaedO3fO6Nq1q1GjRg2XfY+KijL69+9/xfVdvuyFCxeMsLAw49lnnzUMwzDS0tIMScaXX35Z7Geid+/eRtu2bY2zZ886pxUWFho33XST0aJFC+e0ZcuWGZKMjRs3umz31KlTRq1atYyxY8e6TD9y5IgRHBzsMr19+/ZG/fr1jWPHjjmn7dixw/Dx8TFGjhzpnHbpczh8+PAi+3n5Z9QwDKN69erFfh7GjBljNGjQwMjOznaZPmzYMCM4ONg4ffp0kdf8kbvHtLj1vv/++4YkY9OmTc5pwcHBht1uL3H73333nSHJWLZsWYnL/Pbbb4avr6/x3HPPuUz/4YcfDD8/P+f00qwLAC7H5XwAUIwaNWpccZS+S/fjfPzxxyosLHRrGwEBARo9enSplx85cqSCgoKcz++99141aNBAn332mVvbL63PPvtMvr6+GjdunMv0J554QoZhaPXq1S7T+/Tp4zwrIUnt2rVTzZo19euvv/7pdsLCwjR8+HDntCpVqmjcuHHKzc3Vl19+Wab98PX11ZAhQ/T+++9Lujj4QUREhPNM3B8dP35cGzZs0JAhQ3Tq1CllZ2crOztbx44dU9++fbVnzx4dPHjwitv7/PPPdeLECQ0fPtz5+uzsbPn6+qpLly7auHGjJOnw4cNKTU3VqFGjVKdOHefr27Vrp9tuu63Y9/eRRx5x+zgYhqGPPvpIAwYMkGEYLtn69u2rkydPulw2dyVmjqkklzOmZ8+eVXZ2tm688UZJctlmrVq19N///leHDh0qdj2Xzg6tXbvWeS/h5ZYvX67CwkINGTLEZR/DwsLUokUL5/EvzboA4HKUKAAoRm5urkthudzQoUPVrVs3PfTQQwoNDdWwYcP0wQcfmCpUDRs2NDWARIsWLVye22w2NW/e/KqPCrdv3z6Fh4cXOR6XLlnbt2+fy/TIyMgi66hdu7Z+//33P91OixYt5OPj+k9TSdtxx3333ae0tDTt2LFDixcv1rBhw4r9bqW9e/fKMAw9/fTTqlevnstj6tSpki7ev3Ule/bskXTxPrvL17Fu3Trn6y/tV6tWrYqso3Xr1srOznYOHnFJkyZNzO/8/+/o0aM6ceKE3nzzzSK5LpX6P9u3PyrtMZUultPx48crNDRUVatWVb169Zz78s
f7j1588UX9+OOPioiIUOfOnTVt2jSXEt6kSRMlJibq7bffVkhIiPr27SuHw+Gyjj179sgwDLVo0aLIfu7cudO5j6V
2024-03-17 18:28:15 +01:00
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Histogram of meteorite masses; log=True puts the count axis on a log scale.\n",
"# Rows with a missing mass are dropped explicitly so the plot does not depend\n",
"# on how the plotting library treats NaN values.\n",
"plt.figure(figsize=(10, 6))\n",
"plt.hist(data[\"mass\"].dropna(), color=\"tan\", log=True, edgecolor=\"black\")\n",
"plt.title('Distribution of Meteorite Masses')\n",
"plt.xlabel('Mass')\n",
"plt.ylabel('Frequency')\n",
"# No legend call: the histogram has a single unlabeled series.\n",
"plt.show()"
]
},
{
"cell_type": "code",
2024-03-17 18:42:28 +01:00
"execution_count": 12,
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-17T17:39:12.159343600Z",
"start_time": "2024-03-17T17:39:12.063242700Z"
}
},
2024-03-17 18:28:15 +01:00
"outputs": [
{
"data": {
2024-03-17 18:42:28 +01:00
"text/plain": " name id nametype recclass mass fall year \\\n12627 Gove 52859 Relict Relict iron 0.0 Found 1979.0 \n25551 Miller Range 090478 55953 Valid CO3 0.0 Found 2009.0 \n31060 Österplana 048 56147 Relict Relict OC 0.0 Found 2004.0 \n31061 Österplana 049 56148 Relict Relict OC 0.0 Found 2012.0 \n31062 Österplana 050 56149 Relict Relict OC 0.0 Found 2003.0 \n31063 Österplana 051 56150 Relict Relict OC 0.0 Found 2006.0 \n31064 Österplana 052 56151 Relict Relict OC 0.0 Found 2006.0 \n31065 Österplana 053 56152 Relict Relict OC 0.0 Found 2002.0 \n31066 Österplana 054 56153 Relict Relict OC 0.0 Found 2005.0 \n31067 Österplana 055 56154 Relict Relict OC 0.0 Found 2008.0 \n31068 Österplana 056 56155 Relict Relict OC 0.0 Found 2008.0 \n31069 Österplana 057 56156 Relict Relict OC 0.0 Found 2009.0 \n31070 Österplana 058 56157 Relict Relict OC 0.0 Found 2009.0 \n31071 Österplana 059 56158 Relict Relict OC 0.0 Found 2009.0 \n31072 Österplana 060 56159 Relict Relict OC 0.0 Found 2009.0 \n31073 Österplana 061 56160 Relict Relict OC 0.0 Found 2009.0 \n31074 Österplana 062 56161 Relict Relict OC 0.0 Found 2010.0 \n31075 Österplana 063 56162 Relict Relict OC 0.0 Found 2010.0 \n31076 Österplana 064 56163 Relict Relict OC 0.0 Found 2011.0 \n\n reclat reclong GeoLocation \n12627 -12.26333 136.83833 (-12.263330, 136.838330) \n25551 0.00000 0.00000 (0.000000, 0.000000) \n31060 58.58333 13.43333 (58.583330, 13.433330) \n31061 58.58333 13.43333 (58.583330, 13.433330) \n31062 58.58333 13.43333 (58.583330, 13.433330) \n31063 58.58333 13.43333 (58.583330, 13.433330) \n31064 58.58333 13.43333 (58.583330, 13.433330) \n31065 58.58333 13.43333 (58.583330, 13.433330) \n31066 58.58333 13.43333 (58.583330, 13.433330) \n31067 58.58333 13.43333 (58.583330, 13.433330) \n31068 58.58333 13.43333 (58.583330, 13.433330) \n31069 58.58333 13.43333 (58.583330, 13.433330) \n31070 58.58333 13.43333 (58.583330, 13.433330) \n31071 58.58333 13.43333 (58.583330, 13.433330) \n31072 58.58333 13.43333 
(58.583330, 13.433330) \n31073 58.58333 13.43333 (58.583330, 13.433330) \n31074 58.58333 13.43333 (58.583330, 13.433330) \n31075 58.58333 13.43333 (58.583330, 13.433330) \n31076 58.58333 13.43333 (58.583330, 13.433330) ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>name</th>\n <th>id</th>\n <th>nametype</th>\n <th>recclass</th>\n <th>mass</th>\n <th>fall</th>\n <th>year</th>\n <th>reclat</th>\n <th>reclong</th>\n <th>GeoLocation</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>12627</th>\n <td>Gove</td>\n <td>52859</td>\n <td>Relict</td>\n <td>Relict iron</td>\n <td>0.0</td>\n <td>Found</td>\n <td>1979.0</td>\n <td>-12.26333</td>\n <td>136.83833</td>\n <td>(-12.263330, 136.838330)</td>\n </tr>\n <tr>\n <th>25551</th>\n <td>Miller Range 090478</td>\n <td>55953</td>\n <td>Valid</td>\n <td>CO3</td>\n <td>0.0</td>\n <td>Found</td>\n <td>2009.0</td>\n <td>0.00000</td>\n <td>0.00000</td>\n <td>(0.000000, 0.000000)</td>\n </tr>\n <tr>\n <th>31060</th>\n <td>Österplana 048</td>\n <td>56147</td>\n <td>Relict</td>\n <td>Relict OC</td>\n <td>0.0</td>\n <td>Found</td>\n <td>2004.0</td>\n <td>58.58333</td>\n <td>13.43333</td>\n <td>(58.583330, 13.433330)</td>\n </tr>\n <tr>\n <th>31061</th>\n <td>Österplana 049</td>\n <td>56148</td>\n <td>Relict</td>\n <td>Relict OC</td>\n <td>0.0</td>\n <td>Found</td>\n <td>2012.0</td>\n <td>58.58333</td>\n <td>13.43333</td>\n <td>(58.583330, 13.433330)</td>\n </tr>\n <tr>\n <th>31062</th>\n <td>Österplana 050</td>\n <td>56149</td>\n <td>Relict</td>\n <td>Relict OC</td>\n <td>0.0</td>\n <td>Found</td>\n <td>2003.0</td>\n <td>58.58333</td>\n <td>13.43333</td>\n <td>(58.583330, 13.433330)</td>\n </tr>\n <tr>\n <th>31063</th>\n <td>Österplana 051</td>\n <td>56150</td>\n <td>Relict</td>\n <td>Relict OC</td>\n <td>0.0</td>\n <td>Found</td>\n <td>2006.0</td>\n <td>58.58333</td>\n <td>13.43333</td>\n <td>(58.583330, 13.433330)</td>\n </tr>\n <tr>\n <th>31064</th>\n <td>Österplana 052</td>\n 
<td>56151</td>\n <td>Relict</td>\n <td>Relict OC</td>\n <td>0.0</td>\n <td>Found</td>\n <td>2006.0</td>\n <td>58.58333</td>\n <td>13.43333</td>\n <td>(58.583330, 13.433330)</td>\n </tr>\n <tr>\n <th>31065</th>\n <td>Österplana 053</td>\n <td>56152</td>\n <td>Relict</td>\n <td>Relict OC</td>\n <td>0.0</td>\n <td>Found</td>\n <td>2002.0</td>\n <td>58.58333</td>\n <td>13.43333</td>\n <td>(58.583330, 13.433330)</td>\n </tr>\n <tr>\n <th>31066</th>\n <td>Österplana 054</td>\n <td>56153</td>\n <td>Relict</td>\n <td>Relict OC</td>\n <td>0.0</td>\n <td>Found</td>\n <td>2005.0</td>\n <td>58.58333</td>\n <td>13.43333</td>\n <td>(58.583330, 13.433330)</td>\n </tr>\n <tr>\n <th>31067</th>\n <td>Österplana 055</td>\n <td>56154</td>\n <td>Relict</td>\n <td>Relict OC</td>\n <td>0.0</td>\n <td>Found</td>\n <td>2008.0</td>\n <td>58.58333</td>\n <td>13.43333</td>\n <td>(58.583330, 13.433330)</td>\n </tr>\n <tr>\n <th>31068</th>\n <td>Österplana 056</td>\n <td>56155</td>\n <td>Relict</td>\n <td>Relict OC</td>\n <td>0.0</td>\n <td>Found</td>\n <td>2008.0</td>\n <td>58.58333</td>\n <td>13.43333</td>\n <td>(58.583330, 13.433330)</td>\n </tr>\n <tr>\n <th>31069</th>\n <td>Österplana 057</td>\n <td>56156</td>\n <td>Relict</td>\n <td>Relict OC</td>\n <td>0.0</td>\n <td>Fo
2024-03-17 18:28:15 +01:00
},
2024-03-17 18:42:28 +01:00
"execution_count": 12,
2024-03-17 18:28:15 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.loc[data['mass'] == 0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Wygląda na to, że odnaleziono dużo meteorytów z masą równą 0 w tym samym miejscu. <br>\n",
"Po researchu okazało się, że to nie są niepoprawne wartości. W Szwecji znaleziono skamieniałe meteoryty, które są bardzo stare (setki milionów lat), przez co nie ma możliwości obliczenia ich masy. \n",
"Źródła:\n",
"- https://en.wikipedia.org/wiki/Österplana_065"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<h4>Fall</h4>"
]
},
{
"cell_type": "code",
2024-03-17 18:42:28 +01:00
"execution_count": 13,
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-17T17:39:16.978125900Z",
"start_time": "2024-03-17T17:39:16.919959800Z"
}
},
2024-03-17 18:28:15 +01:00
"outputs": [
{
"data": {
2024-03-17 18:42:28 +01:00
"text/plain": "fall\nFound 44609\nFell 1107\nName: count, dtype: int64"
2024-03-17 18:28:15 +01:00
},
2024-03-17 18:42:28 +01:00
"execution_count": 13,
2024-03-17 18:28:15 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[\"fall\"].value_counts() "
]
},
{
"cell_type": "code",
2024-03-17 18:42:28 +01:00
"execution_count": 14,
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-17T17:39:19.777355700Z",
"start_time": "2024-03-17T17:39:19.570152100Z"
}
},
2024-03-17 18:28:15 +01:00
"outputs": [
{
"data": {
2024-03-17 18:42:28 +01:00
"text/plain": "<Figure size 800x600 with 1 Axes>",
"image/png": "iVBORw0KGgoAAAANSUhEUgAAArUAAAH5CAYAAACf0sbLAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAjkUlEQVR4nO3df5BV9X3/8dcC7vJDdw0qSygQMSYqUWFAhW3atBrqqpipFaeaWkVFM1p01G1VaB1Qk8ZUx58DSloTsUkcf8RRIyjq4IgZRdGlNEiESb8xAynZBarsClFWYb9/ZLjjRkxEfqwfeTxm7kz2nPc953PuTK5P79x7rOrs7OwMAAAUrEd3LwAAAHaUqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4vXq7gV0py1btmT16tXZZ599UlVV1d3LAQDg93R2duatt97KoEGD0qPHh38eu0dH7erVqzNkyJDuXgYAAH/EqlWrMnjw4A/dv0dH7T777JPkdy9SbW1tN68GAIDf197eniFDhlS67cPs0VG79SsHtbW1ohYA4BPsj31V1A/FAAAonqgFAKB4ohYAgOKJWgAAiidqAQAonqgFAKB4ohYAgOKJWgAAiidqAQAonqgFAKB4ohYAgOKJWgAAiidqAQAonqgFAKB4ohYAgOKJWgAAiidqAQAonqgFAKB4vbp7AXualSvbs27d2929DGAX2n//Phk6tLa7lwGwRxG1u9HKle057LDv57e/fa+7lwLsQn379sprr50nbAF2I1G7G61b93Z++9v3cumNf5rBB9V193KAXeDXv2zLbVe8kHXr3ha1ALuRqO0Ggw+qy0Ff6t/dywAA+NTwQzEAAIonagEAKJ6oBQCgeKIWAIDiiVoAAIonagEAKJ6oBQCgeKIWAIDiiVoAAIonagEAKJ6oBQCgeKIWAIDiiVoAAIonagEAKJ6oBQCgeKIWAIDiiVoAAIonagEAKJ6oBQCgeKIWAIDiiVoAAIonagEAKJ6oBQCgeKIWAIDiiVoAAIonagEAKJ6oBQCgeKIWAIDiiVoAAIonagEAKJ6oBQCgeKIWAIDiiVoAAIonagEAKJ6oBQCgeKIWAIDiiVoAAIonagEAKJ6oBQCgeDsUtd/5zndSVVWVyy67rLLtnXfeyeTJk7Pffvtl7733zoQJE9La2trleStXrsz48ePTt2/fDBgwIFdccUXee++9LjPPPvtsRo0alZqamhx88MGZPXv2B84/c+bMHHjggendu3fGjBmTRYsW7cjlAABQqI8dtS+//HK++93v5sgjj+yy/fLLL89jjz2WBx98MAsWLMjq1atz6qmnVvZv3rw548ePT0dHR1544YXcc889mT17dqZNm1aZef311zN+/Pgce+yxWbJkSS677LKcf/75efLJJysz999/f5qamjJ9+vQsXrw4I0aMSGNjY9asWfNxLwkAgEJ9rKjdsGFDzjzzzPzHf/xHPvOZz1S2t7W15Xvf+15uvvnmHHfccRk9enTuvvvuvPDCC3nxxReTJE899VR+/vOf54c//GFGjhyZE088Md/85jczc+bMdHR0JElmzZqVYcOG5aabbsphhx2Wiy++OKeddlpuueWWyrluvvnmXHDBBTn33HMzfPjwzJo1K3379s33v//9HXk9AAAo0MeK2smTJ2f8+PEZN25cl+3Nzc159913u2w/9NBDM3To0CxcuDBJsnDhwhxxxBGpr6+vzDQ2Nqa9vT3Lli2rzPz+sRsbGyvH6OjoSHNzc5eZHj16ZNy4cZWZbdm0aVPa29u7PAAAKF+v7X3Cfffdl8WLF+fll1/+wL6WlpZUV1dn33337bK9vr4+LS0tlZn3B+3W/Vv3/aGZ9vb2vP3223nzzTezefPmbc4sX778Q9d+/fXX59prr/1oFwoAQDG265PaVatW5dJLL82PfvSj9O7de1etaZeZOnVq2traKo9Vq1Z195IAANgJtitqm5ubs2
bNmowaNSq9evVKr169smDBgtx+++3p1atX6uvr09HRkfXr13d5XmtrawYOHJgkGThw4AfuhrD17z82U1tbmz59+mT//fdPz549tzmz9RjbUlNTk9ra2i4PAADKt11R+9WvfjVLly7NkiVLKo+jjjoqZ555ZuV/77XXXpk/f37lOStWrMjKlSvT0NCQJGloaMjSpUu73KXg6aefTm1tbYYPH16Zef8xts5sPUZ1dXVGjx7dZWbLli2ZP39+ZQYAgD3Hdn2ndp999snhhx/eZVu/fv2y3377VbZPmjQpTU1N6d+/f2pra3PJJZekoaEhY8eOTZIcf/zxGT58eM4666zccMMNaWlpydVXX53JkyenpqYmSXLhhRdmxowZufLKK3PeeeflmWeeyQMPPJC5c+dWztvU1JSJEyfmqKOOyjHHHJNbb701GzduzLnnnrtDLwgAAOXZ7h+K/TG33HJLevTokQkTJmTTpk1pbGzMHXfcUdnfs2fPzJkzJxdddFEaGhrSr1+/TJw4Mdddd11lZtiwYZk7d24uv/zy3HbbbRk8eHDuuuuuNDY2VmZOP/30rF27NtOmTUtLS0tGjhyZefPmfeDHYwAAfPpVdXZ2dnb3IrpLe3t76urq0tbWtlu+X7t4cWtGj/5BbnzoxBz0pf67/HzA7vfLZW/kiglPpLn5rIwa5V+yAXbUR+21HfrP5AIAwCeBqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKt11Re+edd+bII49MbW1tamtr09DQkCeeeKKy/5133snkyZOz3377Ze+9986ECRPS2tra5RgrV67M+PHj07dv3wwYMCBXXHFF3nvvvS4zzz77bEaNGpWampocfPDBmT179gfWMnPmzBx44IHp3bt3xowZk0WLFm3PpQAA8CmyXVE7ePDgfOc730lzc3NeeeWVHHfccfnrv/7rLFu2LEly+eWX57HHHsuDDz6YBQsWZPXq1Tn11FMrz9+8eXPGjx+fjo6OvPDCC7nnnnsye/bsTJs2rTLz+uuvZ/z48Tn22GOzZMmSXHbZZTn//PPz5JNPVmbuv//+NDU1Zfr06Vm8eHFGjBiRxsbGrFmzZkdfDwAAClTV2dnZuSMH6N+/f2688cacdtppOeCAA3LvvffmtNNOS5IsX748hx12WBYuXJixY8fmiSeeyMknn5zVq1envr4+STJr1qxcddVVWbt2baqrq3PVVVdl7ty5efXVVyvnOOOMM7J+/frMmzcvSTJmzJgcffTRmTFjRpJky5YtGTJkSC655JJMmTLlQ9e6adOmbNq0qfJ3e3t7hgwZkra2ttTW1u7Iy/CRLF7cmtGjf5AbHzoxB32p/y4/H7D7/XLZG7liwhNpbj4ro0bVd/dyAIrX3t6eurq6P9prH/s7tZs3b859992XjRs3pqGhIc
3NzXn33Xczbty4ysyhhx6aoUOHZuHChUmShQsX5ogjjqgEbZI0Njamvb298mnvwoULuxxj68zWY3R0dKS5ubnLTI8
2024-03-17 18:28:15 +01:00
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# value_counts() orders categories by frequency, not by the order of a\n",
"# hand-written label list, so reindex the counts to the label order to make\n",
"# sure every bar shows the count that belongs to its label.\n",
"fall_labels = [\"Fell\", \"Found\"]\n",
"fall_counts = data[\"fall\"].value_counts().reindex(fall_labels, fill_value=0)\n",
"\n",
"plt.figure(figsize=(8, 6))\n",
"plt.bar(fall_labels, fall_counts, color=[\"lightblue\", \"lightgreen\"], edgecolor=[\"darkblue\", \"darkgreen\"])\n",
"plt.title('Meteorites by fall type')\n",
"plt.ylabel('Count')\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<h4>Klasa meteorytu</h4>"
]
},
{
"cell_type": "code",
2024-03-17 18:42:28 +01:00
"execution_count": 15,
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-17T17:39:22.685477700Z",
"start_time": "2024-03-17T17:39:22.622081100Z"
}
},
2024-03-17 18:28:15 +01:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Liczba klas meteorytow: 466\n",
"10 najpopularniejszych klas:\n"
]
},
{
"data": {
2024-03-17 18:42:28 +01:00
"text/plain": "recclass\nL6 8285\nH5 7142\nL5 4796\nH6 4528\nH4 4211\nLL5 2766\nLL6 2043\nL4 1253\nH4/5 428\nCM2 416\nName: count, dtype: int64"
2024-03-17 18:28:15 +01:00
},
2024-03-17 18:42:28 +01:00
"execution_count": 15,
2024-03-17 18:28:15 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"class_count = data['recclass'].nunique()\n",
"print(f'Liczba klas meteorytow: {class_count}')\n",
"top_10 = data['recclass'].value_counts().head(10)\n",
"print(\"10 najpopularniejszych klas:\")\n",
"top_10"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<h4>Lokalizacja</h4>"
]
},
{
"cell_type": "code",
2024-03-17 18:42:28 +01:00
"execution_count": 19,
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-17T17:41:01.551564400Z",
"start_time": "2024-03-17T17:40:53.770275Z"
}
},
2024-03-17 18:28:15 +01:00
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-17 18:42:28 +01:00
"j:\\.AppData\\Python\\Python310\\site-packages\\pyproj\\crs\\crs.py:141: FutureWarning: '+init=<authority>:<code>' syntax is deprecated. '<authority>:<code>' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6\n",
2024-03-17 18:28:15 +01:00
" in_crs_string = _prepare_from_proj_string(in_crs_string)\n",
2024-03-17 18:42:28 +01:00
"C:\\Users\\s464914\\AppData\\Local\\temp\\ipykernel_5176\\2086382282.py:8: FutureWarning: The geopandas.dataset module is deprecated and will be removed in GeoPandas 1.0. You can get the original 'naturalearth_lowres' data from https://www.naturalearthdata.com/downloads/110m-cultural-vectors/.\n",
2024-03-17 18:28:15 +01:00
" world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))\n"
]
},
{
"data": {
2024-03-17 18:42:28 +01:00
"text/plain": "<Axes: >"
2024-03-17 18:28:15 +01:00
},
2024-03-17 18:42:28 +01:00
"execution_count": 19,
2024-03-17 18:28:15 +01:00
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
2024-03-17 18:42:28 +01:00
"text/plain": "<Figure size 2000x1000 with 1 Axes>",
"image/png": "iVBORw0KGgoAAAANSUhEUgAABkgAAAIkCAYAAABV8acMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hUZfbA8e/0kslk0ntC70gRUQERrFgRXF172V2764JYFntZdW0r68/eexfsoiuCKCDSew+k9zZJps/c3x+TDAkphJBKzud58gy5c+fOe2fClPe85xyVoigKQgghhBBCCCGEEEIIIYQQvYi6qwcghBBCCCGEEEIIIYQQQgjR2SRAIoQQQgghhBBCCCGEEEKIXkcCJEIIIYQQQgghhBBCCCGE6HUkQCKEEEIIIYQQQgghhBBCiF5HAiRCCCGEEEIIIYQQQgghhOh1JEAihBBCCCGEEEIIIYQQQoheRwIkQgghhBBCCCGEEEIIIYTodSRAIoQQQgghhBBCCCGEEEKIXkfb1QM4XIFAgLy8PMLDw1GpVF09HCGEEEIIIYQQQgghhBBCdCFFUaiqqiIpKQm1uvk8kR4fIMnLyyM1NbWrhyGEEEIIIYQQQgghhBBCiG4kOzublJSUZq/v8QGS8PBwIHiiVqu1i0cjhBBCCCGEEEIIIYQQQoiuZLfbSU1NDcUPmtPjAyR1ZbWsVqsESIQQQgghhBBCCCGEEEIIAXDQthzSpF0IIYQQQgghhBBCCCGEEL2OBEiEEEIIIYQQQgghhBBCCNHrSIBECCGEEEIIIYQQQgghhBC9jgRIhBBCCCGEEEIIIYQQQgjR60iARAghhBBCCCGEEEIIIYQQvY4ESIQQQgghhBBCCCGEEEII0etIgEQIIYQQQgghhBBCCCGEEL2OBEiEEEIIIYQQQgghhBBCCNHrSIBECCGEEEIIIYQQQgghhBC9jgRIhBBCCCGEEEIIIYQQQgjR60iARAghhBBCCCGEEEIIIYQQvY4ESIQQQgghhBBCCCGEEEII0etIgEQIIYQQQgghhBBCCCGEEL2OBEiEEEIIIYQQQgghhBBCCNHrSIBECCGEEEIIIYQQQgghhBC9jgRIhBBCCCGEEEIIIYQQQgjR60iARAghhBBCCCGEEEIIIYQQvY4ESIQQQgghhBBCCCGEEEII0etIgEQIIYQQQgghhBBCCCGEEL2OtqsHIIQQQoi28fkDePwBPL7gjy+goFGr0KhVaEOX6tDvarWqq4cshBBCCCGEEEII0W1IgEQIIUSb+QMKABqZeD8kihJ83FSqlh+3rFIHi3cUsXhHEbsKq3H7Anh8/lBQpPbhbzWVilDgRKdWo9E0HUgJBVk0KoxaDUk2EymRJlKjzMHLSDNJNhN6rSSiCiGEEEIIIYQQoueSAIkQQhyhXF4/OeUOcsqd5FY4Ka5yY3f6sLu8VLm82J0+PP5g1kEgoNS7DE68+wMKqVHByfAqt48ql48ql7fBpcPjB4IBEoNWjV6rRq9RY9DVXmo16LVqDFo1Bp0mdJ2h/j612406NeFGHVaTlnCDDqtJR7hRS2KEEZtZ38WPZtsEAgo55U62FdjZnl/FjsLg5d7SGhQl+LhpVCrUatCq1ahVhIITigKlNZ52HY+igNev4PUruAi0/oaZ5Y02qVSQYDWGAiYpkSZS6gVQEiOMaDUSQBFCCCGEEEIIIUT3JQESIYToYVxeP2syy9mQU4FWrcKk12IxaDgqxUa/mDBUKhU/by/k9k83HvYEe1aZAyg96H7+gILD4w8FTNpbpFlH35gw+sSEcVy/aC4cl9oh99MeVu0rY/7aXLYX2NlRUNXiY+IPKPhRwA8cSsCiG1AUyK90kV/pYtW+hgEUjVrFiCQrp49I4KyRiaRHh3XRKIUQQgghhBBCCCGaJwESIYToJgIBBY8/gNsXwO31U+bwUFrtobTGQ1m1m6IqN6szy1mfVYHH3/RkerzVwMC4cH7bXd
LJo+84ahVEWwwk2kz0iwmjb0zzk+2BgILD66fG7aPa7QtdenwBFIDaklQKCooSnOQHSIgwMizR2qBHh9vnZ3t+FRtzKtiQU8nGnAo8vgBHmXzc+OGT7Jr7MEkjBzLAXU7E3DvglVcgOpofNhfw+docPL7uF/BQqWB8nyiiLXq0ajXa2hJbWo0arVqFP6AQUBT8tb1M4sKNGHTqYDmu2swWdW3Wi0YNalXtNtX+7QkRBoYlRmDSa7r6dIUQQgghhBBCCCFaJAESIYToIr9nlPLxqmyyyxxklzsotLsP+5iFdne7HKezqFQQazEQFaYnpvYy2qInOkxPQoSJIQnhDIizYNRpqHb72JBdwb6SGn7fU0pepYuCSif5lS7KHR6qXT4cXn8o6HGoYiwGThwUi1mvYWNOBdvyq5oMRN254FGG7FyO6fJNzD5rDs98+zQRFQUs2VHMY3/9F+FGLVMGxeLyBah0eKhweimr8VDl8h3mo3X4FAXWZVWQEmXC7a0Nxvn8tb1NGp+rWgXxViNJNhOJEcHLpAgjcTYT4UYtmaUOthdUsae4mj3F1VTUeJkxNpnoMAN9Wghk1fEHFCocHspq6v04PJRVe3B4/YQbtUSYdESYdFiNtZe1v0eYdNL7RgghhBBCCCGEEIdFAiRCCNFFhiZYiQs3kFFcTXFVzwlqtAe9Vs2YVBvj+kRiMegw6zUc0yeKIQnhDbI46jNq1by5bB8/bSvskDGVVLv5fG3OQfd76ORrGFaUQXpFAfPfvx2ATFsCcyf/hfzCqg4ZW3vy+ANkFNe0at9AvTJarVHXb8Zq0lFS7WZfSQ0ZJTVklTooqXZTVuOh3BHMiiqvCQaP2hrQGptm47lLxpJkM7XtAEIIIYQQQgghhOj1JEAihBAdZMWeUl7/bS8urx+PP4DXH8DnV0iIMHLpsWlMHhjL3DOHAmB3eflmQz4vLNlNTrmzi0feCZRg9ohBq+GYPpEclWJDrw029HZ5/ewoqGJjbiWbcirILnNS7vBQ4fBS7mjfpuVtkW+NZfZZc0LBEYDZZ80h3xrbhaPqeioVjE6xsXJvGR+tWtzuGTNatYr+sRYGJYQzJCGcwfHhaCWDRAjRAewuL1vz7BTaXRTaXRRUuimsclFsd9MnxsyE/jFM6B9NnNXY1UMVQgghhBBCHCaVorR17Wb3YLfbiYiIoLKyEqvV2tXDEUL0YjVuH5+szuaDlVnklDtxeltuWN4n2syFx6QyNMFKapSJZJsZrUbFb7tK2JRbyebcSrbk2cmtOHjARK0Kttfojq/o4QYtI1MiGBQfnNgelBDOsEQrRp0Gjy/AjoIqNuVWsim3go05lewoqMIX6IYnUivRXsxHH84lvaIgtC3TlsBFFz/W64Mk7SXZFiyvNrjeT78YSyiIJjqW0+PHqFOjUqkabd9TXE20JVgST6c5cp+PGrePAruLwtoMqqIqNxVOD4oS7HUUUOCUoXHEWQ18uT6PrXl2dhVVE2bQ0jfGTL8YCxeOSyUt2tzVpyJaweX1szarnGW7S1i2u5SNORW05m1oQJyFif2jOXd0MmPTbI3+zwghhBBCCCG6TmvjBhIgEUKIdvC/rYXc+8VmCuytK0XUnBiLgeRIE8k2I4kRJpJsJoYlhpNsM+PxByitdmPUaTDqNBi06tClTqvGrNPgrG1QvqOwiiU7ilmyo4g9rSyn1N5iww1cc0JfLjk2HYthf8KioiisyCjllaUZLN9d2mzD+e7qxQWPcsbO5WTaEkI9SNIrCvh+0ARumHFXVw+v2zPpNFiMWsINWixGLRbD/p+ECCPnH51C/1hLVw/ziOby+vl6Qx7ZZQ6Kqz2UVLv3/1R5cHr9hOk1DIizMCAunLQoMxtyKli2uwR3ba8alQqiw/TEhhuJtxqICw/2nRmaYGVIYjgJVuMRM1msKAo55U7u+GwjKzJKQ9v1GjVp0WamDo4l2mIgOkxPnDX4eCRYjdjM+i4ctWiN3AonH67M4tM12Y
fdv2tEspUrju/DuaOSMOo07TRCIYQQQgghRFtJgEQIITqJP6Dw1I87eOmXPR2SwaHTqHh0xkjOHZ3EJa+uZE1meZP
2024-03-17 18:28:15 +01:00
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import geopandas as gpd\n",
"from shapely.geometry import Point\n",
"\n",
"# WGS84 coordinates. The authority string replaces the deprecated\n",
"# {'init': 'epsg:4326'} dict, which triggers a pyproj FutureWarning.\n",
"loc_crs = 'EPSG:4326'\n",
"# Each point is built as (x=reclong, y=reclat).\n",
"loc_geom = [Point(xy) for xy in zip(data['reclong'], data['reclat'])]\n",
"geo_df = gpd.GeoDataFrame(data, crs=loc_crs, geometry=loc_geom)\n",
"\n",
"# NOTE(review): gpd.datasets is deprecated and will be removed in GeoPandas 1.0;\n",
"# the 'naturalearth_lowres' data then has to be loaded from a local copy\n",
"# downloaded from naturalearthdata.com.\n",
"world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))\n",
"# Draw the meteorite positions as red crosses on top of the world map.\n",
"geo_df.plot(ax=world.plot(figsize=(20, 10)), marker='x', color='red', markersize=15)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<h4>Normalizacja danych</h4>"
]
},
{
"cell_type": "code",
2024-03-17 18:42:28 +01:00
"execution_count": 20,
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-17T17:41:01.812906Z",
"start_time": "2024-03-17T17:41:01.555020700Z"
}
},
2024-03-17 18:28:15 +01:00
"outputs": [
{
"data": {
2024-03-17 18:42:28 +01:00
"text/plain": "0 -0.023056\n1 -0.021841\n2 0.163000\n3 -0.019764\n4 -0.021736\n ... \n45711 -0.022794\n45712 -0.023013\n45713 -0.023087\n45714 -0.019324\n45715 -0.022745\nName: mass, Length: 45716, dtype: float64"
2024-03-17 18:28:15 +01:00
},
2024-03-17 18:42:28 +01:00
"execution_count": 20,
2024-03-17 18:28:15 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.preprocessing import StandardScaler\n",
"\n",
"# Fit the scaler on the training split only and reuse its mean/std for the\n",
"# test and validation splits. Refitting on every split would leak their\n",
"# statistics and put the three splits on different scales.\n",
"scaler = StandardScaler()\n",
"\n",
"meteorite_train['mass'] = scaler.fit_transform(meteorite_train[['mass']])\n",
"meteorite_test['mass'] = scaler.transform(meteorite_test[['mass']])\n",
"meteorite_val['mass'] = scaler.transform(meteorite_val[['mass']])\n",
"\n",
"# The full dataset is scaled with its own scaler, so `scaler` above stays\n",
"# fitted on the training data.\n",
"data['mass'] = StandardScaler().fit_transform(data[['mass']])\n",
"\n",
"data['mass']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<h4>Czyszczenie zbioru</h4>"
]
},
{
"cell_type": "code",
2024-03-17 18:42:28 +01:00
"execution_count": 21,
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-17T17:41:04.621115800Z",
"start_time": "2024-03-17T17:41:04.556475900Z"
}
},
2024-03-17 18:28:15 +01:00
"outputs": [
{
"data": {
2024-03-17 18:42:28 +01:00
"text/plain": "name 45716\nid 45716\nnametype 2\nrecclass 466\nmass 12576\nfall 2\nyear 268\nreclat 12738\nreclong 14640\nGeoLocation 17100\ndtype: int64"
2024-03-17 18:28:15 +01:00
},
2024-03-17 18:42:28 +01:00
"execution_count": 21,
2024-03-17 18:28:15 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.nunique()"
]
},
{
"cell_type": "code",
2024-03-17 18:42:28 +01:00
"execution_count": 22,
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-17T17:41:06.233555600Z",
"start_time": "2024-03-17T17:41:06.126893600Z"
}
},
2024-03-17 18:28:15 +01:00
"outputs": [
{
"data": {
2024-03-17 18:42:28 +01:00
"text/plain": "name 0\nid 0\nnametype 0\nrecclass 0\nmass 131\nfall 0\nyear 288\nreclat 7315\nreclong 7315\nGeoLocation 7315\ndtype: int64"
2024-03-17 18:28:15 +01:00
},
2024-03-17 18:42:28 +01:00
"execution_count": 22,
2024-03-17 18:28:15 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.isna().sum()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Według dokumentacji: \n",
"<br>\n",
"reclat - szerokość geograficzna\n",
"<br>\n",
"reclong - długość geograficzna"
]
},
{
"cell_type": "code",
2024-03-17 18:42:28 +01:00
"execution_count": 24,
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-17T17:41:15.570920100Z",
"start_time": "2024-03-17T17:41:15.505376100Z"
}
},
2024-03-17 18:28:15 +01:00
"outputs": [
{
"data": {
2024-03-17 18:42:28 +01:00
"text/plain": "(7315, 10)"
2024-03-17 18:28:15 +01:00
},
2024-03-17 18:42:28 +01:00
"execution_count": 24,
2024-03-17 18:28:15 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rows in which latitude, longitude and the combined GeoLocation are all missing.\n",
"geo_columns = ['reclat', 'reclong', 'GeoLocation']\n",
"filtered_data = data[data[geo_columns].isna().all(axis=1)]\n",
"filtered_data.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Wnioski:\n",
"Miejsca, w których brakuje zarówno szerokości geograficznej, jak i długości geograficznej, zazwyczaj nie posiadają również informacji o całej geolokacji. Z uwagi na powiązanie tych trzech parametrów, zamiast próbować uzupełniać brakujące dane, wiersze zawierające braki w tych trzech obszarach zostaną usunięte."
]
},
{
"cell_type": "code",
2024-03-17 18:42:28 +01:00
"execution_count": 25,
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-17T17:41:18.557403200Z",
"start_time": "2024-03-17T17:41:18.449579400Z"
}
},
2024-03-17 18:28:15 +01:00
"outputs": [
{
"data": {
2024-03-17 18:42:28 +01:00
"text/plain": "name 0\nid 0\nnametype 0\nrecclass 0\nmass 119\nfall 0\nyear 175\nreclat 0\nreclong 0\nGeoLocation 0\ndtype: int64"
2024-03-17 18:28:15 +01:00
},
2024-03-17 18:42:28 +01:00
"execution_count": 25,
2024-03-17 18:28:15 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = data.dropna(subset=['reclat'])\n",
"meteorite_train = meteorite_train.dropna(subset=['reclat'])\n",
"meteorite_test = meteorite_test.dropna(subset=['reclat'])\n",
"meteorite_val = meteorite_val.dropna(subset=['reclat'])\n",
"\n",
"data.isna().sum()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Według dokumentacji:\n",
"- a few entries here contain date information that was incorrectly parsed into the NASA database. As a spot check: any date that is before 860 CE or after 2016 are incorrect; these should actually be BCE years. There may be other errors and we are looking for a way to identify them.\n",
"- a few entries have latitude and longitude of 0N/0E (off the western coast of Africa, where it would be quite difficult to recover meteorites). Many of these were actually discovered in Antarctica, but exact coordinates were not given. 0N/0E locations should probably be treated as NA."
]
},
{
"cell_type": "code",
2024-03-17 18:42:28 +01:00
"execution_count": 26,
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-17T17:41:21.026944200Z",
"start_time": "2024-03-17T17:41:20.935502400Z"
}
},
2024-03-17 18:28:15 +01:00
"outputs": [
{
"data": {
2024-03-17 18:42:28 +01:00
"text/plain": " name id nametype recclass mass \\\n16356 Havana 11857 Valid Iron, IAB complex NaN \n30679 Northwest Africa 7701 57150 Valid CK6 -0.022997 \n38188 Ur 24125 Valid Iron NaN \n38301 Wietrzno-Bobrka 24259 Valid Iron -0.022439 \n\n fall year reclat reclong GeoLocation \n16356 Found 301.0 40.33333 -90.05000 (40.333330, -90.050000) \n30679 Found 2101.0 0.00000 0.00000 (0.000000, 0.000000) \n38188 Found 2501.0 30.90000 46.01667 (30.900000, 46.016670) \n38301 Found 601.0 49.41667 21.70000 (49.416670, 21.700000) ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>name</th>\n <th>id</th>\n <th>nametype</th>\n <th>recclass</th>\n <th>mass</th>\n <th>fall</th>\n <th>year</th>\n <th>reclat</th>\n <th>reclong</th>\n <th>GeoLocation</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>16356</th>\n <td>Havana</td>\n <td>11857</td>\n <td>Valid</td>\n <td>Iron, IAB complex</td>\n <td>NaN</td>\n <td>Found</td>\n <td>301.0</td>\n <td>40.33333</td>\n <td>-90.05000</td>\n <td>(40.333330, -90.050000)</td>\n </tr>\n <tr>\n <th>30679</th>\n <td>Northwest Africa 7701</td>\n <td>57150</td>\n <td>Valid</td>\n <td>CK6</td>\n <td>-0.022997</td>\n <td>Found</td>\n <td>2101.0</td>\n <td>0.00000</td>\n <td>0.00000</td>\n <td>(0.000000, 0.000000)</td>\n </tr>\n <tr>\n <th>38188</th>\n <td>Ur</td>\n <td>24125</td>\n <td>Valid</td>\n <td>Iron</td>\n <td>NaN</td>\n <td>Found</td>\n <td>2501.0</td>\n <td>30.90000</td>\n <td>46.01667</td>\n <td>(30.900000, 46.016670)</td>\n </tr>\n <tr>\n <th>38301</th>\n <td>Wietrzno-Bobrka</td>\n <td>24259</td>\n <td>Valid</td>\n <td>Iron</td>\n <td>-0.022439</td>\n <td>Found</td>\n <td>601.0</td>\n <td>49.41667</td>\n <td>21.70000</td>\n <td>(49.416670, 21.700000)</td>\n </tr>\n </tbody>\n</table>\n</div>"
2024-03-17 18:28:15 +01:00
},
2024-03-17 18:42:28 +01:00
"execution_count": 26,
2024-03-17 18:28:15 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.loc[(data['year'] > 2016) | (data['year'] < 860)]"
]
},
{
"cell_type": "code",
2024-03-17 18:42:28 +01:00
"execution_count": 27,
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-17T17:41:21.974610100Z",
"start_time": "2024-03-17T17:41:21.857117900Z"
}
},
2024-03-17 18:28:15 +01:00
"outputs": [
{
"data": {
2024-03-17 18:42:28 +01:00
"text/plain": " name id nametype recclass mass fall \\\n37 Northwest Africa 5815 50693 Valid L5 -0.022646 Found \n596 Mason Gully 53653 Valid H5 -0.023050 Fell \n1648 Allan Hills 09004 52119 Valid Howardite -0.022707 Found \n1649 Allan Hills 09005 55797 Valid L5 -0.022880 Found \n1650 Allan Hills 09006 55798 Valid H5 -0.022912 Found \n... ... ... ... ... ... ... \n45655 Yamato 984144 40764 Valid H6 -0.023028 Found \n45656 Yamato 984145 40765 Valid L6 -0.022998 Found \n45657 Yamato 984146 40766 Valid H3 -0.023059 Found \n45658 Yamato 984147 40767 Valid LL6 -0.022886 Found \n45659 Yamato 984148 40768 Valid L5 -0.023085 Found \n\n year reclat reclong GeoLocation \n37 NaN 0.0 0.0 (0.000000, 0.000000) \n596 2010.0 0.0 0.0 (0.000000, 0.000000) \n1648 2009.0 0.0 0.0 (0.000000, 0.000000) \n1649 2009.0 0.0 0.0 (0.000000, 0.000000) \n1650 2009.0 0.0 0.0 (0.000000, 0.000000) \n... ... ... ... ... \n45655 1998.0 0.0 0.0 (0.000000, 0.000000) \n45656 1998.0 0.0 0.0 (0.000000, 0.000000) \n45657 1998.0 0.0 0.0 (0.000000, 0.000000) \n45658 1998.0 0.0 0.0 (0.000000, 0.000000) \n45659 1998.0 0.0 0.0 (0.000000, 0.000000) \n\n[6214 rows x 10 columns]",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>name</th>\n <th>id</th>\n <th>nametype</th>\n <th>recclass</th>\n <th>mass</th>\n <th>fall</th>\n <th>year</th>\n <th>reclat</th>\n <th>reclong</th>\n <th>GeoLocation</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>37</th>\n <td>Northwest Africa 5815</td>\n <td>50693</td>\n <td>Valid</td>\n <td>L5</td>\n <td>-0.022646</td>\n <td>Found</td>\n <td>NaN</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>(0.000000, 0.000000)</td>\n </tr>\n <tr>\n <th>596</th>\n <td>Mason Gully</td>\n <td>53653</td>\n <td>Valid</td>\n <td>H5</td>\n <td>-0.023050</td>\n <td>Fell</td>\n <td>2010.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>(0.000000, 0.000000)</td>\n </tr>\n <tr>\n <th>1648</th>\n <td>Allan Hills 09004</td>\n <td>52119</td>\n <td>Valid</td>\n <td>Howardite</td>\n <td>-0.022707</td>\n <td>Found</td>\n <td>2009.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>(0.000000, 0.000000)</td>\n </tr>\n <tr>\n <th>1649</th>\n <td>Allan Hills 09005</td>\n <td>55797</td>\n <td>Valid</td>\n <td>L5</td>\n <td>-0.022880</td>\n <td>Found</td>\n <td>2009.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>(0.000000, 0.000000)</td>\n </tr>\n <tr>\n <th>1650</th>\n <td>Allan Hills 09006</td>\n <td>55798</td>\n <td>Valid</td>\n <td>H5</td>\n <td>-0.022912</td>\n <td>Found</td>\n <td>2009.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>(0.000000, 0.000000)</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>45655</th>\n <td>Yamato 984144</td>\n <td>40764</td>\n <td>Valid</td>\n <td>H6</td>\n <td>-0.023028</td>\n <td>Found</td>\n <td>1998.0</td>\n 
<td>0.0</td>\n <td>0.0</td>\n <td>(0.000000, 0.000000)</td>\n </tr>\n <tr>\n <th>45656</th>\n <td>Yamato 984145</td>\n <td>40765</td>\n <td>Valid</td>\n <td>L6</td>\n <td>-0.022998</td>\n <td>Found</td>\n <td>1998.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>(0.000000, 0.000000)</td>\n </tr>\n <tr>\n <th>45657</th>\n <td>Yamato 984146</td>\n <td>40766</td>\n <td>Valid</td>\n <td>H3</td>\n <td>-0.023059</td>\n <td>Found</td>\n <td>1998.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>(0.000000, 0.000000)</td>\n </tr>\n <tr>\n <th>45658</th>\n <td>Yamato 984147</td>\n <td>40767</td>\n <td>Valid</td>\n <td>LL6</td>\n <td>-0.022886</td>\n <td>Found</td>\n <td>1998.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>(0.000000, 0.000000)</td>\n </tr>\n <tr>\n <th>45659</th>\n <td>Yamato 984148</td>\n <td>40768</td>\n <td>Valid</td>\n <td>L5</td>\n <td>-0.023085</td>\n <td>Found</td>\n <td>1998.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>(0.000000, 0.000000)</td>\n </tr>\n </tbody>\n</table>\n<p>6214 rows × 10 columns</p>\n</div>"
2024-03-17 18:28:15 +01:00
},
2024-03-17 18:42:28 +01:00
"execution_count": 27,
2024-03-17 18:28:15 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.loc[(data['reclat'] == 0) & (data['reclong'] == 0)]"
]
},
{
"cell_type": "code",
2024-03-17 18:42:28 +01:00
"execution_count": 28,
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-17T17:41:22.852305500Z",
"start_time": "2024-03-17T17:41:22.794894400Z"
}
},
2024-03-17 18:28:15 +01:00
"outputs": [],
"source": [
"def drop_invalid_rows(df, min_year=860, max_year=2016):\n",
"    \"\"\"Return a copy of df without implausible years and 0N/0E locations.\n",
"\n",
"    A row is removed when its year is below min_year or above max_year, or\n",
"    when both reclat and reclong are exactly 0. Rows with a missing year are\n",
"    kept, because comparisons against NaN evaluate to False.\n",
"    \"\"\"\n",
"    bad_year = (df['year'] > max_year) | (df['year'] < min_year)\n",
"    bad_location = (df['reclat'] == 0) & (df['reclong'] == 0)\n",
"    # .copy() keeps later in-place edits from touching a view of the input frame.\n",
"    return df.loc[~(bad_year | bad_location)].copy()\n",
"\n",
"\n",
"# Apply the same filter to the full dataset and to every split.\n",
"data = drop_invalid_rows(data)\n",
"meteorite_train = drop_invalid_rows(meteorite_train)\n",
"meteorite_test = drop_invalid_rows(meteorite_test)\n",
"meteorite_val = drop_invalid_rows(meteorite_val)"
]
},
{
"cell_type": "code",
2024-03-17 18:42:28 +01:00
"execution_count": 29,
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-17T17:41:24.196341600Z",
"start_time": "2024-03-17T17:41:24.093136800Z"
}
},
2024-03-17 18:28:15 +01:00
"outputs": [
{
"data": {
2024-03-17 18:42:28 +01:00
"text/plain": "name 0\nid 0\nnametype 0\nrecclass 0\nmass 117\nfall 0\nyear 147\nreclat 0\nreclong 0\nGeoLocation 0\ndtype: int64"
2024-03-17 18:28:15 +01:00
},
2024-03-17 18:42:28 +01:00
"execution_count": 29,
2024-03-17 18:28:15 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.isna().sum()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We wcześniejszych obserwacjach zostało zauważone, że wszystkie meteoryty odnalezione w Szwecji, w Österplana, mają niską masę przez brak możliwości jej obliczenia. Dlatego wszystkie meteoryty odnalezione w tym miejscu z masą Null zostaną dopisane do tej grupy, przypisując im masę 0."
]
},
{
"cell_type": "code",
2024-03-17 18:42:28 +01:00
"execution_count": 30,
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-17T17:41:27.158515600Z",
"start_time": "2024-03-17T17:41:27.096263300Z"
}
},
2024-03-17 18:28:15 +01:00
"outputs": [
{
"data": {
2024-03-17 18:42:28 +01:00
"text/plain": " name id nametype recclass mass fall year \\\n31014 Österplana 002 44802 Relict Relict OC NaN Found 1993.0 \n31015 Österplana 003 44803 Relict Relict OC NaN Found 1993.0 \n31016 Österplana 004 44804 Relict Relict OC NaN Found 1994.0 \n31017 Österplana 005 44805 Relict Relict OC NaN Found 1990.0 \n31018 Österplana 006 44806 Relict Relict OC NaN Found NaN \n31019 Österplana 007 44807 Relict Relict OC NaN Found 1993.0 \n31020 Österplana 008 44808 Relict Relict OC NaN Found 1995.0 \n31021 Österplana 009 44809 Relict Relict OC NaN Found 1996.0 \n31022 Österplana 010 44810 Relict Relict OC NaN Found 1995.0 \n31023 Österplana 011 44811 Relict Relict OC NaN Found 1997.0 \n31024 Österplana 012 44812 Relict Relict OC NaN Found 1996.0 \n31025 Österplana 013 44813 Relict Relict OC NaN Found 1996.0 \n31026 Österplana 014 44814 Relict Relict OC NaN Found 1996.0 \n31027 Österplana 015 44815 Relict Relict OC NaN Found 1996.0 \n31028 Österplana 016 44816 Relict Relict OC NaN Found 1996.0 \n31029 Österplana 017 44817 Relict Relict OC NaN Found 1997.0 \n31030 Österplana 018 44818 Relict Relict OC NaN Found 1996.0 \n31031 Österplana 019 44819 Relict Relict OC NaN Found 1997.0 \n31032 Österplana 020 44820 Relict Relict OC NaN Found 1997.0 \n31033 Österplana 021 44821 Relict Relict OC NaN Found 1997.0 \n31034 Österplana 022 44822 Relict Relict OC NaN Found 1999.0 \n31035 Österplana 023 44823 Relict Relict OC NaN Found 1999.0 \n31036 Österplana 024 44824 Relict Relict OC NaN Found 1999.0 \n31037 Österplana 025 44825 Relict Relict OC NaN Found 2000.0 \n31038 Österplana 026 44826 Relict Relict OC NaN Found 2000.0 \n31039 Österplana 027 44827 Relict Relict OC NaN Found 2000.0 \n31040 Österplana 028 44828 Relict Relict OC NaN Found 2000.0 \n31041 Österplana 029 44829 Relict Relict OC NaN Found 1998.0 \n31042 Österplana 030 44830 Relict Relict OC NaN Found 1994.0 \n31043 Österplana 031 44831 Relict Relict OC NaN Found 1998.0 \n31044 Österplana 032 44832 Relict Relict OC NaN 
Found 2000.0 \n31045 Österplana 033 44833 Relict Relict OC NaN Found 2000.0 \n31046 Österplana 034 44834 Relict Relict OC NaN Found 1998.0 \n31047 Österplana 035 44835 Relict Relict OC NaN Found 1996.0 \n31048 Österplana 036 44836 Relict Relict OC NaN Found 1996.0 \n31049 Österplana 037 44837 Relict Relict OC NaN Found 1998.0 \n31050 Österplana 038 44838 Relict Relict OC NaN Found 1999.0 \n31051 Österplana 039 44839 Relict Relict OC NaN Found 2000.0 \n31052 Österplana 040 44840 Relict Relict OC NaN Found 2000.0 \n31053 Österplana 041 44841 Relict Relict OC NaN Found 1996.0 \n31054 Österplana 042 44842 Relict Relict OC NaN Found 2000.0 \n31055 Österplana 043 44843 Relict Relict OC NaN Found 2002.0 \n31056 Österplana 044 44844 Relict Relict OC NaN Found 2002.0 \n31057 Österplana 045 44845 Relict Relict OC NaN Found 2002.0 \n31058 Österplana 046 44846 Relict Relict OC NaN Found 2002.0 \n31059 Österplana 047 44847 Relict Relict OC NaN Found 2002.0 \n\n reclat reclong GeoLocation \n31014 58.58333 13.43333 (58.583330, 13.433330) \n31015 58.58333 13.43333 (58.583330, 13.433330) \n31016 58.58333 13.43333 (58.583330, 13.433330) \n31017 58.58333 13.43333 (58.583330, 13.433330) \n31018 58.58333 13.43333 (58.583330, 13.433330) \n31019 58.58333 13.43333 (58.583330, 13.433330) \n31020 58.58333 13.43333 (58.583330, 13.433330) \n31021 58.58333 13.43333 (58.583330, 13.433330) \n31022 58.58333 13.43333 (58.583330, 13.433330) \n31023 58.58333
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>name</th>\n <th>id</th>\n <th>nametype</th>\n <th>recclass</th>\n <th>mass</th>\n <th>fall</th>\n <th>year</th>\n <th>reclat</th>\n <th>reclong</th>\n <th>GeoLocation</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>31014</th>\n <td>Österplana 002</td>\n <td>44802</td>\n <td>Relict</td>\n <td>Relict OC</td>\n <td>NaN</td>\n <td>Found</td>\n <td>1993.0</td>\n <td>58.58333</td>\n <td>13.43333</td>\n <td>(58.583330, 13.433330)</td>\n </tr>\n <tr>\n <th>31015</th>\n <td>Österplana 003</td>\n <td>44803</td>\n <td>Relict</td>\n <td>Relict OC</td>\n <td>NaN</td>\n <td>Found</td>\n <td>1993.0</td>\n <td>58.58333</td>\n <td>13.43333</td>\n <td>(58.583330, 13.433330)</td>\n </tr>\n <tr>\n <th>31016</th>\n <td>Österplana 004</td>\n <td>44804</td>\n <td>Relict</td>\n <td>Relict OC</td>\n <td>NaN</td>\n <td>Found</td>\n <td>1994.0</td>\n <td>58.58333</td>\n <td>13.43333</td>\n <td>(58.583330, 13.433330)</td>\n </tr>\n <tr>\n <th>31017</th>\n <td>Österplana 005</td>\n <td>44805</td>\n <td>Relict</td>\n <td>Relict OC</td>\n <td>NaN</td>\n <td>Found</td>\n <td>1990.0</td>\n <td>58.58333</td>\n <td>13.43333</td>\n <td>(58.583330, 13.433330)</td>\n </tr>\n <tr>\n <th>31018</th>\n <td>Österplana 006</td>\n <td>44806</td>\n <td>Relict</td>\n <td>Relict OC</td>\n <td>NaN</td>\n <td>Found</td>\n <td>NaN</td>\n <td>58.58333</td>\n <td>13.43333</td>\n <td>(58.583330, 13.433330)</td>\n </tr>\n <tr>\n <th>31019</th>\n <td>Österplana 007</td>\n <td>44807</td>\n <td>Relict</td>\n <td>Relict OC</td>\n <td>NaN</td>\n <td>Found</td>\n <td>1993.0</td>\n <td>58.58333</td>\n <td>13.43333</td>\n <td>(58.583330, 13.433330)</td>\n </tr>\n <tr>\n <th>31020</th>\n <td>Österplana 
008</td>\n <td>44808</td>\n <td>Relict</td>\n <td>Relict OC</td>\n <td>NaN</td>\n <td>Found</td>\n <td>1995.0</td>\n <td>58.58333</td>\n <td>13.43333</td>\n <td>(58.583330, 13.433330)</td>\n </tr>\n <tr>\n <th>31021</th>\n <td>Österplana 009</td>\n <td>44809</td>\n <td>Relict</td>\n <td>Relict OC</td>\n <td>NaN</td>\n <td>Found</td>\n <td>1996.0</td>\n <td>58.58333</td>\n <td>13.43333</td>\n <td>(58.583330, 13.433330)</td>\n </tr>\n <tr>\n <th>31022</th>\n <td>Österplana 010</td>\n <td>44810</td>\n <td>Relict</td>\n <td>Relict OC</td>\n <td>NaN</td>\n <td>Found</td>\n <td>1995.0</td>\n <td>58.58333</td>\n <td>13.43333</td>\n <td>(58.583330, 13.433330)</td>\n </tr>\n <tr>\n <th>31023</th>\n <td>Österplana 011</td>\n <td>44811</td>\n <td>Relict</td>\n <td>Relict OC</td>\n <td>NaN</td>\n <td>Found</td>\n <td>1997.0</td>\n <td>58.58333</td>\n <td>13.43333</td>\n <td>(58.583330, 13.433330)</td>\n </tr>\n <tr>\n <th>31024</th>\n <td>Österplana 012</td>\n <td>44812</td>\n <td>Relict</td>\n <td>Relict OC</td>\n <td>NaN</td>\n <td>Found</td>\n <td>1996.0</td>\n <td>58.58333</td>\n <td>13.43333</td>\n <td>(58.583330, 13.433330)</td>\n </tr>\n <tr>\n <th>31025</th>\n <td>Österplana 013</td>\n <td>44813</td>\n <td>Relict</td>\n <td>Relict OC</td>\n <td>NaN</td>\n
2024-03-17 18:28:15 +01:00
},
2024-03-17 18:42:28 +01:00
"execution_count": 30,
2024-03-17 18:28:15 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.loc[(data['mass'].isnull()) & (data['name'].str.startswith('Österplana'))]"
]
},
{
"cell_type": "code",
2024-03-17 18:42:28 +01:00
"execution_count": 31,
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-17T17:41:28.521079200Z",
"start_time": "2024-03-17T17:41:28.451610500Z"
}
},
2024-03-17 18:28:15 +01:00
"outputs": [],
"source": [
"data.loc[(data['mass'].isnull()) & (data['name'].str.startswith('Österplana')), 'mass'] = 0\n",
"meteorite_test.loc[(meteorite_test['mass'].isnull()) & (meteorite_test['name'].str.startswith('Österplana')), 'mass'] = 0\n",
"meteorite_train.loc[(meteorite_train['mass'].isnull()) & (meteorite_train['name'].str.startswith('Österplana')), 'mass'] = 0\n",
"meteorite_val.loc[(meteorite_val['mass'].isnull()) & (meteorite_val['name'].str.startswith('Österplana')), 'mass'] = 0"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Pozostałe wiersze z brakującą masą zostaną usunięte, podobnie jak wiersze z brakującym rokiem."
]
},
{
"cell_type": "code",
2024-03-17 18:42:28 +01:00
"execution_count": 32,
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-17T17:41:30.682656300Z",
"start_time": "2024-03-17T17:41:30.611250500Z"
}
},
2024-03-17 18:28:15 +01:00
"outputs": [
{
"data": {
2024-03-17 18:42:28 +01:00
"text/plain": "name 0\nid 0\nnametype 0\nrecclass 0\nmass 0\nfall 0\nyear 0\nreclat 0\nreclong 0\nGeoLocation 0\ndtype: int64"
2024-03-17 18:28:15 +01:00
},
2024-03-17 18:42:28 +01:00
"execution_count": 32,
2024-03-17 18:28:15 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Remove, in place, every row that still lacks a mass or a year.\n",
"required_columns = ['mass', 'year']\n",
"for frame in (data, meteorite_train, meteorite_test, meteorite_val):\n",
"    frame.dropna(subset=required_columns, inplace=True)\n",
"\n",
"# Sanity check: count the missing values left in each column.\n",
"data.isnull().sum()"
]
},
{
"cell_type": "code",
2024-03-17 18:42:28 +01:00
"execution_count": 33,
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-17T17:41:36.021380200Z",
"start_time": "2024-03-17T17:41:31.594483300Z"
}
},
2024-03-17 18:28:15 +01:00
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-17 18:42:28 +01:00
"j:\\.AppData\\Python\\Python310\\site-packages\\pyproj\\crs\\crs.py:141: FutureWarning: '+init=<authority>:<code>' syntax is deprecated. '<authority>:<code>' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6\n",
2024-03-17 18:28:15 +01:00
" in_crs_string = _prepare_from_proj_string(in_crs_string)\n",
2024-03-17 18:42:28 +01:00
"C:\\Users\\s464914\\AppData\\Local\\temp\\ipykernel_5176\\3992296465.py:4: FutureWarning: The geopandas.dataset module is deprecated and will be removed in GeoPandas 1.0. You can get the original 'naturalearth_lowres' data from https://www.naturalearthdata.com/downloads/110m-cultural-vectors/.\n",
2024-03-17 18:28:15 +01:00
" world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))\n"
]
},
{
"data": {
2024-03-17 18:42:28 +01:00
"text/plain": "<Axes: >"
2024-03-17 18:28:15 +01:00
},
2024-03-17 18:42:28 +01:00
"execution_count": 33,
2024-03-17 18:28:15 +01:00
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
2024-03-17 18:42:28 +01:00
"text/plain": "<Figure size 2000x1000 with 1 Axes>",
"image/png": "iVBORw0KGgoAAAANSUhEUgAABkgAAAMYCAYAAACAPmtuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzddXxb59UH8N8VM9qyzAyxw8xNmobKTdqVaWVa23Vdaeu6boV3XdduKzOl3LRdKSmEGmawg2Zmi1n3vn/IduKYZFuyDOf7+ShXlqWrR44t3fuc55zDcBzHgRBCCCGEEEIIIYQQQgghZBThRXoAhBBCCCGEEEIIIYQQQgghg40CJIQQQgghhBBCCCGEEEIIGXUoQEIIIYQQQgghhBBCCCGEkFGHAiSEEEIIIYQQQgghhBBCCBl1KEBCCCGEEEIIIYQQQgghhJBRhwIkhBBCCCGEEEIIIYQQQggZdShAQgghhBBCCCGEEEIIIYSQUYcCJIQQQgghhBBCCCGEEEIIGXUEkR7AQLEsi+rqaiiVSjAME+nhEEIIIYQQQgghhBBCCCEkgjiOg9VqRVxcHHi87vNEhn2ApLq6GomJiZEeBiGEEEIIIYQQQgghhBBChpCKigokJCR0+/1hHyBRKpUAAi9UpVJFeDSEEEIIIYQQQgghhBBCCIkki8WCxMTE9vhBd4Z9gKStrJZKpaIACSGEEEIIIYQQQgghhBBCAKDXthzUpJ0QQgghhBBCCCGEEEIIIaMOBUgIIYQQQgghhBBCCCGEEDLqUICEEEIIIYQQQgghhBBCCCGjDgVICCGEEEIIIYQQQgghhBAy6lCAhBBCCCGEEEIIIYQQQgghow4FSAghhBBCCCGEEEIIIYQQMupQgIQQQgghhBBCCCGEEEIIIaMOBUgIIYQQQgghhBBCCCGEEDLqUICEEEIIIYQQQgghhBBCCCGjDgVICCGEEEIIIYQQQgghhBAy6lCAhBBCCCGEEEIIIYQQQgghow4FSAghhBBCCCGEEEIIIYQQMupQgIQQQgghhBBCCCGEEEIIIaMOBUgIIYQQQgghhBBCCCGEEDLqUICEEEIIIYQQQgghhBBCCCGjDgVICCGEEEIIIYQQQgghhBAy6lCAhBBCCCGEEEIIIYQQQgghow4FSAghhBBCCCGEEEIIIYQQMupQgIQQQgghhBBCCCGEEEIIIaMOBUgIIYQQQgghhBBCCCGEEDLqUICEEEIIIYQQQgghhBBCCCGjDgVICCGEEEIIIYQQQgghhBAy6lCAhBBCCCGEEEIIIYQQQgghow4FSAghhBBCCCGEEEIIIYQQMupQgIQQQgghhBBCCCGEEEIIIaMOBUgIIYQQQgghhBBCCCGEEDLqUICEEEIIIYQQQgghhBBCCCGjDgVICCGEEEIIIYQQQgghhBAy6lCAhBBCCCGEEEIIIYQQQgghow4FSAghhBBCCCGEEEIIIYQQMuoIIj0AQgghhBAyunh8LOxuHzx+Fh4fC7ePhbf1etttbbef+rWPZcEwDIQ8BnweAwGfAZ/Hg6Dt6/Yt75Tvd3F7h8d3cXvrlmGYSP+oCCGEEEIIIYSEEQVICCGEEEJI2FU0O7D+WD3WHa3HtqImuH1spIfUK/4pARMBj0G0UowErQyJOmlgq5UhQStFok4GrUxIARVCCCGEEEIIGWYoQEIIIYSMYBzHwen1w+L0werywuLywuL0weLywurywedn4ecAP8vCz5625Tj4WA4sG9iKBDzkxqqQFaOEx8fC6mrbj7f1uq/9etvW5vaBASAS8CAW8Fu3gcvpt4m6uK2n+3a1PxGfR5PUIeL1syhptONorRVHayw4VmtFi8MDPo8BjwlkX/CYkwGEtuu8tgwMJnAdAPaVt6CowR7hV9R3fpaDn+Xgaf3a4vJ1+zrkIj4STgmYJGilHb5WS4WDN3BCCCGEEEIIIUGhAAkhhBAyAnh8LL47VI3SRgeqTE5UtThRaXKg1u
yC189FeniDhs9joBALoJQIoJIIoZQIoJQIoZIEblPLREjSyZAaJUdalBxauSjSQ444juPQYHXjSK0Vx2otOFpjxdFaKwrrbfD4h36Wx1Bh9/hxrM6KY3XWLr+vlAhas06kHbNQWrcKMR2WE0IIIYQQQshgozMxQgghZIipNbuwrbgRRfV2SIQ8SEUCyER8SIV8pETJMTZOBQGf137/ogYb7v54H/KrLBEc9dDgZzmYnV6YnV4Azl7vr5EJW4MlCszN1OOiSQnhH+QQUNniwJubS3C0JjCh32z39P4gMiBWlw9Haiw4UnPy71Qm4mNcvBoTEjVYkB2NGal68HmUAUUIIYQQQgghg4UCJIQQQkgEsSyHeqsbO0ubsa2oCduLm1DS2HMpIqVYgBlpesxO1wMAnll7DE6vfzCGO+KYHF4UVFvg9bOYkKiO9HCCYnF5cajSjAOVJhytsSJKIUa6QY70aAUyDAro5aJey4x9vqcSb28pHZwBD1PC1gbuQh6vvRm8kN/a2L21fBi/razYKeXF+Aw63Nbh+wwQo5JgQqIGExM1SI9WUECEEEIIIYQQQiKIAiSEEEJICLSVKapocaLB6obb54fHx8LjZ+HxsTA5vGi2e9Bs96DJ7kaTLXC9xeEB28cKWFa3Dz8fqcPPR+rC82JGsESdFNkxKoyJVSLbqESOUYUUvaxDRs6pOI6DxeVDi90Dm9sHu9sHu8cHm9sPmyvw9em3290+eP0sOA7gwIHj2vZ12tft/wBp0XIsyI7GnIwoKCUne1W4vH4crrHgQIUJB1uDIsW99PJQS4VIjw4ETNINCqRHKxCjEkMpEbaXH5ubEQWvn8WeshbsrzDB5R25pbRyY1VYOSUBmQYF/BwHv58LbFv7i7AcB1/rbW39dnz+wN+u28tCLRMiTi1FrEaCOLUUmn40Y2+xe1DUYINYwMe4hOERiCOEEEIIIYSQ0YDhOG5YFya3WCxQq9Uwm81QqVSRHg4hhJBRZP2xery7tRQVzQ5Utjjh9o3cSeahTikRQC8XQa8QQycXtV4XQScXI0ohQoJWiqwYZYfgAxAIQJQ22VFjdqHG5EKt2Ylqswu1ZheqzU7Uml1weAYvO0fAYzA1RYtknRz51WYcq7XCF2QETeO04Kk1L+DxRTehRhWNWEsDHv3ldTy07E6YpB2PkYR8BhqZCKlRciTrZPD4A0E8k9OLWrMTdRZ3OF7eiCAV8tuDJXEaCWJbt3EaKaKVYtSYXSiqt6GowYaiejsKG2wdSphNTNTg+jkpWD42FiJB14G5YLl9frTYTwZfmx0eNNvcaHZ40Wx3w+vjoJIKoJYKoZIKA1vJKddbvycW8Af6YyGEEEIIIYSQISXYuAEFSAghhJB+Mju9uPTVbTha23VTZhIesWoJZqTqMCNNj6nJWsSoJRDxee2ZGQI+A2E3GSGns7q8uOL1HThUZQ7jiAfHy18+ieXHt6JMY8S959yH5757FsmmWvyQNRu3XfRw0PsRC3iQCPmwuLwY3keJQ5OQz+CK6Un43aJM6BXiDt9zef2oNbvQ1BrwaLF70NSaadZka9223t7cmtUUChIhDxqpCPeclYlLpyX2OUOGEEIIIYQQQoYaCpAQQgghIcKyHDx+Fj6Wg0TA61SOqc7iwvbiQP+QrUVNKGtyRGiko5NIwMOUJC1mp+sxO0OP8QmaHgMkHMfB5WXR4ghMOBc32PGHzw4M+wygWEsDPv7oISSbattvK9MYcdnlT6FGFR3BkREAYBjgoonxuGtRJgCgtNGO4kY7ShvtKGm9VJudEQ9K3bYgHX9cmk1BEkIIIUOKn+WobxchhJA+oQAJIYQQ0kdbixrxzNpjKGtywNvaP8TX2qegTYxKjMunJ+GK6UkwqCSd9sFxHDYXNuK/vxRiZ2nzYA5/1ODzGExM1GBWa6P6yclaSIRdlwhqsrlxqMqM/CozDlaaUVBtQYPNDc8wD4Z0Z3LlEaxedX/71yuufAZ7E8ZEcESkTb
xGCpGAh4pmR9Bl08JJIRYgK0aBbKMKOcZAT57sGCW0clGkh0YIIWSUMTu9KGqwod4SKHNaa3EHrrde6swueP0cJid
2024-03-17 18:28:15 +01:00
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"loc_geom = [Point(xy) for xy in zip(data['reclong'], data['reclat'])]\n",
"geo_df = gpd.GeoDataFrame(data, crs=loc_crs, geometry=loc_geom)\n",
"\n",
"world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))\n",
"geo_df.plot(ax=world.plot(figsize=(20, 10)), marker='x', color='red', markersize=15)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}