ium_464914/IUM_2.ipynb

2395 lines
655 KiB
Plaintext
Raw Normal View History

2024-03-17 18:28:15 +01:00
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install --upgrade pip"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Install every dependency in a single pip invocation so the resolver picks one\n",
"# mutually compatible set of versions instead of resolving five times in isolation.\n",
"# NOTE(review): versions are not pinned; pin them (pkg==x.y.z) once the working set\n",
"# is known so that a fresh environment reproduces the stored outputs.\n",
"%pip install --user kaggle pandas scikit-learn matplotlib geopandas"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt \n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!kaggle datasets download -d nasa/meteorite-landings"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"import zipfile\n",
"\n",
"# Extract the downloaded archive into ./data with the standard library rather than\n",
"# shelling out to `unzip`, which is not installed on every platform. Like\n",
"# `unzip -o ... -d data`, this creates the target directory when needed and\n",
"# overwrites files left over from a previous run.\n",
"with zipfile.ZipFile('meteorite-landings.zip') as archive:\n",
"    archive.extractall('data')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<h4>Wczytanie zbioru</h4>"
]
},
{
"cell_type": "code",
"execution_count": 119,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>id</th>\n",
" <th>nametype</th>\n",
" <th>recclass</th>\n",
" <th>mass</th>\n",
" <th>fall</th>\n",
" <th>year</th>\n",
" <th>reclat</th>\n",
" <th>reclong</th>\n",
" <th>GeoLocation</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Aachen</td>\n",
" <td>1</td>\n",
" <td>Valid</td>\n",
" <td>L5</td>\n",
" <td>21.0</td>\n",
" <td>Fell</td>\n",
" <td>1880.0</td>\n",
" <td>50.77500</td>\n",
" <td>6.08333</td>\n",
" <td>(50.775000, 6.083330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Aarhus</td>\n",
" <td>2</td>\n",
" <td>Valid</td>\n",
" <td>H6</td>\n",
" <td>720.0</td>\n",
" <td>Fell</td>\n",
" <td>1951.0</td>\n",
" <td>56.18333</td>\n",
" <td>10.23333</td>\n",
" <td>(56.183330, 10.233330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Abee</td>\n",
" <td>6</td>\n",
" <td>Valid</td>\n",
" <td>EH4</td>\n",
" <td>107000.0</td>\n",
" <td>Fell</td>\n",
" <td>1952.0</td>\n",
" <td>54.21667</td>\n",
" <td>-113.00000</td>\n",
" <td>(54.216670, -113.000000)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Acapulco</td>\n",
" <td>10</td>\n",
" <td>Valid</td>\n",
" <td>Acapulcoite</td>\n",
" <td>1914.0</td>\n",
" <td>Fell</td>\n",
" <td>1976.0</td>\n",
" <td>16.88333</td>\n",
" <td>-99.90000</td>\n",
" <td>(16.883330, -99.900000)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Achiras</td>\n",
" <td>370</td>\n",
" <td>Valid</td>\n",
" <td>L6</td>\n",
" <td>780.0</td>\n",
" <td>Fell</td>\n",
" <td>1902.0</td>\n",
" <td>-33.16667</td>\n",
" <td>-64.95000</td>\n",
" <td>(-33.166670, -64.950000)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Adhi Kot</td>\n",
" <td>379</td>\n",
" <td>Valid</td>\n",
" <td>EH4</td>\n",
" <td>4239.0</td>\n",
" <td>Fell</td>\n",
" <td>1919.0</td>\n",
" <td>32.10000</td>\n",
" <td>71.80000</td>\n",
" <td>(32.100000, 71.800000)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Adzhi-Bogdo (stone)</td>\n",
" <td>390</td>\n",
" <td>Valid</td>\n",
" <td>LL3-6</td>\n",
" <td>910.0</td>\n",
" <td>Fell</td>\n",
" <td>1949.0</td>\n",
" <td>44.83333</td>\n",
" <td>95.16667</td>\n",
" <td>(44.833330, 95.166670)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Agen</td>\n",
" <td>392</td>\n",
" <td>Valid</td>\n",
" <td>H5</td>\n",
" <td>30000.0</td>\n",
" <td>Fell</td>\n",
" <td>1814.0</td>\n",
" <td>44.21667</td>\n",
" <td>0.61667</td>\n",
" <td>(44.216670, 0.616670)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Aguada</td>\n",
" <td>398</td>\n",
" <td>Valid</td>\n",
" <td>L6</td>\n",
" <td>1620.0</td>\n",
" <td>Fell</td>\n",
" <td>1930.0</td>\n",
" <td>-31.60000</td>\n",
" <td>-65.23333</td>\n",
" <td>(-31.600000, -65.233330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Aguila Blanca</td>\n",
" <td>417</td>\n",
" <td>Valid</td>\n",
" <td>L</td>\n",
" <td>1440.0</td>\n",
" <td>Fell</td>\n",
" <td>1920.0</td>\n",
" <td>-30.86667</td>\n",
" <td>-64.55000</td>\n",
" <td>(-30.866670, -64.550000)</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name id nametype recclass mass fall year \\\n",
"0 Aachen 1 Valid L5 21.0 Fell 1880.0 \n",
"1 Aarhus 2 Valid H6 720.0 Fell 1951.0 \n",
"2 Abee 6 Valid EH4 107000.0 Fell 1952.0 \n",
"3 Acapulco 10 Valid Acapulcoite 1914.0 Fell 1976.0 \n",
"4 Achiras 370 Valid L6 780.0 Fell 1902.0 \n",
"5 Adhi Kot 379 Valid EH4 4239.0 Fell 1919.0 \n",
"6 Adzhi-Bogdo (stone) 390 Valid LL3-6 910.0 Fell 1949.0 \n",
"7 Agen 392 Valid H5 30000.0 Fell 1814.0 \n",
"8 Aguada 398 Valid L6 1620.0 Fell 1930.0 \n",
"9 Aguila Blanca 417 Valid L 1440.0 Fell 1920.0 \n",
"\n",
" reclat reclong GeoLocation \n",
"0 50.77500 6.08333 (50.775000, 6.083330) \n",
"1 56.18333 10.23333 (56.183330, 10.233330) \n",
"2 54.21667 -113.00000 (54.216670, -113.000000) \n",
"3 16.88333 -99.90000 (16.883330, -99.900000) \n",
"4 -33.16667 -64.95000 (-33.166670, -64.950000) \n",
"5 32.10000 71.80000 (32.100000, 71.800000) \n",
"6 44.83333 95.16667 (44.833330, 95.166670) \n",
"7 44.21667 0.61667 (44.216670, 0.616670) \n",
"8 -31.60000 -65.23333 (-31.600000, -65.233330) \n",
"9 -30.86667 -64.55000 (-30.866670, -64.550000) "
]
},
"execution_count": 119,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Read the meteorite landings CSV from the data directory and preview its first ten rows.\n",
"data = pd.read_csv(filepath_or_buffer=\"data/meteorite-landings.csv\")\n",
"data.head(n=10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Podział na podzbiory"
]
},
{
"cell_type": "code",
"execution_count": 120,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Two-stage split with a fixed seed: hold out 20% of all rows as the test set, then\n",
"# take 25% of the remaining rows as the validation set.\n",
"train_val_pool, meteorite_test = train_test_split(data, test_size=0.2, random_state=1)\n",
"meteorite_train, meteorite_val = train_test_split(train_val_pool, test_size=0.25, random_state=1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<h4>Statystyki</h4>"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Wielkości zbiorów"
]
},
{
"cell_type": "code",
"execution_count": 121,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"wielkość zbioru: (45716, 10)\n",
"wielkość zbioru treningowego: (27429, 10)\n",
"wielkość zbioru testującego: (9144, 10)\n",
"wielkość zbioru walidacyjnego: (9143, 10)\n"
]
}
],
"source": [
"# Print the (rows, columns) shape of the full dataset and of each split.\n",
"subset_shapes = {\n",
"    'wielkość zbioru': data.shape,\n",
"    'wielkość zbioru treningowego': meteorite_train.shape,\n",
"    'wielkość zbioru testującego': meteorite_test.shape,\n",
"    'wielkość zbioru walidacyjnego': meteorite_val.shape,\n",
"}\n",
"for label, shape in subset_shapes.items():\n",
"    print(f'{label}: {shape}')"
]
},
{
"cell_type": "code",
"execution_count": 122,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 45716 entries, 0 to 45715\n",
"Data columns (total 10 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 name 45716 non-null object \n",
" 1 id 45716 non-null int64 \n",
" 2 nametype 45716 non-null object \n",
" 3 recclass 45716 non-null object \n",
" 4 mass 45585 non-null float64\n",
" 5 fall 45716 non-null object \n",
" 6 year 45428 non-null float64\n",
" 7 reclat 38401 non-null float64\n",
" 8 reclong 38401 non-null float64\n",
" 9 GeoLocation 38401 non-null object \n",
"dtypes: float64(4), int64(1), object(5)\n",
"memory usage: 3.5+ MB\n"
]
}
],
"source": [
"data.info()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Masa meteorytu"
]
},
{
"cell_type": "code",
"execution_count": 123,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Średnia masa: 13278.078548601516\n",
"Maksymalna masa: 60000000.0\n",
"Minimalna masa: 0.0\n"
]
}
],
"source": [
"# Mean, maximum and minimum of the recorded masses, one line per statistic.\n",
"mass_column = data['mass']\n",
"mass_summary = [\n",
"    ('Średnia masa', mass_column.mean()),\n",
"    ('Maksymalna masa', mass_column.max()),\n",
"    ('Minimalna masa', mass_column.min()),\n",
"]\n",
"for label, value in mass_summary:\n",
"    print(f'{label}: {value}')"
]
},
{
"cell_type": "code",
"execution_count": 110,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1EAAAIjCAYAAADiGJHUAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/H5lhTAAAACXBIWXMAAA9hAAAPYQGoP6dpAABDt0lEQVR4nO3de1RU9eL//9cAAl4QLyiIAt5NUjEVzdTStGNollbeykQz+3QaUiM7X+188lKmXY5mdSbtcpSyY5ql1ifzkmhZHTuihl1Q0yLxCqIlgooK+/eHP2c1Asoe0D0wz8das5b7Mnu/9sygvNx7v8dmGIYhAAAAAECp+FgdAAAAAAAqEkoUAAAAAJhAiQIAAAAAEyhRAAAAAGACJQoAAAAATKBEAQAAAIAJlCgAAAAAMIESBQAAAAAmUKIAAAAAwARKFACUwrRp02Sz2a7Jvnr27KmePXs6p7/44gvZbDZ9+OGH12T/o0aNUuPGja/JvtyVm5urhx56SGFhYbLZbJowYYLVkSx3LT+jAODtKFEAvE5SUpJsNpvzERgYqPDwcPXt21evvvqqTp48WS77OXTokKZNm6bU1NRy2V558uRspTFz5kwlJSXpr3/9qxYtWqQHHnigxHUbN24sm82mPn36FLv8rbfecn4Wtm7dajpLWlqapk2bpt9++830c6+2mTNnauXKleW+3av9mgKAp6NEAfBazzzzjBYtWqR58+bpsccekyRNmDBBbdu21ffff++y7v/+7//q9OnTprZ/6NAhTZ8+3XRRWbdundatW2fqOWZdLttbb72l3bt3X9X9l9WGDRt04403aurUqRoxYoQ6dux42fUDAwO1ceNGHTlypMiyf//73woMDHQ7S1pamqZPn255iSruM3q1SpR0dV9TAPB0lCgAXisuLk4jRozQ6NGjNXnyZK1du1br169XVlaW7rzzTpdfSP38/K76L4WnTp2SJPn7+8vf3/+q7utyqlSpooCAAMv2XxpZWVmqVatWqdfv1q2batSooaVLl7rMP3DggL766iv179+/nBNeO3l5eZKuzWf0zyrzawoAV0KJAoA/ufXWW/X0009r3759eu+995zzi7vf5PPPP1f37t1Vq1Yt1ahRQ61atdJTTz0l6cJ9TLGxsZKk0aNHOy9tSkpKknThvqc2bdpo27Ztuvnmm1WtWjXncy+9J+qigoICPfXUUwoLC1P16tV15513av/+/S7rNG7cWKNGjSry3D9v80rZirsnKi8vT0888YQiIiIUEBCgVq1a6R//+IcMw3BZz2azKSEhQStXrlSbNm0UEBCg66+/XmvWrCn+Bb9EVlaWxowZo9DQUAUGBiomJkbvvPOOc/nF+8PS09O1atUqZ/YrnQUKDAzU3XffrcWLF7vMf//991W7dm317du32Oft2rVL9957r+rUqaPAwEB16tRJn3zyiXN5UlKSBg8eLEnq1auXM88XX3zhXGf16tXq0aOHqlevrqCgIPXv318//fRTkX1t2LDBuV6tWrV01113aefOnS7rXPwcpqWl6b777lPt2rXVvXt3l2UX2Ww25eXl6Z133nHm+vNn4+DBg3rwwQcVGhrqfJ8WLFhw2dfxz9x5Tb///nuNGjVKTZs2VWBgoMLCwvTggw/q2LFjLuudPHlSEyZMUOPGjRUQEKD69evrtttu0/bt253r7NmzR/fcc4/CwsIUGBioRo0aadiwYTpx4oTLtt577z117NhRVatWVZ06dTRs2LAiPzel3RYAXORndQAA8DQPPPCAnnrqKa1bt05jx44tdp2ffvpJd9xxh9q1a6dnnnlGAQEB2rt3r7755htJUuvWrfXMM89oypQpevjhh9WjRw9J0k033eTcxrFjxxQXF6dhw4ZpxIgRCg0NvWyu5557TjabTf/v//0/ZWVlae7cuerTp49SU1NVtWrVUh9fabL9mWEYuvPOO7Vx40aNGTNG7du319q1a/Xkk0/q4MGDev
nll13W//rrr7V8+XI9+uijCgoK0quvvqp77rlHGRkZqlu3bom5Tp8+rZ49e2rv3r1KSEhQkyZNtGzZMo0aNUp//PGHxo8fr9atW2vRokV6/PHH1ahRIz3xxBOSpHr16l3xuO+77z795S9/0S+//KJmzZpJkhYvXqx7771XVapUKbL+Tz/9pG7duqlhw4aaNGmSqlevrg8++EADBw7URx99pEGDBunmm2/WuHHj9Oqrr+qpp55S69atna+xJC1atEjx8fHq27evXnjhBZ06dUrz5s1T9+7d9d133znL6vr16xUXF6emTZtq2rRpOn36tF577TV169ZN27dvL1JqBw8erBYtWmjmzJlFiuxFixYt0kMPPaTOnTvr4YcfliTncWdmZurGG290lt569epp9erVGjNmjHJycko9UIfZ1/Tzzz/Xr7/+qtGjRyssLEw//fST3nzzTf3000/69ttvnSXwkUce0YcffqiEhARFR0fr2LFj+vrrr7Vz50516NBBZ8+eVd++fZWfn6/HHntMYWFhOnjwoD799FP98ccfCg4OlnThZ+bpp5/WkCFD9NBDD+no0aN67bXXdPPNN+u7775TrVq1Sr0tAHBhAICXWbhwoSHJSElJKXGd4OBg44YbbnBOT5061fjzX5kvv/yyIck4evRoidtISUkxJBkLFy4ssuyWW24xJBnz588vdtktt9zinN64caMhyWjYsKGRk5PjnP/BBx8YkoxXXnnFOS8qKsqIj4+/4jYvly0+Pt6IiopyTq9cudKQZMyYMcNlvXvvvdew2WzG3r17nfMkGf7+/i7zduzYYUgyXnvttSL7+rO5c+cakoz33nvPOe/s2bNG165djRo1argce1RUlNG/f//Lbu/Sdc+fP2+EhYUZzz77rGEYhpGWlmZIMr788stiPxO9e/c22rZta5w5c8Y5r7Cw0LjpppuMFi1aOOctW7bMkGRs3LjRZb8nT540atWqZYwdO9Zl/pEjR4zg4GCX+e3btzfq169vHDt2zDlvx44dho+PjzFy5EjnvIufw+HDhxc5zks/o4ZhGNWrVy/28zBmzBijQYMGRnZ2tsv8YcOGGcHBwcapU6eKPOfP3H1Ni9vu+++/b0gyNm3a5JwXHBxs2O32Evf/3XffGZKMZcuWlbjOb7/9Zvj6+hrPPfecy/wffvjB8PPzc84vzbYA4FJczgcAxahRo8ZlR+m7eD/Oxx9/rMLCQrf2ERAQoNGjR5d6/ZEjRyooKMg5fe+996pBgwb67LPP3Np/aX322Wfy9fXVuHHjXOY/8cQTMgxDq1evdpnfp08f51kJSWrXrp1q1qypX3/99Yr7CQsL0/Dhw53zqlSponHjxik3N1dffvllmY7D19dXQ4YM0fvvvy/pwuAHERERzjNxf3b8+HFt2LBBQ4YM0cmTJ5Wdna3s7GwdO3ZMffv21Z49e3Tw4MHL7u/zzz/XH3/8oeHDhzufn52dLV9fX3Xp0kUbN26UJB0+fFipqakaNWqU6tSp43x+u3btdNtttxX7/j7yyCNuvw6GYeijjz7SgAEDZBiGS7a+ffvqxIkTLpfNXY6Z11SSyxnTM2fOKDs7WzfeeKMkueyzVq1a+u9//6tDhw4Vu52LZ4fWrl3rvJfwUsuXL1dhYaGGDBnicoxhYWFq0aKF8/UvzbYA4FKUKAAoRm5urkthudTQoUPVrVs3PfTQQwoNDdWwYcP0wQcfmCpUDRs2NDWARIsWLVymbTabmjdvftVHhdu3b5/Cw8OLvB4XL1nbt2+fy/zIyMgi26hdu7Z+//33K+6nRYsW8vFx/aeppP2447777lNaWpp27NihxYsXa9iwYcV+t9LevXtlGIaefvpp1atXz+UxdepUSRfu37qcPXv2SLpwn92l21i3bp3z+RePq1WrVkW20bp1a2VnZzsHj7ioSZMm5g/+/3f06FH98ccfevPNN4vkuljqr3Rsf1ba11S6UE7Hjx+v0NBQVa1aVfXq1XMey5
/vP3rxxRf1448/KiIiQp07d9a0adNcSniTJk2UmJiot99+WyEhIerbt68cDofLNvbs2SPDMNSiRYsix7lz507nMZZ
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Histogram of the recorded masses with a logarithmic count axis.\n",
"# Rows without a recorded mass are dropped explicitly instead of relying on the\n",
"# plotting library to ignore NaN values.\n",
"known_mass = data[\"mass\"].dropna()\n",
"\n",
"plt.figure(figsize=(10, 6))\n",
"plt.hist(known_mass, color=\"tan\", log=True, edgecolor=\"black\")\n",
"plt.title('Distribution of Meteorite Masses')\n",
"plt.xlabel('Mass')\n",
"plt.ylabel('Frequency')\n",
"# No legend: the former bare `plt.legend` statement only referenced the function\n",
"# without calling it, and the single unlabeled series has nothing to list.\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 124,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>id</th>\n",
" <th>nametype</th>\n",
" <th>recclass</th>\n",
" <th>mass</th>\n",
" <th>fall</th>\n",
" <th>year</th>\n",
" <th>reclat</th>\n",
" <th>reclong</th>\n",
" <th>GeoLocation</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>12627</th>\n",
" <td>Gove</td>\n",
" <td>52859</td>\n",
" <td>Relict</td>\n",
" <td>Relict iron</td>\n",
" <td>0.0</td>\n",
" <td>Found</td>\n",
" <td>1979.0</td>\n",
" <td>-12.26333</td>\n",
" <td>136.83833</td>\n",
" <td>(-12.263330, 136.838330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25551</th>\n",
" <td>Miller Range 090478</td>\n",
" <td>55953</td>\n",
" <td>Valid</td>\n",
" <td>CO3</td>\n",
" <td>0.0</td>\n",
" <td>Found</td>\n",
" <td>2009.0</td>\n",
" <td>0.00000</td>\n",
" <td>0.00000</td>\n",
" <td>(0.000000, 0.000000)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31060</th>\n",
" <td>Österplana 048</td>\n",
" <td>56147</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>0.0</td>\n",
" <td>Found</td>\n",
" <td>2004.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31061</th>\n",
" <td>Österplana 049</td>\n",
" <td>56148</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>0.0</td>\n",
" <td>Found</td>\n",
" <td>2012.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31062</th>\n",
" <td>Österplana 050</td>\n",
" <td>56149</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>0.0</td>\n",
" <td>Found</td>\n",
" <td>2003.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31063</th>\n",
" <td>Österplana 051</td>\n",
" <td>56150</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>0.0</td>\n",
" <td>Found</td>\n",
" <td>2006.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31064</th>\n",
" <td>Österplana 052</td>\n",
" <td>56151</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>0.0</td>\n",
" <td>Found</td>\n",
" <td>2006.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31065</th>\n",
" <td>Österplana 053</td>\n",
" <td>56152</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>0.0</td>\n",
" <td>Found</td>\n",
" <td>2002.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31066</th>\n",
" <td>Österplana 054</td>\n",
" <td>56153</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>0.0</td>\n",
" <td>Found</td>\n",
" <td>2005.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31067</th>\n",
" <td>Österplana 055</td>\n",
" <td>56154</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>0.0</td>\n",
" <td>Found</td>\n",
" <td>2008.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31068</th>\n",
" <td>Österplana 056</td>\n",
" <td>56155</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>0.0</td>\n",
" <td>Found</td>\n",
" <td>2008.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31069</th>\n",
" <td>Österplana 057</td>\n",
" <td>56156</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>0.0</td>\n",
" <td>Found</td>\n",
" <td>2009.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31070</th>\n",
" <td>Österplana 058</td>\n",
" <td>56157</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>0.0</td>\n",
" <td>Found</td>\n",
" <td>2009.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31071</th>\n",
" <td>Österplana 059</td>\n",
" <td>56158</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>0.0</td>\n",
" <td>Found</td>\n",
" <td>2009.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31072</th>\n",
" <td>Österplana 060</td>\n",
" <td>56159</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>0.0</td>\n",
" <td>Found</td>\n",
" <td>2009.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31073</th>\n",
" <td>Österplana 061</td>\n",
" <td>56160</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>0.0</td>\n",
" <td>Found</td>\n",
" <td>2009.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31074</th>\n",
" <td>Österplana 062</td>\n",
" <td>56161</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>0.0</td>\n",
" <td>Found</td>\n",
" <td>2010.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31075</th>\n",
" <td>Österplana 063</td>\n",
" <td>56162</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>0.0</td>\n",
" <td>Found</td>\n",
" <td>2010.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31076</th>\n",
" <td>Österplana 064</td>\n",
" <td>56163</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>0.0</td>\n",
" <td>Found</td>\n",
" <td>2011.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name id nametype recclass mass fall year \\\n",
"12627 Gove 52859 Relict Relict iron 0.0 Found 1979.0 \n",
"25551 Miller Range 090478 55953 Valid CO3 0.0 Found 2009.0 \n",
"31060 Österplana 048 56147 Relict Relict OC 0.0 Found 2004.0 \n",
"31061 Österplana 049 56148 Relict Relict OC 0.0 Found 2012.0 \n",
"31062 Österplana 050 56149 Relict Relict OC 0.0 Found 2003.0 \n",
"31063 Österplana 051 56150 Relict Relict OC 0.0 Found 2006.0 \n",
"31064 Österplana 052 56151 Relict Relict OC 0.0 Found 2006.0 \n",
"31065 Österplana 053 56152 Relict Relict OC 0.0 Found 2002.0 \n",
"31066 Österplana 054 56153 Relict Relict OC 0.0 Found 2005.0 \n",
"31067 Österplana 055 56154 Relict Relict OC 0.0 Found 2008.0 \n",
"31068 Österplana 056 56155 Relict Relict OC 0.0 Found 2008.0 \n",
"31069 Österplana 057 56156 Relict Relict OC 0.0 Found 2009.0 \n",
"31070 Österplana 058 56157 Relict Relict OC 0.0 Found 2009.0 \n",
"31071 Österplana 059 56158 Relict Relict OC 0.0 Found 2009.0 \n",
"31072 Österplana 060 56159 Relict Relict OC 0.0 Found 2009.0 \n",
"31073 Österplana 061 56160 Relict Relict OC 0.0 Found 2009.0 \n",
"31074 Österplana 062 56161 Relict Relict OC 0.0 Found 2010.0 \n",
"31075 Österplana 063 56162 Relict Relict OC 0.0 Found 2010.0 \n",
"31076 Österplana 064 56163 Relict Relict OC 0.0 Found 2011.0 \n",
"\n",
" reclat reclong GeoLocation \n",
"12627 -12.26333 136.83833 (-12.263330, 136.838330) \n",
"25551 0.00000 0.00000 (0.000000, 0.000000) \n",
"31060 58.58333 13.43333 (58.583330, 13.433330) \n",
"31061 58.58333 13.43333 (58.583330, 13.433330) \n",
"31062 58.58333 13.43333 (58.583330, 13.433330) \n",
"31063 58.58333 13.43333 (58.583330, 13.433330) \n",
"31064 58.58333 13.43333 (58.583330, 13.433330) \n",
"31065 58.58333 13.43333 (58.583330, 13.433330) \n",
"31066 58.58333 13.43333 (58.583330, 13.433330) \n",
"31067 58.58333 13.43333 (58.583330, 13.433330) \n",
"31068 58.58333 13.43333 (58.583330, 13.433330) \n",
"31069 58.58333 13.43333 (58.583330, 13.433330) \n",
"31070 58.58333 13.43333 (58.583330, 13.433330) \n",
"31071 58.58333 13.43333 (58.583330, 13.433330) \n",
"31072 58.58333 13.43333 (58.583330, 13.433330) \n",
"31073 58.58333 13.43333 (58.583330, 13.433330) \n",
"31074 58.58333 13.43333 (58.583330, 13.433330) \n",
"31075 58.58333 13.43333 (58.583330, 13.433330) \n",
"31076 58.58333 13.43333 (58.583330, 13.433330) "
]
},
"execution_count": 124,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show every record whose mass is exactly zero.\n",
"has_zero_mass = data['mass'].eq(0)\n",
"data[has_zero_mass]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Wygląda na to, że w tym samym miejscu odnaleziono wiele meteorytów o masie równej 0. <br>\n",
"Po sprawdzeniu źródeł okazało się, że nie są to błędne wartości. W Szwecji znaleziono skamieniałe meteoryty, które są bardzo stare (setki milionów lat), przez co nie ma możliwości określenia ich masy. \n",
"Źródła:\n",
"- https://en.wikipedia.org/wiki/Österplana_065"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<h4>Fall</h4>"
]
},
{
"cell_type": "code",
"execution_count": 125,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"fall\n",
"Found 44609\n",
"Fell 1107\n",
"Name: count, dtype: int64"
]
},
"execution_count": 125,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of records for each value of the `fall` column.\n",
"fall_counts = data['fall'].value_counts()\n",
"fall_counts"
]
},
{
"cell_type": "code",
"execution_count": 126,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAArUAAAH5CAYAAACf0sbLAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/H5lhTAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAjkUlEQVR4nO3df5BV9X3/8dcC7vJDdw0qSygQMSYqUWFAhW3atBrqqpipFaeaWkVFM1p01G1VaB1Qk8ZUx58DSloTsUkcf8RRIyjq4IgZRdGlNEiESb8xAynZBarsClFWYb9/ZLjjRkxEfqwfeTxm7kz2nPc953PuTK5P79x7rOrs7OwMAAAUrEd3LwAAAHaUqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4vXq7gV0py1btmT16tXZZ599UlVV1d3LAQDg93R2duatt97KoEGD0qPHh38eu0dH7erVqzNkyJDuXgYAAH/EqlWrMnjw4A/dv0dH7T777JPkdy9SbW1tN68GAIDf197eniFDhlS67cPs0VG79SsHtbW1ohYA4BPsj31V1A/FAAAonqgFAKB4ohYAgOKJWgAAiidqAQAonqgFAKB4ohYAgOKJWgAAiidqAQAonqgFAKB4ohYAgOKJWgAAiidqAQAonqgFAKB4ohYAgOKJWgAAiidqAQAonqgFAKB4vbp7AXualSvbs27d2929DGAX2n//Phk6tLa7lwGwRxG1u9HKle057LDv57e/fa+7lwLsQn379sprr50nbAF2I1G7G61b93Z++9v3cumNf5rBB9V193KAXeDXv2zLbVe8kHXr3ha1ALuRqO0Ggw+qy0Ff6t/dywAA+NTwQzEAAIonagEAKJ6oBQCgeKIWAIDiiVoAAIonagEAKJ6oBQCgeKIWAIDiiVoAAIonagEAKJ6oBQCgeKIWAIDiiVoAAIonagEAKJ6oBQCgeKIWAIDiiVoAAIonagEAKJ6oBQCgeKIWAIDiiVoAAIonagEAKJ6oBQCgeKIWAIDiiVoAAIonagEAKJ6oBQCgeKIWAIDiiVoAAIonagEAKJ6oBQCgeKIWAIDiiVoAAIonagEAKJ6oBQCgeKIWAIDiiVoAAIonagEAKJ6oBQCgeDsUtd/5zndSVVWVyy67rLLtnXfeyeTJk7Pffvtl7733zoQJE9La2trleStXrsz48ePTt2/fDBgwIFdccUXee++9LjPPPvtsRo0alZqamhx88MGZPXv2B84/c+bMHHjggendu3fGjBmTRYsW7cjlAABQqI8dtS+//HK++93v5sgjj+yy/fLLL89jjz2WBx98MAsWLMjq1atz6qmnVvZv3rw548ePT0dHR1544YXcc889mT17dqZNm1aZef311zN+/Pgce+yxWbJkSS677LKcf/75efLJJysz999/f5qamjJ9+vQsXrw4I0aMSGNjY9asWfNxLwkAgEJ9rKjdsGFDzjzzzPzHf/xHPvOZz1S2t7W15Xvf+15uvvnmHHfccRk9enTuvvvuvPDCC3nxxReTJE899VR+/vOf54c//GFGjhyZE088Md/85jczc+bMdHR0JElmzZqVYcOG5aabbsphhx2Wiy++OKeddlpuueWWyrluvvnmXHDBBTn33HMzfPjwzJo1K3379s33v//9HXk9AAAo0MeK2smTJ2f8+PEZN25cl+3Nzc159913u2w/9NBDM3To0CxcuDBJsnDhwhxxxBGpr6+vzDQ2Nqa9vT3Lli2rzPz+sRsbGyvH6OjoSHNzc5eZHj16ZNy4cZWZbdm0aVPa29u7PAAAKF+v7X3Cfffdl8WLF+fll1/+wL6WlpZUV1dn33337bK9vr4+LS0tlZn3B+3W/Vv3/aGZ9vb2vP3223nzzTezefPmbc4sX778Q9d+/fXX59prr/1oFwoAQDG265PaVatW5dJLL82PfvSj9O7de1etaZeZOnVq2traKo9Vq1Z195IAANgJtitqm5ubs2
bNmowaNSq9evVKr169smDBgtx+++3p1atX6uvr09HRkfXr13d5XmtrawYOHJgkGThw4AfuhrD17z82U1tbmz59+mT//fdPz549tzmz9RjbUlNTk9ra2i4PAADKt11R+9WvfjVLly7NkiVLKo+jjjoqZ555ZuV/77XXXpk/f37lOStWrMjKlSvT0NCQJGloaMjSpUu73KXg6aefTm1tbYYPH16Zef8xts5sPUZ1dXVGjx7dZWbLli2ZP39+ZQYAgD3Hdn2ndp999snhhx/eZVu/fv2y3377VbZPmjQpTU1N6d+/f2pra3PJJZekoaEhY8eOTZIcf/zxGT58eM4666zccMMNaWlpydVXX53JkyenpqYmSXLhhRdmxowZufLKK3PeeeflmWeeyQMPPJC5c+dWztvU1JSJEyfmqKOOyjHHHJNbb701GzduzLnnnrtDLwgAAOXZ7h+K/TG33HJLevTokQkTJmTTpk1pbGzMHXfcUdnfs2fPzJkzJxdddFEaGhrSr1+/TJw4Mdddd11lZtiwYZk7d24uv/zy3HbbbRk8eHDuuuuuNDY2VmZOP/30rF27NtOmTUtLS0tGjhyZefPmfeDHYwAAfPpVdXZ2dnb3IrpLe3t76urq0tbWtlu+X7t4cWtGj/5BbnzoxBz0pf67/HzA7vfLZW/kiglPpLn5rIwa5V+yAXbUR+21HfrP5AIAwCeBqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKJ2oBACieqAUAoHiiFgCA4olaAACKt11Re+edd+bII49MbW1tamtr09DQkCeeeKKy/5133snkyZOz3377Ze+9986ECRPS2tra5RgrV67M+PHj07dv3wwYMCBXXHFF3nvvvS4zzz77bEaNGpWampocfPDBmT179gfWMnPmzBx44IHp3bt3xowZk0WLFm3PpQAA8CmyXVE7ePDgfOc730lzc3NeeeWVHHfccfnrv/7rLFu2LEly+eWX57HHHsuDDz6YBQsWZPXq1Tn11FMrz9+8eXPGjx+fjo6OvPDCC7nnnnsye/bsTJs2rTLz+uuvZ/z48Tn22GOzZMmSXHbZZTn//PPz5JNPVmbuv//+NDU1Zfr06Vm8eHFGjBiRxsbGrFmzZkdfDwAAClTV2dnZuSMH6N+/f2688cacdtppOeCAA3LvvffmtNNOS5IsX748hx12WBYuXJixY8fmiSeeyMknn5zVq1envr4+STJr1qxcddVVWbt2baqrq3PVVVdl7ty5efXVVyvnOOOMM7J+/frMmzcvSTJmzJgcffTRmTFjRpJky5YtGTJkSC655JJMmTLlQ9e6adOmbNq0qfJ3e3t7hgwZkra2ttTW1u7Iy/CRLF7cmtGjf5AbHzoxB32p/y4/H7D7/XLZG7liwhNpbj4ro0bVd/dyAIrX3t6eurq6P9prH/s7tZs3b859992XjRs3pqGhIc
3NzXn33Xczbty4ysyhhx6aoUOHZuHChUmShQsX5ogjjqgEbZI0Njamvb298mnvwoULuxxj68zWY3R0dKS5ubnLTI8
"text/plain": [
"<Figure size 800x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Bar chart of the number of records per `fall` category.\n",
"# value_counts() orders its result by frequency (Found first, then Fell), so the\n",
"# counts are re-aligned to the fixed label order before plotting. Passing the raw\n",
"# counts next to hard-coded labels would attach each bar to the wrong category.\n",
"fall_order = ['Fell', 'Found']\n",
"fall_counts = data['fall'].value_counts().reindex(fall_order, fill_value=0)\n",
"\n",
"plt.figure(figsize=(8, 6))\n",
"plt.bar(fall_order, fall_counts, color=['lightblue', 'lightgreen'], edgecolor=['darkblue', 'darkgreen'])\n",
"plt.title('Meteorites by fall category')\n",
"plt.xlabel('Fall')\n",
"plt.ylabel('Count')\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<h4>Klasa meteorytu</h4>"
]
},
{
"cell_type": "code",
"execution_count": 127,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Liczba klas meteorytow: 466\n",
"10 najpopularniejszych klas:\n"
]
},
{
"data": {
"text/plain": [
"recclass\n",
"L6 8285\n",
"H5 7142\n",
"L5 4796\n",
"H6 4528\n",
"H4 4211\n",
"LL5 2766\n",
"LL6 2043\n",
"L4 1253\n",
"H4/5 428\n",
"CM2 416\n",
"Name: count, dtype: int64"
]
},
"execution_count": 127,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Count the distinct meteorite classes and list the ten most frequent ones.\n",
"recclass_counts = data['recclass'].value_counts()\n",
"class_count = data['recclass'].nunique()\n",
"top_10 = recclass_counts.iloc[:10]\n",
"\n",
"print(f'Liczba klas meteorytow: {class_count}')\n",
"print(\"10 najpopularniejszych klas:\")\n",
"top_10"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<h4>Lokalizacja</h4>"
]
},
{
"cell_type": "code",
"execution_count": 128,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\Genos\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\pyproj\\crs\\crs.py:141: FutureWarning: '+init=<authority>:<code>' syntax is deprecated. '<authority>:<code>' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6\n",
" in_crs_string = _prepare_from_proj_string(in_crs_string)\n",
"C:\\Users\\Genos\\AppData\\Local\\Temp\\ipykernel_21240\\101546836.py:8: FutureWarning: The geopandas.dataset module is deprecated and will be removed in GeoPandas 1.0. You can get the original 'naturalearth_lowres' data from https://www.naturalearthdata.com/downloads/110m-cultural-vectors/.\n",
" world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))\n"
]
},
{
"data": {
"text/plain": [
"<Axes: >"
]
},
"execution_count": 128,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABkgAAAIkCAYAAABV8acMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/H5lhTAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hUZfbA8e/0kslk0ntC70gRUQERrFgRXF172V2764JYFntZdW0r68/eexfsoiuCKCDSew+k9zZJps/c3x+TDAkphJBKzud58gy5c+fOe2fClPe85xyVoigKQgghhBBCCCGEEEIIIYQQvYi6qwcghBBCCCGEEEIIIYQQQgjR2SRAIoQQQgghhBBCCCGEEEKIXkcCJEIIIYQQQgghhBBCCCGE6HUkQCKEEEIIIYQQQgghhBBCiF5HAiRCCCGEEEIIIYQQQgghhOh1JEAihBBCCCGEEEIIIYQQQoheRwIkQgghhBBCCCGEEEIIIYTodSRAIoQQQgghhBBCCCGEEEKIXkfb1QM4XIFAgLy8PMLDw1GpVF09HCGEEEIIIYQQQgghhBBCdCFFUaiqqiIpKQm1uvk8kR4fIMnLyyM1NbWrhyGEEEIIIYQQQgghhBBCiG4kOzublJSUZq/v8QGS8PBwIHiiVqu1i0cjhBBCCCGEEEIIIYQQQoiuZLfbSU1NDcUPmtPjAyR1ZbWsVqsESIQQQgghhBBCCCGEEEIIAXDQthzSpF0IIYQQQgghhBBCCCGEEL2OBEiEEEIIIYQQQgghhBBCCNHrSIBECCGEEEIIIYQQQgghhBC9jgRIhBBCCCGEEEIIIYQQQgjR60iARAghhBBCCCGEEEIIIYQQvY4ESIQQQgghhBBCCCGEEEII0etIgEQIIYQQQgghhBBCCCGEEL2OBEiEEEIIIYQQQgghhBBCCNHrSIBECCGEEEIIIYQQQgghhBC9jgRIhBBCCCGEEEIIIYQQQgjR60iARAghhBBCCCGEEEIIIYQQvY4ESIQQQgghhBBCCCGEEEII0etIgEQIIYQQQgghhBBCCCGEEL2OBEiEEEIIIYQQQgghhBBCCNHrSIBECCGEEEIIIYQQQgghhBC9jgRIhBBCCCGEEEIIIYQQQgjR60iARAghhBBCCCGEEEIIIYQQvY4ESIQQQgghhBBCCCGEEEII0etIgEQIIYQQQgghhBBCCCGEEL2OtqsHIIQQQoi28fkDePwBPL7gjy+goFGr0KhVaEOX6tDvarWqq4cshBBCCCGEEEII0W1IgEQIIUSb+QMKABqZeD8kihJ83FSqlh+3rFIHi3cUsXhHEbsKq3H7Anh8/lBQpPbhbzWVilDgRKdWo9E0HUgJBVk0KoxaDUk2EymRJlKjzMHLSDNJNhN6rSSiCiGEEEIIIYQQoueSAIkQQhyhXF4/OeUOcsqd5FY4Ka5yY3f6sLu8VLm82J0+PP5g1kEgoNS7DE68+wMKqVHByfAqt48ql48ql7fBpcPjB4IBEoNWjV6rRq9RY9DVXmo16LVqDFo1Bp0mdJ2h/j612406NeFGHVaTlnCDDqtJR7hRS2KEEZtZ38WPZtsEAgo55U62FdjZnl/FjsLg5d7SGhQl+LhpVCrUatCq1ahVhIITigKlNZ52HY+igNev4PUruAi0/oaZ5Y02qVSQYDWGAiYpkSZS6gVQEiOMaDUSQBFCCCGEEEIIIUT3JQESIYToYVxeP2syy9mQU4FWrcKk12IxaDgqxUa/mDBUKhU/by/k9k83HvYEe1aZAyg96H7+gILD4w8FTNpbpFlH35gw+sSEcVy/aC4cl9oh99MeVu0rY/7aXLYX2NlRUNXiY+IPKPhRwA8cSsCiG1AUyK90kV/pYtW+hgEUjVrFiCQrp49I4KyRiaRHh3XRKIUQQgghhBBCCCGaJwESIYToJgIBBY8/gNsXwO31U+bwUFrtobTGQ1m1m6IqN6szy1mfVYHH3/RkerzVwMC4cH7bXd
LJo+84ahVEWwwk2kz0iwmjb0zzk+2BgILD66fG7aPa7QtdenwBFIDaklQKCooSnOQHSIgwMizR2qBHh9vnZ3t+FRtzKtiQU8nGnAo8vgBHmXzc+OGT7Jr7MEkjBzLAXU7E3DvglVcgOpofNhfw+docPL7uF/BQqWB8nyiiLXq0ajXa2hJbWo0arVqFP6AQUBT8tb1M4sKNGHTqYDmu2swWdW3Wi0YNalXtNtX+7QkRBoYlRmDSa7r6dIUQQgghhBBCCCFaJAESIYToIr9nlPLxqmyyyxxklzsotLsP+5iFdne7HKezqFQQazEQFaYnpvYy2qInOkxPQoSJIQnhDIizYNRpqHb72JBdwb6SGn7fU0pepYuCSif5lS7KHR6qXT4cXn8o6HGoYiwGThwUi1mvYWNOBdvyq5oMRN254FGG7FyO6fJNzD5rDs98+zQRFQUs2VHMY3/9F+FGLVMGxeLyBah0eKhweimr8VDl8h3mo3X4FAXWZVWQEmXC7a0Nxvn8tb1NGp+rWgXxViNJNhOJEcHLpAgjcTYT4UYtmaUOthdUsae4mj3F1VTUeJkxNpnoMAN9Wghk1fEHFCocHspq6v04PJRVe3B4/YQbtUSYdESYdFiNtZe1v0eYdNL7RgghhBBCCCGEEIdFAiRCCNFFhiZYiQs3kFFcTXFVzwlqtAe9Vs2YVBvj+kRiMegw6zUc0yeKIQnhDbI46jNq1by5bB8/bSvskDGVVLv5fG3OQfd76ORrGFaUQXpFAfPfvx2ATFsCcyf/hfzCqg4ZW3vy+ANkFNe0at9AvTJarVHXb8Zq0lFS7WZfSQ0ZJTVklTooqXZTVuOh3BHMiiqvCQaP2hrQGptm47lLxpJkM7XtAEIIIYQQQgghhOj1JEAihBAdZMWeUl7/bS8urx+PP4DXH8DnV0iIMHLpsWlMHhjL3DOHAmB3eflmQz4vLNlNTrmzi0feCZRg9ohBq+GYPpEclWJDrw029HZ5/ewoqGJjbiWbcirILnNS7vBQ4fBS7mjfpuVtkW+NZfZZc0LBEYDZZ80h3xrbhaPqeioVjE6xsXJvGR+tWtzuGTNatYr+sRYGJYQzJCGcwfHhaCWDRAjRAewuL1vz7BTaXRTaXRRUuimsclFsd9MnxsyE/jFM6B9NnNXY1UMVQgghhBBCHCaVorR17Wb3YLfbiYiIoLKyEqvV2tXDEUL0YjVuH5+szuaDlVnklDtxeltuWN4n2syFx6QyNMFKapSJZJsZrUbFb7tK2JRbyebcSrbk2cmtOHjARK0Kttfojq/o4QYtI1MiGBQfnNgelBDOsEQrRp0Gjy/AjoIqNuVWsim3go05lewoqMIX6IYnUivRXsxHH84lvaIgtC3TlsBFFz/W64Mk7SXZFiyvNrjeT78YSyiIJjqW0+PHqFOjUqkabd9TXE20JVgST6c5cp+PGrePAruLwtoMqqIqNxVOD4oS7HUUUOCUoXHEWQ18uT6PrXl2dhVVE2bQ0jfGTL8YCxeOSyUt2tzVpyJaweX1szarnGW7S1i2u5SNORW05m1oQJyFif2jOXd0MmPTbI3+zwghhBBCCCG6TmvjBhIgEUKIdvC/rYXc+8VmCuytK0XUnBiLgeRIE8k2I4kRJpJsJoYlhpNsM+PxByitdmPUaTDqNBi06tClTqvGrNPgrG1QvqOwiiU7ilmyo4g9rSyn1N5iww1cc0JfLjk2HYthf8KioiisyCjllaUZLN9d2mzD+e7qxQWPcsbO5WTaEkI9SNIrCvh+0ARumHFXVw+v2zPpNFiMWsINWixGLRbD/p+ECCPnH51C/1hLVw/ziOby+vl6Qx7ZZQ6Kqz2UVLv3/1R5cHr9hOk1DIizMCAunLQoMxtyKli2uwR3ba8alQqiw/TEhhuJtxqICw/2nRmaYGVIYjgJVuMRM1msKAo55U7u+GwjKzJKQ9v1GjVp0WamDo4l2mIgOkxPnDX4eCRYjdjM+i4ctWiN3AonH67M4tM12Y
fdv2tEspUrju/DuaOSMOo07TRCIYQQQgghRFtJgEQIITqJP6Dw1I87eOmXPR2SwaHTqHh0xkjOHZ3EJa+uZE1meZP
"text/plain": [
"<Figure size 2000x1000 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import geopandas as gpd\n",
"from shapely.geometry import Point\n",
"\n",
"# WGS84 lon/lat. The authority-string form replaces the deprecated\n",
"# {'init': 'epsg:4326'} dict syntax, which newer pyproj/geopandas warn about.\n",
"loc_crs = 'EPSG:4326'\n",
"# Point takes (x, y), so longitude comes first.\n",
"loc_geom = [Point(xy) for xy in zip(data['reclong'], data['reclat'])]\n",
"geo_df = gpd.GeoDataFrame(data, crs=loc_crs, geometry=loc_geom)\n",
"\n",
"# NOTE(review): gpd.datasets is deprecated in recent geopandas releases and removed in 1.0;\n",
"# confirm the pinned geopandas version or load the Natural Earth file from a local path.\n",
"world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))\n",
"# Draw the meteorite locations as red crosses on top of the world basemap.\n",
"geo_df.plot(ax=world.plot(figsize=(20, 10)), marker='x', color='red', markersize=15)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<h4>Normalizacja danych</h4>"
]
},
{
"cell_type": "code",
"execution_count": 129,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 -0.023056\n",
"1 -0.021841\n",
"2 0.163000\n",
"3 -0.019764\n",
"4 -0.021736\n",
" ... \n",
"45711 -0.022794\n",
"45712 -0.023013\n",
"45713 -0.023087\n",
"45714 -0.019324\n",
"45715 -0.022745\n",
"Name: mass, Length: 45716, dtype: float64"
]
},
"execution_count": 129,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.preprocessing import StandardScaler\n",
"\n",
"# Fit the scaler on the training split only and reuse its mean/std for the\n",
"# test and validation splits. Re-fitting on every split (as before) scales\n",
"# each split differently and leaks held-out statistics into preprocessing.\n",
"scaler = StandardScaler()\n",
"\n",
"meteorite_train['mass'] = scaler.fit_transform(meteorite_train[['mass']])\n",
"meteorite_test['mass'] = scaler.transform(meteorite_test[['mass']])\n",
"meteorite_val['mass'] = scaler.transform(meteorite_val[['mass']])\n",
"\n",
"# The full data set is standardised with its own scaler so that `scaler`\n",
"# stays fitted on the training split.\n",
"data['mass'] = StandardScaler().fit_transform(data[['mass']])\n",
"\n",
"data['mass']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<h4>Czyszczenie zbioru</h4>"
]
},
{
"cell_type": "code",
"execution_count": 130,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"name 45716\n",
"id 45716\n",
"nametype 2\n",
"recclass 466\n",
"mass 12576\n",
"fall 2\n",
"year 268\n",
"reclat 12738\n",
"reclong 14640\n",
"GeoLocation 17100\n",
"dtype: int64"
]
},
"execution_count": 130,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.nunique()"
]
},
{
"cell_type": "code",
"execution_count": 131,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"name 0\n",
"id 0\n",
"nametype 0\n",
"recclass 0\n",
"mass 131\n",
"fall 0\n",
"year 288\n",
"reclat 7315\n",
"reclong 7315\n",
"GeoLocation 7315\n",
"dtype: int64"
]
},
"execution_count": 131,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.isna().sum()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Według dokumentacji: \n",
"<br>\n",
"reclat - szerokość geograficzna\n",
"<br>\n",
"reclong - długość geograficzna"
]
},
{
"cell_type": "code",
"execution_count": 132,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(7315, 10)"
]
},
"execution_count": 132,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rows in which latitude, longitude and the combined GeoLocation are all missing.\n",
"geo_missing = data[['reclat', 'reclong', 'GeoLocation']].isna().all(axis=1)\n",
"filtered_data = data[geo_missing]\n",
"filtered_data.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Wnioski:\n",
"Miejsca, w których brakuje zarówno szerokości geograficznej, jak i długości geograficznej, zazwyczaj nie posiadają również informacji o całej geolokacji. Z uwagi na powiązanie tych trzech parametrów, zamiast próbować uzupełniać brakujące dane, wiersze zawierające braki w tych trzech obszarach zostaną usunięte."
]
},
{
"cell_type": "code",
"execution_count": 133,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"name 0\n",
"id 0\n",
"nametype 0\n",
"recclass 0\n",
"mass 119\n",
"fall 0\n",
"year 175\n",
"reclat 0\n",
"reclong 0\n",
"GeoLocation 0\n",
"dtype: int64"
]
},
"execution_count": 133,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Drop every row without a latitude from the full frame and from each split.\n",
"data, meteorite_train, meteorite_test, meteorite_val = (\n",
"    frame.dropna(subset=['reclat'])\n",
"    for frame in (data, meteorite_train, meteorite_test, meteorite_val)\n",
")\n",
"\n",
"# Remaining missing values per column after the drop.\n",
"data.isna().sum()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Według dokumentacji:\n",
"- a few entries here contain date information that was incorrectly parsed into the NASA database. As a spot check: any date that is before 860 CE or after 2016 are incorrect; these should actually be BCE years. There may be other errors and we are looking for a way to identify them.\n",
"- a few entries have latitude and longitude of 0N/0E (off the western coast of Africa, where it would be quite difficult to recover meteorites). Many of these were actually discovered in Antarctica, but exact coordinates were not given. 0N/0E locations should probably be treated as NA."
]
},
{
"cell_type": "code",
"execution_count": 134,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>id</th>\n",
" <th>nametype</th>\n",
" <th>recclass</th>\n",
" <th>mass</th>\n",
" <th>fall</th>\n",
" <th>year</th>\n",
" <th>reclat</th>\n",
" <th>reclong</th>\n",
" <th>GeoLocation</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>16356</th>\n",
" <td>Havana</td>\n",
" <td>11857</td>\n",
" <td>Valid</td>\n",
" <td>Iron, IAB complex</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>301.0</td>\n",
" <td>40.33333</td>\n",
" <td>-90.05000</td>\n",
" <td>(40.333330, -90.050000)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30679</th>\n",
" <td>Northwest Africa 7701</td>\n",
" <td>57150</td>\n",
" <td>Valid</td>\n",
" <td>CK6</td>\n",
" <td>-0.022997</td>\n",
" <td>Found</td>\n",
" <td>2101.0</td>\n",
" <td>0.00000</td>\n",
" <td>0.00000</td>\n",
" <td>(0.000000, 0.000000)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>38188</th>\n",
" <td>Ur</td>\n",
" <td>24125</td>\n",
" <td>Valid</td>\n",
" <td>Iron</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>2501.0</td>\n",
" <td>30.90000</td>\n",
" <td>46.01667</td>\n",
" <td>(30.900000, 46.016670)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>38301</th>\n",
" <td>Wietrzno-Bobrka</td>\n",
" <td>24259</td>\n",
" <td>Valid</td>\n",
" <td>Iron</td>\n",
" <td>-0.022439</td>\n",
" <td>Found</td>\n",
" <td>601.0</td>\n",
" <td>49.41667</td>\n",
" <td>21.70000</td>\n",
" <td>(49.416670, 21.700000)</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name id nametype recclass mass \\\n",
"16356 Havana 11857 Valid Iron, IAB complex NaN \n",
"30679 Northwest Africa 7701 57150 Valid CK6 -0.022997 \n",
"38188 Ur 24125 Valid Iron NaN \n",
"38301 Wietrzno-Bobrka 24259 Valid Iron -0.022439 \n",
"\n",
" fall year reclat reclong GeoLocation \n",
"16356 Found 301.0 40.33333 -90.05000 (40.333330, -90.050000) \n",
"30679 Found 2101.0 0.00000 0.00000 (0.000000, 0.000000) \n",
"38188 Found 2501.0 30.90000 46.01667 (30.900000, 46.016670) \n",
"38301 Found 601.0 49.41667 21.70000 (49.416670, 21.700000) "
]
},
"execution_count": 134,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rows whose year lies outside 860-2016, the range the dataset notes describe as valid.\n",
"data.query('year > 2016 or year < 860')"
]
},
{
"cell_type": "code",
"execution_count": 135,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>id</th>\n",
" <th>nametype</th>\n",
" <th>recclass</th>\n",
" <th>mass</th>\n",
" <th>fall</th>\n",
" <th>year</th>\n",
" <th>reclat</th>\n",
" <th>reclong</th>\n",
" <th>GeoLocation</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>37</th>\n",
" <td>Northwest Africa 5815</td>\n",
" <td>50693</td>\n",
" <td>Valid</td>\n",
" <td>L5</td>\n",
" <td>-0.022646</td>\n",
" <td>Found</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>(0.000000, 0.000000)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>596</th>\n",
" <td>Mason Gully</td>\n",
" <td>53653</td>\n",
" <td>Valid</td>\n",
" <td>H5</td>\n",
" <td>-0.023050</td>\n",
" <td>Fell</td>\n",
" <td>2010.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>(0.000000, 0.000000)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1648</th>\n",
" <td>Allan Hills 09004</td>\n",
" <td>52119</td>\n",
" <td>Valid</td>\n",
" <td>Howardite</td>\n",
" <td>-0.022707</td>\n",
" <td>Found</td>\n",
" <td>2009.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>(0.000000, 0.000000)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1649</th>\n",
" <td>Allan Hills 09005</td>\n",
" <td>55797</td>\n",
" <td>Valid</td>\n",
" <td>L5</td>\n",
" <td>-0.022880</td>\n",
" <td>Found</td>\n",
" <td>2009.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>(0.000000, 0.000000)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1650</th>\n",
" <td>Allan Hills 09006</td>\n",
" <td>55798</td>\n",
" <td>Valid</td>\n",
" <td>H5</td>\n",
" <td>-0.022912</td>\n",
" <td>Found</td>\n",
" <td>2009.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>(0.000000, 0.000000)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45655</th>\n",
" <td>Yamato 984144</td>\n",
" <td>40764</td>\n",
" <td>Valid</td>\n",
" <td>H6</td>\n",
" <td>-0.023028</td>\n",
" <td>Found</td>\n",
" <td>1998.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>(0.000000, 0.000000)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45656</th>\n",
" <td>Yamato 984145</td>\n",
" <td>40765</td>\n",
" <td>Valid</td>\n",
" <td>L6</td>\n",
" <td>-0.022998</td>\n",
" <td>Found</td>\n",
" <td>1998.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>(0.000000, 0.000000)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45657</th>\n",
" <td>Yamato 984146</td>\n",
" <td>40766</td>\n",
" <td>Valid</td>\n",
" <td>H3</td>\n",
" <td>-0.023059</td>\n",
" <td>Found</td>\n",
" <td>1998.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>(0.000000, 0.000000)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45658</th>\n",
" <td>Yamato 984147</td>\n",
" <td>40767</td>\n",
" <td>Valid</td>\n",
" <td>LL6</td>\n",
" <td>-0.022886</td>\n",
" <td>Found</td>\n",
" <td>1998.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>(0.000000, 0.000000)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45659</th>\n",
" <td>Yamato 984148</td>\n",
" <td>40768</td>\n",
" <td>Valid</td>\n",
" <td>L5</td>\n",
" <td>-0.023085</td>\n",
" <td>Found</td>\n",
" <td>1998.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>(0.000000, 0.000000)</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>6214 rows × 10 columns</p>\n",
"</div>"
],
"text/plain": [
" name id nametype recclass mass fall \\\n",
"37 Northwest Africa 5815 50693 Valid L5 -0.022646 Found \n",
"596 Mason Gully 53653 Valid H5 -0.023050 Fell \n",
"1648 Allan Hills 09004 52119 Valid Howardite -0.022707 Found \n",
"1649 Allan Hills 09005 55797 Valid L5 -0.022880 Found \n",
"1650 Allan Hills 09006 55798 Valid H5 -0.022912 Found \n",
"... ... ... ... ... ... ... \n",
"45655 Yamato 984144 40764 Valid H6 -0.023028 Found \n",
"45656 Yamato 984145 40765 Valid L6 -0.022998 Found \n",
"45657 Yamato 984146 40766 Valid H3 -0.023059 Found \n",
"45658 Yamato 984147 40767 Valid LL6 -0.022886 Found \n",
"45659 Yamato 984148 40768 Valid L5 -0.023085 Found \n",
"\n",
" year reclat reclong GeoLocation \n",
"37 NaN 0.0 0.0 (0.000000, 0.000000) \n",
"596 2010.0 0.0 0.0 (0.000000, 0.000000) \n",
"1648 2009.0 0.0 0.0 (0.000000, 0.000000) \n",
"1649 2009.0 0.0 0.0 (0.000000, 0.000000) \n",
"1650 2009.0 0.0 0.0 (0.000000, 0.000000) \n",
"... ... ... ... ... \n",
"45655 1998.0 0.0 0.0 (0.000000, 0.000000) \n",
"45656 1998.0 0.0 0.0 (0.000000, 0.000000) \n",
"45657 1998.0 0.0 0.0 (0.000000, 0.000000) \n",
"45658 1998.0 0.0 0.0 (0.000000, 0.000000) \n",
"45659 1998.0 0.0 0.0 (0.000000, 0.000000) \n",
"\n",
"[6214 rows x 10 columns]"
]
},
"execution_count": 135,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rows recorded at exactly 0N/0E, which the dataset notes treat as a placeholder location.\n",
"data.query('reclat == 0 and reclong == 0')"
]
},
{
"cell_type": "code",
"execution_count": 136,
"metadata": {},
"outputs": [],
"source": [
"# For the full frame and for each split, collect the index labels of rows with a year\n",
"# outside 860-2016 and of rows located at exactly 0N/0E.\n",
"incorrect_years_index = data.index[(data['year'] < 860) | (data['year'] > 2016)]\n",
"incorrect_location_index = data.index[(data['reclong'] == 0) & (data['reclat'] == 0)]\n",
"\n",
"incorrect_years_index_train = meteorite_train.index[(meteorite_train['year'] < 860) | (meteorite_train['year'] > 2016)]\n",
"incorrect_location_index_train = meteorite_train.index[(meteorite_train['reclong'] == 0) & (meteorite_train['reclat'] == 0)]\n",
"\n",
"incorrect_years_index_test = meteorite_test.index[(meteorite_test['year'] < 860) | (meteorite_test['year'] > 2016)]\n",
"incorrect_location_index_test = meteorite_test.index[(meteorite_test['reclong'] == 0) & (meteorite_test['reclat'] == 0)]\n",
"\n",
"incorrect_years_index_val = meteorite_val.index[(meteorite_val['year'] < 860) | (meteorite_val['year'] > 2016)]\n",
"incorrect_location_index_val = meteorite_val.index[(meteorite_val['reclong'] == 0) & (meteorite_val['reclat'] == 0)]\n",
"\n",
"# Remove the union of both label sets from each frame in place.\n",
"for frame, bad_years, bad_location in (\n",
"    (data, incorrect_years_index, incorrect_location_index),\n",
"    (meteorite_test, incorrect_years_index_test, incorrect_location_index_test),\n",
"    (meteorite_train, incorrect_years_index_train, incorrect_location_index_train),\n",
"    (meteorite_val, incorrect_years_index_val, incorrect_location_index_val),\n",
"):\n",
"    frame.drop(bad_years.union(bad_location), inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 137,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"name 0\n",
"id 0\n",
"nametype 0\n",
"recclass 0\n",
"mass 117\n",
"fall 0\n",
"year 147\n",
"reclat 0\n",
"reclong 0\n",
"GeoLocation 0\n",
"dtype: int64"
]
},
"execution_count": 137,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.isna().sum()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We wcześniejszych obserwacjach zostało zauważone, że wszystkie meteoryty odnalezione w Österplana w Szwecji mają niską masę przez brak możliwości jej obliczenia. Dlatego wszystkie meteoryty odnalezione w tym miejscu z masą Null zostaną dopisane do tej grupy, przypisując im masę 0."
]
},
{
"cell_type": "code",
"execution_count": 138,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>id</th>\n",
" <th>nametype</th>\n",
" <th>recclass</th>\n",
" <th>mass</th>\n",
" <th>fall</th>\n",
" <th>year</th>\n",
" <th>reclat</th>\n",
" <th>reclong</th>\n",
" <th>GeoLocation</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>31014</th>\n",
" <td>Österplana 002</td>\n",
" <td>44802</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>1993.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31015</th>\n",
" <td>Österplana 003</td>\n",
" <td>44803</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>1993.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31016</th>\n",
" <td>Österplana 004</td>\n",
" <td>44804</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>1994.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31017</th>\n",
" <td>Österplana 005</td>\n",
" <td>44805</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>1990.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31018</th>\n",
" <td>Österplana 006</td>\n",
" <td>44806</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>NaN</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31019</th>\n",
" <td>Österplana 007</td>\n",
" <td>44807</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>1993.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31020</th>\n",
" <td>Österplana 008</td>\n",
" <td>44808</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>1995.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31021</th>\n",
" <td>Österplana 009</td>\n",
" <td>44809</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>1996.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31022</th>\n",
" <td>Österplana 010</td>\n",
" <td>44810</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>1995.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31023</th>\n",
" <td>Österplana 011</td>\n",
" <td>44811</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>1997.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31024</th>\n",
" <td>Österplana 012</td>\n",
" <td>44812</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>1996.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31025</th>\n",
" <td>Österplana 013</td>\n",
" <td>44813</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>1996.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31026</th>\n",
" <td>Österplana 014</td>\n",
" <td>44814</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>1996.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31027</th>\n",
" <td>Österplana 015</td>\n",
" <td>44815</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>1996.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31028</th>\n",
" <td>Österplana 016</td>\n",
" <td>44816</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>1996.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31029</th>\n",
" <td>Österplana 017</td>\n",
" <td>44817</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>1997.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31030</th>\n",
" <td>Österplana 018</td>\n",
" <td>44818</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>1996.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31031</th>\n",
" <td>Österplana 019</td>\n",
" <td>44819</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>1997.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31032</th>\n",
" <td>Österplana 020</td>\n",
" <td>44820</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>1997.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31033</th>\n",
" <td>Österplana 021</td>\n",
" <td>44821</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>1997.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31034</th>\n",
" <td>Österplana 022</td>\n",
" <td>44822</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>1999.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31035</th>\n",
" <td>Österplana 023</td>\n",
" <td>44823</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>1999.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31036</th>\n",
" <td>Österplana 024</td>\n",
" <td>44824</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>1999.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31037</th>\n",
" <td>Österplana 025</td>\n",
" <td>44825</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>2000.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31038</th>\n",
" <td>Österplana 026</td>\n",
" <td>44826</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>2000.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31039</th>\n",
" <td>Österplana 027</td>\n",
" <td>44827</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>2000.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31040</th>\n",
" <td>Österplana 028</td>\n",
" <td>44828</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>2000.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31041</th>\n",
" <td>Österplana 029</td>\n",
" <td>44829</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>1998.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31042</th>\n",
" <td>Österplana 030</td>\n",
" <td>44830</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>1994.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31043</th>\n",
" <td>Österplana 031</td>\n",
" <td>44831</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>1998.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31044</th>\n",
" <td>Österplana 032</td>\n",
" <td>44832</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>2000.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31045</th>\n",
" <td>Österplana 033</td>\n",
" <td>44833</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>2000.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31046</th>\n",
" <td>Österplana 034</td>\n",
" <td>44834</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>1998.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31047</th>\n",
" <td>Österplana 035</td>\n",
" <td>44835</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>1996.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31048</th>\n",
" <td>Österplana 036</td>\n",
" <td>44836</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>1996.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31049</th>\n",
" <td>Österplana 037</td>\n",
" <td>44837</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>1998.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31050</th>\n",
" <td>Österplana 038</td>\n",
" <td>44838</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>1999.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31051</th>\n",
" <td>Österplana 039</td>\n",
" <td>44839</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>2000.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31052</th>\n",
" <td>Österplana 040</td>\n",
" <td>44840</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>2000.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31053</th>\n",
" <td>Österplana 041</td>\n",
" <td>44841</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>1996.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31054</th>\n",
" <td>Österplana 042</td>\n",
" <td>44842</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>2000.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31055</th>\n",
" <td>Österplana 043</td>\n",
" <td>44843</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>2002.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31056</th>\n",
" <td>Österplana 044</td>\n",
" <td>44844</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>2002.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31057</th>\n",
" <td>Österplana 045</td>\n",
" <td>44845</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>2002.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31058</th>\n",
" <td>Österplana 046</td>\n",
" <td>44846</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>2002.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31059</th>\n",
" <td>Österplana 047</td>\n",
" <td>44847</td>\n",
" <td>Relict</td>\n",
" <td>Relict OC</td>\n",
" <td>NaN</td>\n",
" <td>Found</td>\n",
" <td>2002.0</td>\n",
" <td>58.58333</td>\n",
" <td>13.43333</td>\n",
" <td>(58.583330, 13.433330)</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name id nametype recclass mass fall year \\\n",
"31014 Österplana 002 44802 Relict Relict OC NaN Found 1993.0 \n",
"31015 Österplana 003 44803 Relict Relict OC NaN Found 1993.0 \n",
"31016 Österplana 004 44804 Relict Relict OC NaN Found 1994.0 \n",
"31017 Österplana 005 44805 Relict Relict OC NaN Found 1990.0 \n",
"31018 Österplana 006 44806 Relict Relict OC NaN Found NaN \n",
"31019 Österplana 007 44807 Relict Relict OC NaN Found 1993.0 \n",
"31020 Österplana 008 44808 Relict Relict OC NaN Found 1995.0 \n",
"31021 Österplana 009 44809 Relict Relict OC NaN Found 1996.0 \n",
"31022 Österplana 010 44810 Relict Relict OC NaN Found 1995.0 \n",
"31023 Österplana 011 44811 Relict Relict OC NaN Found 1997.0 \n",
"31024 Österplana 012 44812 Relict Relict OC NaN Found 1996.0 \n",
"31025 Österplana 013 44813 Relict Relict OC NaN Found 1996.0 \n",
"31026 Österplana 014 44814 Relict Relict OC NaN Found 1996.0 \n",
"31027 Österplana 015 44815 Relict Relict OC NaN Found 1996.0 \n",
"31028 Österplana 016 44816 Relict Relict OC NaN Found 1996.0 \n",
"31029 Österplana 017 44817 Relict Relict OC NaN Found 1997.0 \n",
"31030 Österplana 018 44818 Relict Relict OC NaN Found 1996.0 \n",
"31031 Österplana 019 44819 Relict Relict OC NaN Found 1997.0 \n",
"31032 Österplana 020 44820 Relict Relict OC NaN Found 1997.0 \n",
"31033 Österplana 021 44821 Relict Relict OC NaN Found 1997.0 \n",
"31034 Österplana 022 44822 Relict Relict OC NaN Found 1999.0 \n",
"31035 Österplana 023 44823 Relict Relict OC NaN Found 1999.0 \n",
"31036 Österplana 024 44824 Relict Relict OC NaN Found 1999.0 \n",
"31037 Österplana 025 44825 Relict Relict OC NaN Found 2000.0 \n",
"31038 Österplana 026 44826 Relict Relict OC NaN Found 2000.0 \n",
"31039 Österplana 027 44827 Relict Relict OC NaN Found 2000.0 \n",
"31040 Österplana 028 44828 Relict Relict OC NaN Found 2000.0 \n",
"31041 Österplana 029 44829 Relict Relict OC NaN Found 1998.0 \n",
"31042 Österplana 030 44830 Relict Relict OC NaN Found 1994.0 \n",
"31043 Österplana 031 44831 Relict Relict OC NaN Found 1998.0 \n",
"31044 Österplana 032 44832 Relict Relict OC NaN Found 2000.0 \n",
"31045 Österplana 033 44833 Relict Relict OC NaN Found 2000.0 \n",
"31046 Österplana 034 44834 Relict Relict OC NaN Found 1998.0 \n",
"31047 Österplana 035 44835 Relict Relict OC NaN Found 1996.0 \n",
"31048 Österplana 036 44836 Relict Relict OC NaN Found 1996.0 \n",
"31049 Österplana 037 44837 Relict Relict OC NaN Found 1998.0 \n",
"31050 Österplana 038 44838 Relict Relict OC NaN Found 1999.0 \n",
"31051 Österplana 039 44839 Relict Relict OC NaN Found 2000.0 \n",
"31052 Österplana 040 44840 Relict Relict OC NaN Found 2000.0 \n",
"31053 Österplana 041 44841 Relict Relict OC NaN Found 1996.0 \n",
"31054 Österplana 042 44842 Relict Relict OC NaN Found 2000.0 \n",
"31055 Österplana 043 44843 Relict Relict OC NaN Found 2002.0 \n",
"31056 Österplana 044 44844 Relict Relict OC NaN Found 2002.0 \n",
"31057 Österplana 045 44845 Relict Relict OC NaN Found 2002.0 \n",
"31058 Österplana 046 44846 Relict Relict OC NaN Found 2002.0 \n",
"31059 Österplana 047 44847 Relict Relict OC NaN Found 2002.0 \n",
"\n",
" reclat reclong GeoLocation \n",
"31014 58.58333 13.43333 (58.583330, 13.433330) \n",
"31015 58.58333 13.43333 (58.583330, 13.433330) \n",
"31016 58.58333 13.43333 (58.583330, 13.433330) \n",
"31017 58.58333 13.43333 (58.583330, 13.433330) \n",
"31018 58.58333 13.43333 (58.583330, 13.433330) \n",
"31019 58.58333 13.43333 (58.583330, 13.433330) \n",
"31020 58.58333 13.43333 (58.583330, 13.433330) \n",
"31021 58.58333 13.43333 (58.583330, 13.433330) \n",
"31022 58.58333 13.43333 (58.583330, 13.433330) \n",
"31023 58.58333 13.43333 (58.583330, 13.433330) \n",
"31024 58.58333 13.43333 (58.583330, 13.433330) \n",
"31025 58.58333 13.43333 (58.583330, 13.433330) \n",
"31026 58.58333 13.43333 (58.583330, 13.433330) \n",
"31027 58.58333 13.43333 (58.583330, 13.433330) \n",
"31028 58.58333 13.43333 (58.583330, 13.433330) \n",
"31029 58.58333 13.43333 (58.583330, 13.433330) \n",
"31030 58.58333 13.43333 (58.583330, 13.433330) \n",
"31031 58.58333 13.43333 (58.583330, 13.433330) \n",
"31032 58.58333 13.43333 (58.583330, 13.433330) \n",
"31033 58.58333 13.43333 (58.583330, 13.433330) \n",
"31034 58.58333 13.43333 (58.583330, 13.433330) \n",
"31035 58.58333 13.43333 (58.583330, 13.433330) \n",
"31036 58.58333 13.43333 (58.583330, 13.433330) \n",
"31037 58.58333 13.43333 (58.583330, 13.433330) \n",
"31038 58.58333 13.43333 (58.583330, 13.433330) \n",
"31039 58.58333 13.43333 (58.583330, 13.433330) \n",
"31040 58.58333 13.43333 (58.583330, 13.433330) \n",
"31041 58.58333 13.43333 (58.583330, 13.433330) \n",
"31042 58.58333 13.43333 (58.583330, 13.433330) \n",
"31043 58.58333 13.43333 (58.583330, 13.433330) \n",
"31044 58.58333 13.43333 (58.583330, 13.433330) \n",
"31045 58.58333 13.43333 (58.583330, 13.433330) \n",
"31046 58.58333 13.43333 (58.583330, 13.433330) \n",
"31047 58.58333 13.43333 (58.583330, 13.433330) \n",
"31048 58.58333 13.43333 (58.583330, 13.433330) \n",
"31049 58.58333 13.43333 (58.583330, 13.433330) \n",
"31050 58.58333 13.43333 (58.583330, 13.433330) \n",
"31051 58.58333 13.43333 (58.583330, 13.433330) \n",
"31052 58.58333 13.43333 (58.583330, 13.433330) \n",
"31053 58.58333 13.43333 (58.583330, 13.433330) \n",
"31054 58.58333 13.43333 (58.583330, 13.433330) \n",
"31055 58.58333 13.43333 (58.583330, 13.433330) \n",
"31056 58.58333 13.43333 (58.583330, 13.433330) \n",
"31057 58.58333 13.43333 (58.583330, 13.433330) \n",
"31058 58.58333 13.43333 (58.583330, 13.433330) \n",
"31059 58.58333 13.43333 (58.583330, 13.433330) "
]
},
"execution_count": 138,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.loc[(data['mass'].isnull()) & (data['name'].str.startswith('Österplana'))]"
]
},
{
"cell_type": "code",
"execution_count": 139,
"metadata": {},
"outputs": [],
"source": [
"data.loc[(data['mass'].isnull()) & (data['name'].str.startswith('Österplana')), 'mass'] = 0\n",
"meteorite_test.loc[(meteorite_test['mass'].isnull()) & (meteorite_test['name'].str.startswith('Österplana')), 'mass'] = 0\n",
"meteorite_train.loc[(meteorite_train['mass'].isnull()) & (meteorite_train['name'].str.startswith('Österplana')), 'mass'] = 0\n",
"meteorite_val.loc[(meteorite_val['mass'].isnull()) & (meteorite_val['name'].str.startswith('Österplana')), 'mass'] = 0"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
    "Pozostałe wiersze z brakującą masą zostaną usunięte, podobnie jak wiersze z brakującym rokiem."
]
},
{
"cell_type": "code",
"execution_count": 140,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"name 0\n",
"id 0\n",
"nametype 0\n",
"recclass 0\n",
"mass 0\n",
"fall 0\n",
"year 0\n",
"reclat 0\n",
"reclong 0\n",
"GeoLocation 0\n",
"dtype: int64"
]
},
"execution_count": 140,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.dropna(subset=['mass', 'year'], inplace=True)\n",
"meteorite_train.dropna(subset=['mass', 'year'], inplace=True)\n",
"meteorite_test.dropna(subset=['mass', 'year'], inplace=True)\n",
"meteorite_val.dropna(subset=['mass', 'year'], inplace=True)\n",
"data.isnull().sum()"
]
},
{
"cell_type": "code",
"execution_count": 141,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\Genos\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\pyproj\\crs\\crs.py:141: FutureWarning: '+init=<authority>:<code>' syntax is deprecated. '<authority>:<code>' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6\n",
" in_crs_string = _prepare_from_proj_string(in_crs_string)\n",
"C:\\Users\\Genos\\AppData\\Local\\Temp\\ipykernel_21240\\2651438472.py:4: FutureWarning: The geopandas.dataset module is deprecated and will be removed in GeoPandas 1.0. You can get the original 'naturalearth_lowres' data from https://www.naturalearthdata.com/downloads/110m-cultural-vectors/.\n",
" world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))\n"
]
},
{
"data": {
"text/plain": [
"<Axes: >"
]
},
"execution_count": 141,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABkgAAAMYCAYAAACAPmtuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/H5lhTAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzddXxb59UH8N8VM9qyzAyxw8xNmobKTdqVaWVa23Vdaeu6boV3XdduKzOl3LRdKSmEGmawg2Zmi1n3vn/IduKYZFuyDOf7+ShXlqWrR44t3fuc55zDcBzHgRBCCCGEEEIIIYQQQgghZBThRXoAhBBCCCGEEEIIIYQQQgghg40CJIQQQgghhBBCCCGEEEIIGXUoQEIIIYQQQgghhBBCCCGEkFGHAiSEEEIIIYQQQgghhBBCCBl1KEBCCCGEEEIIIYQQQgghhJBRhwIkhBBCCCGEEEIIIYQQQggZdShAQgghhBBCCCGEEEIIIYSQUYcCJIQQQgghhBBCCCGEEEIIGXUEkR7AQLEsi+rqaiiVSjAME+nhEEIIIYQQQgghhBBCCCEkgjiOg9VqRVxcHHi87vNEhn2ApLq6GomJiZEeBiGEEEIIIYQQQgghhBBChpCKigokJCR0+/1hHyBRKpUAAi9UpVJFeDSEEEIIIYQQQgghhBBCCIkki8WCxMTE9vhBd4Z9gKStrJZKpaIACSGEEEIIIYQQQgghhBBCAKDXthzUpJ0QQgghhBBCCCGEEEIIIaMOBUgIIYQQQgghhBBCCCGEEDLqUICEEEIIIYQQQgghhBBCCCGjDgVICCGEEEIIIYQQQgghhBAy6lCAhBBCCCGEEEIIIYQQQgghow4FSAghhBBCCCGEEEIIIYQQMupQgIQQQgghhBBCCCGEEEIIIaMOBUgIIYQQQgghhBBCCCGEEDLqUICEEEIIIYQQQgghhBBCCCGjDgVICCGEEEIIIYQQQgghhBAy6lCAhBBCCCGEEEIIIYQQQgghow4FSAghhBBCCCGEEEIIIYQQMupQgIQQQgghhBBCCCGEEEIIIaMOBUgIIYQQQgghhBBCCCGEEDLqUICEEEIIIYQQQgghhBBCCCGjDgVICCGEEEIIIYQQQgghhBAy6lCAhBBCCCGEEEIIIYQQQgghow4FSAghhBBCCCGEEEIIIYQQMupQgIQQQgghhBBCCCGEEEIIIaMOBUgIIYQQQgghhBBCCCGEEDLqUICEEEIIIYQQQgghhBBCCCGjDgVICCGEEEIIIYQQQgghhBAy6lCAhBBCCCGEEEIIIYQQQgghow4FSAghhBBCCCGEEEIIIYQQMupQgIQQQgghhBBCCCGEEEIIIaMOBUgIIYQQQgghhBBCCCGEEDLqUICEEEIIIYQQQgghhBBCCCGjDgVICCGEEEIIIYQQQgghhBAy6lCAhBBCCCGEEEIIIYQQQgghow4FSAghhBBCCCGEEEIIIYQQMuoIIj0AQgghhBAyunh8LOxuHzx+Fh4fC7ePhbf1etttbbef+rWPZcEwDIQ8BnweAwGfAZ/Hg6Dt6/Yt75Tvd3F7h8d3cXvrlmGYSP+oCCGEEEIIIYSEEQVICCGEEEJI2FU0O7D+WD3WHa3HtqImuH1spIfUK/4pARMBj0G0UowErQyJOmlgq5UhQStFok4GrUxIARVCCCGEEEIIGWYoQEIIIYSMYBzHwen1w+L0werywuLywuL0weLywurywedn4ecAP8vCz5625Tj4WA4sG9iKBDzkxqqQFaOEx8fC6mrbj7f1uq/9etvW5vaBASAS8CAW8Fu3gcvpt4m6uK2n+3a1PxGfR5PUIeL1syhptONorRVHayw4VmtFi8MDPo8BjwlkX/CYkwGEtuu8tgwMJnAdAPaVt6CowR7hV9R3fpaDn+Xgaf3a4vJ1+zrkIj4STgmYJGilHb5WS4WDN3BCCCGEEEIIIUGhAAkhhBAyAnh8LL47VI3SRgeqTE5UtThRaXKg1u
yC189FeniDhs9joBALoJQIoJIIoZQIoJQIoZIEblPLREjSyZAaJUdalBxauSjSQ444juPQYHXjSK0Vx2otOFpjxdFaKwrrbfD4h36Wx1Bh9/hxrM6KY3XWLr+vlAhas06kHbNQWrcKMR2WE0IIIYQQQshgozMxQgghZIipNbuwrbgRRfV2SIQ8SEUCyER8SIV8pETJMTZOBQGf137/ogYb7v54H/KrLBEc9dDgZzmYnV6YnV4Azl7vr5EJW4MlCszN1OOiSQnhH+QQUNniwJubS3C0JjCh32z39P4gMiBWlw9Haiw4UnPy71Qm4mNcvBoTEjVYkB2NGal68HmUAUUIIYQQQgghg4UCJIQQQkgEsSyHeqsbO0ubsa2oCduLm1DS2HMpIqVYgBlpesxO1wMAnll7DE6vfzCGO+KYHF4UVFvg9bOYkKiO9HCCYnF5cajSjAOVJhytsSJKIUa6QY70aAUyDAro5aJey4x9vqcSb28pHZwBD1PC1gbuQh6vvRm8kN/a2L21fBi/razYKeXF+Aw63Nbh+wwQo5JgQqIGExM1SI9WUECEEEIIIYQQQiKIAiSEEEJICLSVKapocaLB6obb54fHx8LjZ+HxsTA5vGi2e9Bs96DJ7kaTLXC9xeEB28cKWFa3Dz8fqcPPR+rC82JGsESdFNkxKoyJVSLbqESOUYUUvaxDRs6pOI6DxeVDi90Dm9sHu9sHu8cHm9sPmyvw9em3290+eP0sOA7gwIHj2vZ12tft/wBp0XIsyI7GnIwoKCUne1W4vH4crrHgQIUJB1uDIsW99PJQS4VIjw4ETNINCqRHKxCjEkMpEbaXH5ubEQWvn8WeshbsrzDB5R25pbRyY1VYOSUBmQYF/BwHv58LbFv7i7AcB1/rbW39dnz+wN+u28tCLRMiTi1FrEaCOLUUmn40Y2+xe1DUYINYwMe4hOERiCOEEEIIIYSQ0YDhOG5YFya3WCxQq9Uwm81QqVSRHg4hhJBRZP2xery7tRQVzQ5Utjjh9o3cSeahTikRQC8XQa8QQycXtV4XQScXI0ohQoJWiqwYZYfgAxAIQJQ22VFjdqHG5EKt2Ylqswu1ZheqzU7Uml1weAYvO0fAYzA1RYtknRz51WYcq7XCF2QETeO04Kk1L+DxRTehRhWNWEsDHv3ldTy07E6YpB2PkYR8BhqZCKlRciTrZPD4A0E8k9OLWrMTdRZ3OF7eiCAV8tuDJXEaCWJbt3EaKaKVYtSYXSiqt6GowYaiejsKG2wdSphNTNTg+jkpWD42FiJB14G5YLl9frTYTwZfmx0eNNvcaHZ40Wx3w+vjoJIKoJYKoZIKA1vJKddbvycW8Af6YyGEEEIIIYSQISXYuAEFSAghhJB+Mju9uPTVbTha23VTZhIesWoJZqTqMCNNj6nJWsSoJRDxee2ZGQI+A2E3GSGns7q8uOL1HThUZQ7jiAfHy18+ieXHt6JMY8S959yH5757FsmmWvyQNRu3XfRw0PsRC3iQCPmwuLwY3keJQ5OQz+CK6Un43aJM6BXiDt9zef2oNbvQ1BrwaLF70NSaadZka9223t7cmtUUChIhDxqpCPeclYlLpyX2OUOGEEIIIYQQQoYaCpAQQgghIcKyHDx+Fj6Wg0TA61SOqc7iwvbiQP+QrUVNKGtyRGiko5NIwMOUJC1mp+sxO0OP8QmaHgMkHMfB5WXR4ghMOBc32PGHzw4M+wygWEsDPv7oISSbattvK9MYcdnlT6FGFR3BkREAYBjgoonxuGtRJgCgtNGO4kY7ShvtKGm9VJudEQ9K3bYgHX9cmk1BEkIIIUOKn+WobxchhJA+oQAJIYQQ0kdbixrxzNpjKGtywNvaP8TX2qegTYxKjMunJ+GK6UkwqCSd9sFxHDYXNuK/vxRiZ2nzYA5/1ODzGExM1GBWa6P6yclaSIRdlwhqsrlxqMqM/CozDlaaUVBtQYPNDc8wD4Z0Z3LlEaxedX/71yuufAZ7E8ZEcESkTb
xGCpGAh4pmR9Bl08JJIRYgK0aBbKMKOcZAT57sGCW0clGkh0YIIWSUMTu9KGqwod4SKHNaa3EHrrde6swueP0cJid
"text/plain": [
"<Figure size 2000x1000 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"loc_geom = [Point(xy) for xy in zip(data['reclong'], data['reclat'])]\n",
"geo_df = gpd.GeoDataFrame(data, crs=loc_crs, geometry=loc_geom)\n",
"\n",
"world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))\n",
"geo_df.plot(ax=world.plot(figsize=(20, 10)), marker='x', color='red', markersize=15)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}