547 lines
907 KiB
Plaintext
547 lines
907 KiB
Plaintext
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 3,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"from sklearn.cluster import KMeans\n",
|
||
|
"from sklearn.decomposition import PCA\n",
|
||
|
"from sklearn.preprocessing import StandardScaler\n",
|
||
|
"import pandas as pd\n",
|
||
|
"import numpy as np\n",
|
||
|
"import matplotlib.pyplot as plt\n",
|
||
|
"import seaborn as sns"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"attachments": {},
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Formatowanie danych"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 4,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<style scoped>\n",
|
||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
" vertical-align: middle;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe tbody tr th {\n",
|
||
|
" vertical-align: top;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe thead th {\n",
|
||
|
" text-align: right;\n",
|
||
|
" }\n",
|
||
|
"</style>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>-0.388640</td>\n",
|
||
|
" <td>-0.499154</td>\n",
|
||
|
" <td>-0.618683</td>\n",
|
||
|
" <td>-0.193055</td>\n",
|
||
|
" <td>-0.986574</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>0.481205</td>\n",
|
||
|
" <td>0.089217</td>\n",
|
||
|
" <td>-0.618683</td>\n",
|
||
|
" <td>-0.737328</td>\n",
|
||
|
" <td>-0.263285</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>0.072479</td>\n",
|
||
|
" <td>0.994405</td>\n",
|
||
|
" <td>0.503142</td>\n",
|
||
|
" <td>2.800447</td>\n",
|
||
|
" <td>0.821648</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>0.208721</td>\n",
|
||
|
" <td>0.270255</td>\n",
|
||
|
" <td>0.503142</td>\n",
|
||
|
" <td>-1.009464</td>\n",
|
||
|
" <td>-0.263285</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>0.150328</td>\n",
|
||
|
" <td>0.089217</td>\n",
|
||
|
" <td>0.503142</td>\n",
|
||
|
" <td>1.984038</td>\n",
|
||
|
" <td>3.353159</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" 0 1 2 3 4\n",
|
||
|
"0 -0.388640 -0.499154 -0.618683 -0.193055 -0.986574\n",
|
||
|
"1 0.481205 0.089217 -0.618683 -0.737328 -0.263285\n",
|
||
|
"2 0.072479 0.994405 0.503142 2.800447 0.821648\n",
|
||
|
"3 0.208721 0.270255 0.503142 -1.009464 -0.263285\n",
|
||
|
"4 0.150328 0.089217 0.503142 1.984038 3.353159"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 4,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"data = pd.read_csv('flats_for_clustering.tsv',sep='\\t')\n",
|
||
|
"data['Piętro'] = data['Piętro'].replace('parter','0')\n",
|
||
|
"data['Piętro'] = data['Piętro'].replace('niski parter','0')\n",
|
||
|
"data[\"Piętro\"] = data[\"Piętro\"].apply(pd.to_numeric, errors=\"coerce\")\n",
|
||
|
"data = data.dropna()\n",
|
||
|
"scaler = StandardScaler()\n",
|
||
|
"scaler.fit(data)\n",
|
||
|
"scaled_data = scaler.transform(data)\n",
|
||
|
"scaled_data = pd.DataFrame(scaled_data)\n",
|
||
|
"scaled_data.head()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 5,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"<seaborn.axisgrid.PairGrid at 0x7f0d6a5819d0>"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 5,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABM8AAATPCAYAAAAWF9LOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdeXxU9bk/8M/sS5JJQgIIskUTZQuI4ppglS6KgkKx/qr2yqLdFO1yq6iAooCKS2+r2NZWEL3XpbUWVNzaSluB1g2pLIISpYCyJiSZzHpm+/0xmWHOOnOSSWbJ5/16+XpJzsnMmcn5nuU5z/d5DLFYLAYiIiIiIiIiIiKSMeZ6A4iIiIiIiIiIiPIVg2dEREREREREREQqGDwjIiIiIiIiIiJSweAZERERERERERGRCgbPiIiIiIiIiIiIVDB4RkREREREREREpILBMyIiIiIiIiIiIhUMnhEREREREREREalg8IyIiIiIiIiIiEgFg2dEREREREREREQqGDxL8fbbb2PatGkYPHgwDAYD1q5dq/s1YrEYHnroIZxyyimw2Ww48cQTsWzZsuxvLBERERERERER9Thzrjcgn3i9XowfPx5z587FN7/5zS69xo9+9CP8+c9/xkMPPYT6+nocO3YMx44dy/KWEhERERERERFRbzDEYrFYrjciHxkMBqxZswbTp09P/iwYDGLBggV47rnn0NbWhrFjx2L58uW44IILAAA7d+7EuHHjsH37dpx66qm52XAiIiIiIiIiIsoaTtvUYd68efjXv/6F559/Hlu3bsW3vvUtXHzxxdi9ezcA4JVXXsFJJ52EdevWoaamBiNGjMD111/PzDMiIiIiIiIiogLF4FmG9u3bhyeffBIvvPACJk2ahJNPPhk/+9nP0NjYiCeffBIA8Pnnn2Pv3r144YUX8PTTT2P16tXYvHkzrrjiihxvPRERERERERERdQVrnmVo27ZtiEQiOOWUU0Q/DwaDqKqqAgBEo1EEg0E8/fTTyfVWrlyJM844A5988gmnchIRERERERERFRgGzzLk8XhgMpmwefNmmEwm0bLS0lIAwKBBg2A2m0UBtlGjRgGIZ64xeEZEREREREREVFgYPMvQhAkTEIlEcOTIEUyaNElxnYaGBoTDYXz22Wc4+eSTAQCffvopAGD48OG9tq1ERERERERERJQd7LaZwuPxoKmpCUA8WPbzn/8cF154Ifr164dhw4bhO9/5DjZt2oSHH34YEyZMwNGjR/HWW29h3LhxuPTSSxGNRnHmmWeitLQUv/jFLxCNRnHjjTfC5XLhz3/+c44/HRERERERERER6cXgWYq///3vuPDCC2U/nzVrFlavXo1QKISlS5fi6aefxpdffonq6mqcc845uPvuu1FfXw8AOHDgAG666Sb8+c9/RklJCaZMmYKHH34Y/fr16+2PQ0RERERERERE3cTgGRERERERERERkQpjrjeAiIiIiIiIiIgoXzF4RkREREREREREpILBMwCxWAxutxucwUqU3zhWiQoHxytRYeBYJSocHK9EucPgGYCOjg6Ul5ejo6Mj15tCRBo4VokKB8crUWHgWCUqHByvRLnD4BkREREREREREZEKBs+IiIiIiIiIiIhUMHhGRERERERERESkgsEzIiIiIiIiIiIiFQyeERERERERERERqTDnegOIiIpFu09As0eAOxCCy2FBdYkV5U5rrjeLiHKAxwOi7OO4Iip+HOeUrxg8IyLKggNtfsx/cSs27G5O/uz8umrcP3McBlc4crhlRNTbeDwgyj6OK6Lix3FO+YzTNomIuqndJ8hO9ADw9u5m3PbiVrT7hBxtGRH1Nh4PiLKP44qo+HGcU75j8IyIqJuaPYLsRJ/w9u5mNHt4sifqK3g8IMo+jiui4sdxTvmOwTMiom5yB0KayzvSLCei4sHjAVH2cVwRFT+Oc8p3DJ4REXWTy27RXF6WZjkRFQ8eD4iyj+OKqPhxnFO+Y/CMiKibqkutOL+uWnHZ+XXVqC5lhyCivoLHA6Ls47giKn4c55TvGDwjIuqmcqcV988cJzvhn19XjeUzx7G9NlEfwuMBUfZxXBEVP45zyneGWCwWy/VG5Jrb7UZ5eTna29vhcrlyvTlEpCLfx2q7T0CzR0BHIIQyuwXVpVae6KnPyvfx2tN4PKBCUUhjleOK+rpCGq9dxXFO+cqc6w0gIioW5U6e3IkojscDouzjuCIqfhznlK84bZOIiIiIiIiIiEgFg2dEREREREREREQqGDwjIiIiIiIiIiJSweAZERERERERERGRCgbPiIiIiIiIiIiIVDB4RkREREREREREpILBMyIiIiIiIiIiIhUMnhEREREREREREalg8IyIiIiIiIiIiEgFg2dEREREREREREQqGDwjIiIiIiIiIiJSweAZERERERERERGRCgbPiIiIiIiIiIiIVDB4RkREREREREREpILBMyIiIiIiIiIiIhUMnhEREREREREREalg8IyIiIiIiIiIiEgFg2dEREREREREREQqGDwjIiIiIiIiIiJSweAZERERERERERGRCgbPiIiIiIiIiIiIVBR88OzXv/41xo0bB5fLBZfLhXPPPRevv/56rjeLiIiIiIiIiIiKQMEHz4YMGYL7778fmzdvxgcffIDJkyfj8ssvx44dO3K9aUREREREREREVODMud6A7po2bZro38uWLcOvf/1rvPPOOxgzZkyOtoqIiIiIiIiIiIpBwQfPUkUiEbzwwgvwer0499xzVdcLBoMIBoPJf7vd7t7YPCLSiWOVqHBwvBIVBo5VosLB8UqUPwp+2iYAbNu2DaWlpbDZbPjBD36ANWvWYPTo0arr33fffSgvL0/+N3To0F7cWiLKFMcqUeHgeCUqDByrRIWD45UofxhisVgs1xvRXYIgYN++fWhvb8cf//hHPPHEE/jHP/6hGkBTiuAPHToU7e3tcLlcvbXZRJQGxypR4eB4JSoMHKtEhYPjlSh/FMW0TavVitraWgDAGWecgffffx+//OUv8fjjjyuub7PZYLPZenMTiagLOFaJCgfHK1Fh4FglKhwcr0T5oyimbUpFo1FRhJ6IiIiIiIiIiKgrCj7z7Pbbb8eUKVMwbNgwdHR04Nlnn8Xf//53vPnmm7neNCIiIiIiIiIiKnAFHzw7cuQIrr32Whw8eBDl5eUYN24c3nzzTXz961/P9aYREREREREREVGBK/jg2cqVK3O9CUREREREREREVKSKsuYZERERERERERFRNjB4RkREREREREREpILBMyIiIiIiIiIiIhUMnhEREREREREREalg8IyIiIiIiIiIiEgFg2dEREREREREREQqGDwjIiIiIiIiIiJSweAZERERERERERGRCgbPiIiIiIiIiIiIVDB4RkREREREREREpILBMyIiIiIiIiIiIhUMnhEREREREREREalg8IyIiIiIiIiIiEgFg2dEREREREREREQqGDwjIiIiIiIiIiJSweAZERERERERERGRCgbPiIiIiIiIiIiIVDB4RkREREREREREpILBMyIiIiIiIiIiIhUMnhEREREREREREalg8IyIiIiIiIiIiEgFg2dEREREREREREQqGDwjIiIiIiIiIiJSweAZERERERERERGRCgbPiIiIiIiIiIiIVDB4RkREREREREREpILBMyIiIiIiIiIiIhUMnhEREREREREREalg8IyIiIiIiIiIiEgFg2dEREREREREREQqzLneACIiolTtPgHNHgHuQAguhwXVJVaUO6253iwiXbgfE2WGY4UotzgGiTLD4BkREeWNA21+zH9xKzbsbk7+7Py6atw/cxwGVzhyuGVEmeN+TJQZjhWi3OIYJMocp20SEVFeaPcJsgs4AHh7dzNue3Er2n1CjraMKHPcj4kyw7FClFscg0T6MHhGRER5odkjyC7gEt7e3YxmDy/iKP9xPybKDMcKUW5xDBLpw+AZERHlBXcgpLm8I81yonzA/ZgoMxwrRLnFMUikD4NnRESUF1x2i+bysjTLifIB92OizHCsEOUWxyCRPgyeERFRXqguteL8umr
|
||
|
"text/plain": [
|
||
|
"<Figure size 1250x1250 with 30 Axes>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"sns.pairplot(data)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"attachments": {},
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Algorytm K średnich"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 6,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"kmeans = KMeans(n_clusters=5,init='k-means++')\n",
|
||
|
"kmeans = kmeans.fit(data)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 7,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"array([[5.34232145e+05, 7.85017921e+01, 3.25448029e+00, 4.91756272e+00,\n",
|
||
|
" 2.63082437e+00],\n",
|
||
|
" [2.30703732e+05, 4.16620939e+01, 1.98700361e+00, 5.51841155e+00,\n",
|
||
|
" 2.55379061e+00],\n",
|
||
|
" [1.61545162e+06, 1.51384615e+02, 4.03846154e+00, 2.69230769e+00,\n",
|
||
|
" 1.23076923e+00],\n",
|
||
|
" [3.52262093e+05, 5.68337226e+01, 2.64128912e+00, 6.15226530e+00,\n",
|
||
|
" 2.90565156e+00],\n",
|
||
|
" [9.08821504e+05, 1.11654135e+02, 3.74436090e+00, 4.48120301e+00,\n",
|
||
|
" 2.38345865e+00]])"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 7,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"kmeans.cluster_centers_"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 8,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"data['Clusters'] = kmeans.labels_"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 9,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<style scoped>\n",
|
||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
" vertical-align: middle;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe tbody tr th {\n",
|
||
|
" vertical-align: top;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe thead th {\n",
|
||
|
" text-align: right;\n",
|
||
|
" }\n",
|
||
|
"</style>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>cena</th>\n",
|
||
|
" <th>Powierzchnia w m2</th>\n",
|
||
|
" <th>Liczba pokoi</th>\n",
|
||
|
" <th>Liczba pięter w budynku</th>\n",
|
||
|
" <th>Piętro</th>\n",
|
||
|
" <th>Clusters</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>290386</td>\n",
|
||
|
" <td>46</td>\n",
|
||
|
" <td>2</td>\n",
|
||
|
" <td>5.0</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>450000</td>\n",
|
||
|
" <td>59</td>\n",
|
||
|
" <td>2</td>\n",
|
||
|
" <td>3.0</td>\n",
|
||
|
" <td>2.0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>375000</td>\n",
|
||
|
" <td>79</td>\n",
|
||
|
" <td>3</td>\n",
|
||
|
" <td>16.0</td>\n",
|
||
|
" <td>5.0</td>\n",
|
||
|
" <td>3</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>400000</td>\n",
|
||
|
" <td>63</td>\n",
|
||
|
" <td>3</td>\n",
|
||
|
" <td>2.0</td>\n",
|
||
|
" <td>2.0</td>\n",
|
||
|
" <td>3</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>389285</td>\n",
|
||
|
" <td>59</td>\n",
|
||
|
" <td>3</td>\n",
|
||
|
" <td>13.0</td>\n",
|
||
|
" <td>12.0</td>\n",
|
||
|
" <td>3</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" cena Powierzchnia w m2 Liczba pokoi Liczba pięter w budynku Piętro \\\n",
|
||
|
"0 290386 46 2 5.0 0.0 \n",
|
||
|
"1 450000 59 2 3.0 2.0 \n",
|
||
|
"2 375000 79 3 16.0 5.0 \n",
|
||
|
"3 400000 63 3 2.0 2.0 \n",
|
||
|
"4 389285 59 3 13.0 12.0 \n",
|
||
|
"\n",
|
||
|
" Clusters \n",
|
||
|
"0 1 \n",
|
||
|
"1 0 \n",
|
||
|
"2 3 \n",
|
||
|
"3 3 \n",
|
||
|
"4 3 "
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 9,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"data.head()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 10,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"3 2141\n",
|
||
|
"1 1385\n",
|
||
|
"0 558\n",
|
||
|
"4 133\n",
|
||
|
"2 26\n",
|
||
|
"Name: Clusters, dtype: int64"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 10,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"data['Clusters'].value_counts()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 11,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"<seaborn.axisgrid.PairGrid at 0x7f0d7826b250>"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 11,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABR4AAATPCAYAAABnSul5AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3xddfnA8c+5O3vPJmmT7r0XLdAWWsresgXBhYAoigiiAqKIgoDyE1GRISKyd6HQvRfde2c3e999z++Pb26S29ybpJBmPu/Xq0rPuTk5Se/3nnOe7/N9Hk3XdR0hhBBCCCGEEEIIIYToRIbuPgEhhBBCCCGEEEIIIUTfI4FHIYQQQgghhBBCCCFEp5PAoxBCCCGEEEIIIYQQotNJ4FEIIYQQQgghhBBCCNHpJPAohBBCCCGEEEIIIYTodBJ4FEIIIYQQQgghhBBCdDoJPAohhBBCCCGEEEIIITqdBB6FEEIIIYQQQgghhBCdTgKPQgghhBBCCCGEEEKITieBRyGEEEIIIYQQQgghRKeTwGMLK1eu5OKLLyY9PR1N03jvvfdO+Ri6rvPEE08wbNgwrFYrAwYM4Le//W3nn6wQQgghhBBCCCGEED2YqbtPoCepr69n/Pjx3HrrrVxxxRVf6Rh33303ixcv5oknnmDs2LFUVFRQUVHRyWcqhBBCCCGEEEIIIUTPpum6rnf3SfREmqbx7rvvctlllzVtczqd/OIXv+C///0vVVVVjBkzhscff5w5c+YAsHfvXsaNG8euXbsYPnx495y4EEIIIYQQQgghhBA9gCy1PgV33nkn69at4/XXX2fHjh1cffXVLFy4kIMHDwLw4YcfkpOTw0cffUR2djaDBg3i29/+tmQ8CiGEEEIIIYQQQoh+RwKPHZSbm8uLL77Im2++yZlnnsngwYP56U9/yuzZs3nxxRcBOHLkCMePH+fNN9/klVde4aWXXmLLli1cddVV3Xz2QgghhBBCCCGEEEJ0Lanx2EE7d+7E6/UybNiwgO1Op5OEhAQAfD4fTqeTV155pel1L7zwApMnT2b//v2y/FoIIYQQQgghhBBC9BsSeOyguro6jEYjW7ZswWg0BuyLjIwEIC0tDZPJFBCcHDlyJKAyJiXwKIQQQgghhBBCCCH6Cwk8dtDEiRPxer2UlJRw5plnBn3NrFmz8Hg8HD58mMGDBwNw4MABAAYOHNhl5yqEEEIIIYQQQgghRHeTrtYt1NXVcejQIUAFGv/0pz8xd+5c4uPjycrK4sYbb2TNmjU8+eSTTJw4kdLSUpYsWcK4ceO48MIL8fl8TJ06lcjISJ5++ml8Ph933HEH0dHRLF68uJt/OiGEEEIIIYQQQgghuo4EHltYvnw5c+fObbX95ptv5qWXXsLtdvPoo4/yyiuvUFBQQGJiIjNmzODhhx9m7NixABQWFnLXXXexePFiIiIiOP/883nyySeJj4/v6h9HCCGEEEIIIYQQQohuI4FHIYQQQgghhBBCCCFEpzN09wkIIYQQQgghhBBCCCH6Hgk8CiGEEEIIIYQQQgghOp0EHgFd16mpqUFWnQvRc8k4FaLnk3EqRM8n41SInk/GqRCiL5HAI1BbW0tMTAy1tbXdfSpCiBBknArR88k4FaLnk3EqRM8n41QI0ZdI4FEIIYQQQgghhBBCCNHpJPAohBBCCCGEEEIIIYTodBJ4FEIIIYQQQgghhBBCdDoJPAohhBBCCCGEEEIIITqdBB6FEKJL6IAb8HT3iQgh+gQP4AJ83X0iQvQSMmaE6Fvk3lqI3sLU3ScghBB9nwM4AZQARiADiAUs3XhOQojeyQXUAbmoB644IB0IA7RuPC8heio3YAeOo67HUUAmYENdk4UQvY/cWwvRm0jgUQghTis78CUqWOBXDSQCw5EbJCFEx7lRAce8FtvqgEJgEhDZHSclRA/mRQUmDrTYVgcUAxNQgQohRO8i99ZC9Day1FoIIU4bHypA4Aqyrwxo6NrTEUL0ci4Cg45+XuAgKjAphGjmQo2Nk+nAXsDZtacjhPia5N5aiN5IAo9CCHHauFHLQEIp6qoTEUL0CRVt7KtC6lwJcbIGVJAxGAcSrBeit5F7ayF6Iwk8djFd11mcv5lyR3V3n4oQQgghhBBCCCGEEKeNBB67WIPHyZayA3xesKW7T0UIcdqZgZQ29qd11YkIIfqE+Db2xSKlu4U4WTihmy7ZUNdpIUTvIffWQvRGEnjsYk6fqkfh8Xm7+UyEEKefAdU5M1iR60TUA5EQQnSUBfWZcjIjMBQJoghxMgtqbJxMA0YC1q49HSHE1yT31kL0RjI13sX8AcdQ1WaEEH1NGDAZVY+mBBUgyABikK57QohTYwayUJmPuahaV3FAOuqzRggRyAgkozq+56LqOkahAhe2bjwvIcRXJ/fWQvQ2EnjsYpLpKER/ZEMFC9JQM7Xy0SuE+KosqMBjNKq7pwlZwCJEW8yogMRIZMwI0VfIvbUQvYmM0C7m1X3dfQpCiG6hIbOwQojOI7dwQpwaGTNC9C1yby1EbyHTfV3MH3jUZbG1EEIIIYQQQgghhOjDJPDYxSTjUQghhBBCCCGEEEL0BxJ47GI+CTwKIYQQQgghhBBCiH5AAo9drCnjUVZaCyGEEEIIIYQQQog+TAKPXUxqPAohhBBCCCGEEEKI/kACj11MlloLIYQQQgghhBBCiP5AAo9dzKdLpqMQQgghhBBCCCGE6Psk8NjF/BmPugQghRBCCCGEEEIIIUQfJoHHLubPePRJjUchhBBCCCGEEEII0YdJ4LGL+VAZj16p9SiEEEIIIYQQQggh+jAJPHYxf8ajLLUWQgghhBBCCCGEEH2ZBB67WNNSa8l4FEIIIYQQQgghhBB9mAQeu5g/4OiVjEchhBBCCCGEEEII0YdJ4LGLNXW1RjIehRBCCCGEEEIIIUTfJYHHLubvZu2TjEchhBBCCCGEEEII0YdJ4LGLNdd4lMCjEEIIIYQQQgghhOi7JPDYxZqXWkvgUQghhBBCCCGEEEL0XRJ47GLS1VoIIYQQQgghhBBC9AcSeOxivsamMrLUWgghhBBCCCGEEEL0ZRJ47GJ6Y8BRlloLIYQQQgghhBBCiL5MAo9dzOuv8SgZj0IIIYQQQgghhBCiD+v1gcfnnnuOcePGER0dTXR0NDNnzmTRokXdfVohNdV4lIxHIYQQQgghhBBCCNGH9frAY0ZGBr///e/ZsmULmzdvZt68eVx66aXs3r27u08tKH+NR5AGM0IIIYQQQgghhBCi7zJ19wl8XRdffHHA33/729/y3HPPsX79ekaPHt1NZxVay6Yyuq6D1o0nI4QQQgghhBBCCCHEadLrA48teb1e3nzzTerr65k5c2bI1zmdTpxOZ9Pfa2pquuL0gMAsRx86xi77zkL0Lt05ToUQHSPjVIieT8apED2fjFMhRF/W65daA+zcuZPIyEisVivf//73effddxk1alTI1z/22GPExMQ0/cnMzOyyc22Z8eiTBjNChNSd41QI0TEyToXo+WScCtHzyTgVQvRlmt4H2iu7XC5yc3Oprq7mrbfe4p///CcrVqwIGXwMNqOUmZlJdXU10dHRp/Vc/3d4GUdqiwD48ZirsJksp/X7CdFbdec4FUJ0jIxTIXo+GadC9HwyToUQfVmfWGptsVgYMmQIAJMnT2bTpk0888wzPP/880Ffb7VasVqtXXmKTXy6DwMaPnTpbC1EG7pznAohOkbGqRA9n4xTIXo+GadCiL6sTyy1PpnP5wuYMepJvLoPg6Z+7bp0tRZCCCGEEEIIIYQQfVSvz3i8//77Of/888nKyqK2tpbXXnuN5cuX89lnn3X3qQXl03VMBgMer1cyHoUQQgghhBBCCCFEn9XrA48lJSV885vfpKioiJiYGMaNG8dnn33G/Pnzu/vUgvLqXgyaEXDTB8prCiGEEEIIIYQQQggRVK8PPL7wwgvdfQqnxKvrGBuXWktXayGEEEIIIYQQQgjRV/XJGo89mU/3NQUedVlqLYQQQgghhBBCCCH6KAk8drGWgUfJeBRCCCGEEEIIIYQQfZUEHruYV/dh0oyAZDw
|
||
|
"text/plain": [
|
||
|
"<Figure size 1317.49x1250 with 30 Axes>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"sns.pairplot(data,hue='Clusters',palette='Spectral')"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"attachments": {},
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Algorytm PCA"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 12,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"del data['Clusters']"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 13,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<style scoped>\n",
|
||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
" vertical-align: middle;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe tbody tr th {\n",
|
||
|
" vertical-align: top;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe thead th {\n",
|
||
|
" text-align: right;\n",
|
||
|
" }\n",
|
||
|
"</style>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>X</th>\n",
|
||
|
" <th>Y</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>-0.733842</td>\n",
|
||
|
" <td>-0.981792</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>0.118314</td>\n",
|
||
|
" <td>-0.727320</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>0.433828</td>\n",
|
||
|
" <td>2.663883</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>0.725795</td>\n",
|
||
|
" <td>-0.774859</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>-0.195982</td>\n",
|
||
|
" <td>3.810580</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" X Y\n",
|
||
|
"0 -0.733842 -0.981792\n",
|
||
|
"1 0.118314 -0.727320\n",
|
||
|
"2 0.433828 2.663883\n",
|
||
|
"3 0.725795 -0.774859\n",
|
||
|
"4 -0.195982 3.810580"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 13,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"pca = PCA(n_components=2)\n",
|
||
|
"pca.fit(scaled_data)\n",
|
||
|
"x_pca = pca.transform(scaled_data)\n",
|
||
|
"x_pca = pd.DataFrame(x_pca)\n",
|
||
|
"x_pca.columns = ['X','Y']\n",
|
||
|
"x_pca.head()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 14,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"<AxesSubplot:xlabel='X', ylabel='Y'>"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 14,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjUAAAGwCAYAAABRgJRuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAA9hAAAPYQGoP6dpAADsOElEQVR4nOydeXhU9b3/37NvySTDDEG2QGSiARIgiKIkQaVaRVCJVFv0XtmsbTXo/dFadgRB0GqtRazVFtTeq9gFQQW1VbFKcEViCTuBSMBAQiazZNYzc2Z+f0zOYZYzZyb7JHxez+MjmfU7Z5LzfZ/P9paEQqEQCIIgCIIgejnSnl4AQRAEQRBEZ0CihiAIgiCIPgGJGoIgCIIg+gQkagiCIAiC6BOQqCEIgiAIok9AooYgCIIgiD4BiRqCIAiCIPoE8p5eQHcSDAZRX1+PzMxMSCSSnl4OQRAEQRApEAqF0NLSgkGDBkEqTRyPuahETX19PYYOHdrTyyAIgiAIoh2cPn0aQ4YMSXj/RSVqMjMzAYQPil6v7+HVEARBEASRCg6HA0OHDuX38URcVKKGSznp9XoSNQRBEATRy0hWOkKFwgRBEARB9AlI1BAEQRAE0ScgUUMQBEEQRJ+ARA1BEARBEH0CEjUEQRAEQfQJSNQQBEEQBNEnIFFDEARBEESfgEQNQRAEQRB9AhI1BEEQBEH0CUjUEARBEATRJ7iobBKI9ON7qxu+QBAehoWbYaHXyKFXKzAwW9PTSyMIgiB6GSRqiB6jzuLC9zYPNn5cgz01Fv72snwT1s4oxDCjrgdXRxAEQfQ2KP1E9AhnrG5U1jTFCRoA2H28Ccu3HUC9zdNDqyMIgiB6IyRqiB6hxRvAAL06TtBw7K5pgt3j7+ZVEQRBEL0ZEjVEj+Dw+OELBEUf0+IlUUMQBEGkDokaokfQaxRQycV//TLVim5aDUEQBNEXIFFD9AiZajkaHV6UmI2C95eZTcjSkKghCIIgUoe6n4geYYhBixKzCcNN4Q6n2O6nx2cUYhC1dRMEQRBtgEQNkRS7m0GTk4HD64deo4BJp0SWVtnh18016iCTSrDm9kJ4/CzcvvbPqemqNRIEQRC9BxI1hCj1Ng8Wbd2P3ceb+Nsm55vwxMwxnRJJGWzQdvg1unqNBEEQRO+AamqIhNjdTJxYAIBPjzdh8db9sLuZHlrZBXrDGgmCIIjugUQNkZAmJxMnFjg+Pd6EJmfPC4besEaCIAiieyBRQyTEkWROTDrMkekNayQIgiC6BxI1REL0SebEpMMcmd6wRoIgCKJ7IFFDJMSUocTkfJPgfZPzTTBl9Hx3UW9YI0EQBNE9kKghEpKlVeKJmWPiRMPkfBOenDkmLVqme8MaCYIgiO5BEgqFQj29iO7C4XAgKysLdrsder2+p5fTa+BmwLR4/chUK2DKSL8ZML1hjQRBEET7SHX/pjk1RFKytOkvEHrDGgmCIIiuhdJPBEEQBEH0CUjUEARBEATRJyBRQxAEQRBEn6DXiJpVq1ZBIpFE/VdQUNDTyyIIgiAIIk3oVYXCo0ePxocffsj/LJf3quUTBEEQBNGF9CpVIJfLcckll/T0MnolXMuzw+uHXqOASZee3UK9ZZ0EQRBE+tGrRM3x48cxaNAgqNVqXHPNNVi/fj1yc3MTPt7n88Hn8/E/OxyO7lhm2lFv88Q5WU/ON+GJmWMwKFvTgyuLpreskyAIgkhPek1NzcSJE/HKK6/g/fffxwsvvIDa2lqUlZWhpaUl4XPWr1+PrKws/r+hQ4d244rTA7ubiRMKQNjBevHW/bC708PFureskyAIgkhfeu1EYZvNhmHDhuGZZ57B/PnzBR8jFKkZOnToRTVR+ESjEz945pOE93+08FqMyMnoxhUJ01vWSRAEQXQ/fX6icHZ2Ni677DLU1NQkfIxKpYJKperGVaUfDq9f9P6WJPd3F71lnQRBEET60mvST7E4nU6cOHECAwcO7OmlpDV6tUL0/swk93cXvWWdBEEQRPrSa0TNr371K3zyySf47rvv8Nlnn6G8vBwymQyzZs3q6aWlNaYMZZyDNcfkfBNMGenRWdRb1kkQBEGkL71G1Jw5cwazZs3C5ZdfjrvuugtGoxFffPEF+vfv39NLS2uytEo8MXNMnGCYnG/CkzPHpE27dG9ZJ0EQBJG+9NpC4faQaqFRX4Sb/9Li9SNTrYApIz3nv/SWdRIEQRDdR58vFCbaRqQwcHj9gCT+9nQgS0sihiAIgmgfJGouEtJlsF1vmBjcG9ZIEARBxEOi5iIg2WC752YVd/qm3eDwwupqFQZqBdQKKSSQYPn2A9hdk74Tg9NF/BEEQRBth0TNRUCTk4kTNByfHm9Ck5PpNFFjdzNodjNYuf0AdtdY+NvLzEY8eL0Z39RZ496/q4RVW+kJ8UcQBEF0HiRq+iD1Ng/sHj8cHj+yNAqo5FKYMpRocgpbDXTWYLt6mwefHD2PHdX12BMhaABgd40FQQDzSvOwcVf0wMTOFlbtpTvFH0EQBNH5kKjpY5yyuLB0W3WUqCg1G/HafVfjnj9/IShsOmOwHRflmDNpeJyg4dhTY8G8kjzB+9JhYjBNNSYIgujd9Jo5NURyzjS74wQNAFTWWLBmx0E8OXNM3HM6a7AdF+XwBYKij0t0fzpMDKapxgRBEL0bEjV9BLubgd3jTxglqayx4BK9Ouq2zhxsx0U5VHLxXymh+5MJqzNWNw6fdeDLkxYcOevAGau7Y4tNAE01JgiC6N1Q+qmPEG5BDog+xsUE8NHCa7tksB0X5ag6bUOJ2SgorsrMJjQ6vFG3JRNWidJpj5cXYZhR1ylr5+CmGi/euh+fxnQ/0VRjgiCI9IdETR/B4fUjQy0TfUymWoERORltel27m4HN7YeLCcDFsMjWKJCTqYrb4Lkox+bKWmyYVQwAUUKkzGzEiltH4cD3dvzzf8rgYdikwuqMNXE6bdm2ajwxcwyGGLRt+jzJGJStwXOzimmqMUEQRC+ERE0fQa9W4DuLC6VmIyoFoiSlZiOyNG2rCTlr8+BUsxvP7ToeLVBaIxeRc1sioxwPbanCvNI8vih4sEEDtVyKUAj4QUFOygKhxRsQTae1JIlMtReaakwQBNE7IVHTRzBlKPG7D45ixfTRWLPjYJSw4dI1scPjLgzIC0CvkcOgVWJAa92N3c3g38fOY8d+gfbsBHNbOjvK4fBQNxJBEASROiRqOomeHq2fpVVi6bRRWLfzEOaW5GHR1AK4fSxMGSoEgkE0uxgEgiF+XXUWF5YI1KqsKy9CrlGHJieDnExVwkhJorktnRnl0CeJLFE3EkEQBBEJiZpOIF1G68ukElRMyYfD64dCJsUlWQos3xZvS7CuvChhrcrSbdX47V3j4PD6k7ZntzdSkooAtLsZZCplKDObotbPUWo2IkMpXkNEEARBXFyQqOkg6TJaPzbyUjHFjKo6a5xw+fR4E+qa3YJ1N0BY2FhdDPRqBZpdwhOIOZJFShocXtjcDEIhIATA6w/AoFVhRRL/p7M2Dz4/aUHx0Gw8eP0IBBGKq+lZO6MQviADoHM7oAiCIIjeC4maDpIOo/UbHN64VFLx0Ow4OwIOW5JaFYc3gMsHZOCr75oTtmcnmtvS4PDC6mbg9AbgZ0PQqWR49sNj2HXkvKjQ4gQgAJxqdsPnZ7H8rQOoqrPxRce+QBAquRSNDi/sbgZZGirmJQiCIC5AoqaDJButb/OIRzs6A6uLiRMKYqmjZAPy9Go5srRKXHdZf+SZwpEQoe4nTqzZ3QyaXQwkEgmWbY+v05lTkocvTjaLCi1OAMqlEjy36zjmleTxryP0nHcWlIBhxdNjBEEQxMUFiZoOkmy0vs8fDEcVujBaIzR0T0y4VJ22idaqGHThtQ7M1kCrlGHdjCK4mADcDIusmDk1XD3RnEnDsXlPrWCdTghhI8tUanSUcin21Fhwz8Rhoo91elmwipDoYwiCIIiLCxI1HcSUoURZvkkwBVViNuKzkxYM0Ku7VNTo1fFfo9hk30P1dvzypsvjalW47qcBEXYKYt1MkfVES6cWoDjXwKeJ1AoZ9tVZsbm
|
||
|
"text/plain": [
|
||
|
"<Figure size 640x480 with 1 Axes>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"sns.scatterplot(x_pca,x='X',y='Y')"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": []
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"kernelspec": {
|
||
|
"display_name": "Python 3",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
|
"version": "3.8.10"
|
||
|
},
|
||
|
"orig_nbformat": 4,
|
||
|
"vscode": {
|
||
|
"interpreter": {
|
||
|
"hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
|
||
|
}
|
||
|
}
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 2
|
||
|
}
|