{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 164,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from many_stop_words import get_stop_words\n",
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "from unidecode import unidecode\n",
    "from nltk.tokenize import word_tokenize\n",
    "import string\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.cluster import KMeans"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 165,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the two tab-separated corpora; the files have no header row, so the\n",
    "# text ends up in column 0 of each frame.\n",
    "data = pd.read_csv('dev-0/in.tsv', sep='\\t', header=None)\n",
    "data_test = pd.read_csv('test-A/in.tsv', sep='\\t', header=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 166,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Translation table that deletes every character of string.punctuation in one pass.\n",
    "_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)\n",
    "\n",
    "\n",
    "def remove_punctuations(text):\n",
    "    \"\"\"Return `text` with every character of string.punctuation removed.\n",
    "\n",
    "    Only the ASCII punctuation listed in string.punctuation is stripped;\n",
    "    whitespace and all other characters are left untouched.\n",
    "    \"\"\"\n",
    "    return text.translate(_PUNCTUATION_TABLE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 167,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Lower-case both corpora so that token matching is case-insensitive.\n",
    "data[0] = data[0].str.lower()\n",
    "data_test[0] = data_test[0].str.lower()\n",
    "stop_words = get_stop_words('pl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 168,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Transliterate the documents with unidecode; the stop words get the same\n",
    "# treatment so they still match the transliterated tokens.\n",
    "data[0] = data[0].apply(unidecode)\n",
    "data_test[0] = data_test[0].apply(unidecode)\n",
    "uni_stop_words = [unidecode(x) for x in stop_words]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 169,
   "metadata": {},
   "outputs": [],
   "source": [
    "data[0] = data[0].apply(remove_punctuations)\n",
    "data_test[0] = data_test[0].apply(remove_punctuations)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 170,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Membership tests against a set are O(1); `uni_stop_words` itself stays a\n",
    "# list so that later cells can keep using it unchanged.\n",
    "_uni_stop_word_set = set(uni_stop_words)\n",
    "\n",
    "\n",
    "def remove_stop_words(text):\n",
    "    \"\"\"Drop every whitespace-separated token of `text` that is a stop word.\n",
    "\n",
    "    The surviving tokens are re-joined with single spaces.\n",
    "    \"\"\"\n",
    "    return ' '.join(item for item in text.split() if item not in _uni_stop_word_set)\n",
    "\n",
    "\n",
    "data[0] = data[0].apply(remove_stop_words)\n",
    "data_test[0] = data_test[0].apply(remove_stop_words)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 171,
   "metadata": {},
   "outputs": [],
   "source": [
    "tf = TfidfVectorizer()\n",
    "text_tf = tf.fit_transform(data[0])\n",
    "# NOTE(review): fit_transform() refits `tf` on the test documents, so after\n",
    "# this cell `tf` holds the test vocabulary and `text_test_tf` is not guaranteed\n",
    "# to share columns with `text_tf`. If a shared feature space is intended, use\n",
    "# tf.transform(data_test[0]) instead - confirm against the cells below.\n",
    "text_test_tf = tf.fit_transform(data_test[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 174,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0 opowiesc prawdziwa olsztyn akademik 7 pietro i...\n",
       "1 podejrzewam polowaniu mowy prostu znalazl mart...\n",
       "2 smutne przypomina historie balwankami wredny f...\n",
       "3 kumpla zdawal walentynki polozyl koperte laski...\n",
       "4 przypomniala krakowskich urban legends chyba n...\n",
       " ... \n",
       "82 wczoraj popoludniowej audycji trojce prowadzac...\n",
       "83 sluchajcie uwielbiam opowiadacv sluchac jakies...\n",
       "84 wczoraj probie koncertu czwartkowego akompania...\n",
       "85 zuzanna mala historia przyszla panna mloda kup...\n",
       "86 koszmar zaczyna niewinnego spotkania jednym to...\n",
       "Name: 0, Length: 87, dtype: object"
      ]
     },
     "execution_count": 174,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 173,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAX8AAAEWCAYAAACOv5f1AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAtx0lEQVR4nO3dd5xU5fXH8c8BpIOgECU2kCjGBuKiiIIiKjbAXiKKWJAoYkPEGqNJFI0aK7GAvaAUUUFFcEWNoi6IgC0IP6yAFBVEI8Xz++O5K8Oy7S4ze2d3vu/Xa147c+fembPDcu6dp5zH3B0REcktNZIOQEREKp+Sv4hIDlLyFxHJQUr+IiI5SMlfRCQHKfmLiOQgJX/ZgJmdbmZvpjx2M/tDkjGlSzp/FzObb2YHpeO1kmZmp5jZxAy99mtmdlYJz11rZo9l4n2ldEr+OSpKXD+b2Y8pt7uSjgt+O/m4md1WZHuvaPtD5XydEpNOppnZQ2a2qsjne2KaXruOmd1gZl9E/4ZzzOxSM7NyHt8y+hxrFW5z98fd/ZB0xCdVQ62yd5FqrIe7T0o6iBLMBU4ws0vdfU20rQ/w3wRjiusmd7+qogebWa2U3z3VM8CWwOHAJ0Ae8CiwDTCwou8nuUVX/lJeh5vZPDNbYmY3m1kNADOrYWZXmdnnZvatmT1iZptGzz1sZpdE97eKrjbPix63NrNlha9TjIXALKB7tP9mQCfgudSdzKyjmb1lZt+b2QdmdkC0/e9AZ+CuYr7VHBRdLX9vZncXXjGX9rtEz58aPbfUzK6s6AdpZmeb2WfR7/+cmf0+5Tk3s/PMbA4wp5hjuwGHAMe6+2x3X+PuU4HewHmFTVrRt54bzOxdM1tuZuOizxDg9ejn99Fns08JTX3nRp/TCjO7Pvo3eyt6vafNrHa0b1Mze8HMFpvZd9H9rSvwuWxiZk+a2ejC15bMUfKX8jqacIXZHugFnBFtPz26dQW2BxoChYl2CnBAdH9/YB7QJeXxG+7+aynv+QhwWnT/JGAc8Evhk2a2FTAe+BuwGTAIGG1mzd39SuANYIC7N3T3ASmveyTQAdgdOIHoBFPa72JmOwPDgFOB3wObAxVJcAcCN0Tv2wL4HHiqyG5HAXsDOxfzEgcD77j7l6kb3f0d4CugW8rm0wj/Ti2ANcAd0fbCf4Mm0Wfzdgnhdgf2BDoCg4H7CCeZbYBdgZOj/WoADwLbAdsCP7Pub6BczKwe8Czh3/cEd18V53iJT8k/tz0bXf0W3s4uZd+h7r7M3b8A/sW6//inALe6+zx3/xG4HDgpak+eAuwXXd13AW4C9o2O2z96vjRjgQOiq+/TCCeDVL2BCe4+wd1/dfdXgAJCc0hpbnT376PfJR9oV47f5TjgBXd/3d1/Aa4GSjtxAQxK+WyXpLzHCHefHr3O5cA+ZtYy5bgbos/652JesxmwoIT3WxA9X+jR6NvByijeE8ysZhkxp7rJ3Ze7+4fAbGBi9Nn8ALwI7AHg7kvdfbS7/+TuK4C/E/59y6sx8BKhqa+vu6+NcaxUkJJ/bjvK3Zuk3O4vZd/UK83PCVe/RD8/L/JcLWALd58LrCQk187AC8A3ZtaGciT/KPmNB64CNnf3/xTZZTvg+NQTGLAf4Uq3NAtT7v9EuMIv9XeJnvvtM4gS6tIy3uefKZ9tYVJe7z2ik8xSYKuU49a7qi9iCSX/fi2i54t7nc+BTVj/5FCWRSn3fy7mcUMAM6tvZvdGTWLLCc1KTWKcaDoSvoXd6Ko0WWmU/KW8tkm5vy3wTXT/G0ISTn1uDesSxRTCVXNtd/86etwHaArMKMf7PgJcAhQ3HPBLwtVt6gmsgbvfGD0fN5GU9rssIOUzMLP6hKafuNZ7DzNrEL3O1yn7lBb3JGBvM0v998DM9o7iezVlc9F/s9WEk0O6E+wlQBtgb3dvzLpmpXKNPgImEprCJpvZFmmOTUqg5C/ldWnUsbcNcAEwMtr+JHCRmbUys4bAP4C
RKaNUpgADWNfJ+Fr0+M1yfr2fQmjnvrOY5x4DephZdzOraWZ1zeyAlM7GRYS2+/Iq7XcZBRxpZvtFnZHXUbH/P08Cfc2snZnVid7jHXefX56Do9FZkwl9G7tEv3dHwmcxzN1TO4l7m9nO0YnqOmBU9JkvJjRZxflsStOI8E3g+6hT+S9xX8DdbwKeIJwA4nw7kQpS8s9tz9v649DHlrLvOGAa4Wp9PDA82j6CMMzwdeD/gP8B56ccN4WQHAqT/5tA/ZTHpfJgsrsvK+a5Lwmdz1cQEtqXwKWs+7u+HTguGoFyR9Hji1Hi7xK1e59HSFALgO8IHayxRMn7amB09DqtCZ3ZcRxL6Kt4CfiRkPiHs/7nTvS7PERo5qpLNAzU3X8itMv/J2ou6xj39yjiX0A9wreKqVFcsbn79YRO30kpI5MkQ0xNbCLVj5m9Bjzm7g8kHYtkJ135i4jkICV/EZEcpGYfEZEcpCt/EZEcVGUKuzVr1sxbtmyZdBgiIlXKtGnTlrh786Lbq0zyb9myJQUFBUmHISJSpZjZ58VtV7OPiEgOUvIXEclBSv4iIjlIyV9EJAcp+YuI5KBqm/xvugny89fflp8ftouI5Lpqm/w7dIATTlh3AsjPD487dEg2LhGRbFBlxvnH1bUrPP00HH00dO4MU6eGx127Jh2ZiEjyqu2VP4RE36IFvPACdOyoxC8iUqhaJ//8fFiyBFq3DieAwYOTjkhEJDtU2+Rf2Mb/9NMwezbk5cHNN8OQIUlHJiKSvGqb/N97b10bf9268PrrsOeeMHQoPFbcUuAiIjmk2nb4Fm3iqVcvnAB69IA+faBGDfjTn5KJTUQkadX2yr849evD88/D/vvDqafCU08lHZGISDJyKvnDuhNA587Qu3doGhIRyTU5l/wBGjQIo386dQpNP6NGJR2RiEjlysnkD9CwIYwfH8b/n3wyjBmTdEQiIpUnZ5M/QKNG8OKLoeTDiSfCs88mHZGISOXI6eQP4QTw0kthGOgJJ4T+ABGR6i7nkz9A48bw8svQrh0ce2zoDxARqc6U/CObbgoTJ0LbtuEEMGFC0hGJiGSOkn+KJk3CCWDXXeGYY8K3ARGR6ijjyd/MLjKzD81stpk9aWZ1zayVmb1jZp+Z2Ugzq53pOMqraVN45RX44x+hV69wMhARqW4ymvzNbCtgIJDn7rsCNYGTgKHAbe7+B+A74MxMxhHXZpvBpEnQpg0ccQT885/rP68VwUSkqquMZp9aQD0zqwXUBxYABwKFU6seBo6qhDhi2XxzmDwZttkGLr0UbrklbNeKYCJSHWS0sJu7f21m/wS+AH4GJgLTgO/dfU2021fAVpmMo6KaNYN33oG99oJBg2DatNAkpBXBRKSqy3SzT1OgF9AK+D3QADg0xvH9zKzAzAoWL16coShL17x5OAE0awZPPhlGBTVpkkgoIiJpk+lmn4OA/3P3xe6+GhgD7As0iZqBALYGvi7uYHe/z93z3D2vefPmGQ61ZB9+GH526wbz5kH79mFG8KefJhaSiMhGyXTy/wLoaGb1zcyAbsBHQD5wXLRPH2BchuOosNQVwSZNgnHjQmXQ556DXXaBs86CL79MOkoRkXgymvzd/R1Cx+50YFb0fvcBlwEXm9lnwObA8EzGsTFSVwSDsBjMCy+EPoABA+DRR+EPf4CLLoKEWqZERGIzd086hnLJy8vzgoKCpMPYwOefw3XXwUMPhW8EF10El1wS+gZERJJmZtPcPa/ods3w3UjbbQfDh4d+gcMOg+uvh+23D3MDfv456ehERIqn5J8mO+0UmocKCsLQ0EsvDc1B994Lq1cnHZ2IyPqU/NNszz3DGgFTpkDLltC/fygVcfLJYdJYKs0UFpGkKPlnSJcu8OaboXO4YcOwWHz37vD3v4O7ZgqLSLKU/DPILNQGmj49TBDbYgu46qpQM+j44zVTWESSo+RfCWrUgJNOgvnz4eCDYc4c2GQT2HHHpCMTkVyl5F+J3nw
T3n8/tP8vXBhWDvvgg6SjEpFcpORfSVJnCj/xBNx/PyxbBh07atUwEal8Sv6VpOhM4bPOCp3Am20WZg3ffXey8YlIblHyrySDB2/YuXv88aE43BFHhFIRF10Ea9cmE5+I5BYl/4Q1bAhjx8IFF8C//hXWDl65MumoRKS6K3fyN7N9zaxBdL+3md1qZttlLrTcUbNmSPx33hnmBXTpAt98k3RUIlKdxbnyHwb8ZGZtgUuAucAjGYkqRw0YEEpFf/pp6AieOTPpiESkuoqT/Nd4KAHaC7jL3e8GGmUmrNx1xBFhSOjatbDffvDSS0lHJCLVUZzkv8LMLgdOBcabWQ1gk8yEldvatQtLR7ZuDUceCcOGJR2RiFQ3cZL/icAvwBnuvpCw/OLNGYlK2HpreP11OPRQOPfcsEaARgKJSLqUO/lHCX80UCfatAQYm4mgJGjUCJ59NvQF3HorHHecRgKJSHrEGe1zNmFJxnujTVsBz2YgJklRq1YYBXT77WH94DZtYPTo9fdRaWgRiStOs895wL7AcgB3nwP8LhNByYYGDgzfApYsCWUiRowI21UaWkQqIk7y/8XdVxU+MLNaQNVYALia6NkT/vMfaNIklIc44YR19YJUGlpE4oiT/KeY2RVAPTM7GHgGeD4zYUlJ9twTZswIawM88wy0aBFGB4mIxBEn+Q8BFgOzgHOACcBVmQhKSvfZZ7BmDXTuDLNmhbWCX3gh6ahEpCqJk/zrASPc/Xh3Pw4YEW2TSpRaGvr11+Hf/4YffgiVQU8/Hb7/PukIRaQqiJP8J7N+sq8HTEpvOFKWoqWhzzkHxo+HAw+Exx6DXXcNC8iLiJQmTvKv6+4/Fj6I7tdPf0hSmuJKQ3fvDpMnw9SpsOmmcPjhcOaZ4RuBiEhx4iT/lWbWvvCBme0J/Jz+kKSi8vLCYvGXXw4PPRS+BUycmHRUIpKN4iT/C4FnzOwNM3sTGAkMyEhUUmF16sA//gFvvx3WCujeHfr1g+XLk45MRLJJnPIO7wE7AX8G+gN/dPdpmQpMNs5ee4XF4gcPhuHDYbfdYJJ6aEQkEnclrw7A7kB74GQzOy39IUm61K0LQ4eGiWH16sHBB0P//rBiRdKRiUjS4tT2eRT4J7Af4STQAcjLUFySRh07hm8BgwbBffeFbwH9+oVho6lUI0gkd1hYn6UcO5p9DOzs5T0gzfLy8rygoCCJt65W3norzAeYMyd8Mxg9OowOSp0/oFIRItWHmU1z9w0u1OM0+8wGtkxfSJKETp1CeYiLLoL//S9MDjvpJCV+kVxTK8a+zYCPzOxdwqIuALh7z5IOMLM2hFFBhbYHrgGaAGcTykUAXOHuE2LEIhuhfv2wPsAxx0CvXjByJGyzDdSunXRkIlJZ4iT/a+O+uLt/CrQDMLOawNeEBWD6Are5+z/jvqakz+rVUKNGaPZ58cWwZnCPHmGo6K67Jh2diGRSnKGeU4q7xXivbsBcd/88fpiSbqlt/OPHh8Jw9evDq6/C7rtDnz7wuf6lRKqtOKN9OprZe2b2o5mtMrO1ZhZn6tBJwJMpjweY2UwzG2FmTUt4z35mVmBmBYsXLy5uF6mgojWCDj88nAAGDQq3kSNhxx1D38CSJcnGKiLpF2e0TwEhgT9DGOJ5GrCju19ejmNrA98Au7j7IjPbgrAGsAPXAy3c/YzSXkOjfSrXV1/BtdfCgw9CgwZw6aXhRNCwYdKRiUgc6Rjtg7t/BtR097Xu/iBwaDkPPQyY7u6LotdZFL3Gr8D9wF5x4pDM23preOABmD0bDjoIrrkGWreGu+6CVavKPl5Esluc5P9TdAU/w8xuMrOLYhx/MilNPmbWIuW5ownDSCUL/fGPMGZMqBi6885w/vmw007w+OPw669JRyciFRUn+Z8a7T8AWAlsAxxT1kFm1gA4GBiTsvkmM5tlZjOBrsBFMeKQBOy9d+gMfvHFUDa6d29o3x7OOCNsT6WZwiLZL07yP8rd/+fuy939r+5
+MXBkWQe5+0p339zdf0jZdqq77+buu7t7T3dfUJHgpXKZwaGHwrRp8MQToUbQgw+GbXffHfYpHEXUoUOysYpI6eIk/z7FbDs9TXFIFVKjBpx8Mnz8cegDaNAABgwI3w40U1ikaihzkpeZnQz8CWhlZs+lPNUYWJapwCT71a4N550X5gR06wbvvhs6itu0SToyESlLeWb4vgUsIJR3uCVl+wpgZiaCkqrlvfdg3rxQLmLMGNhlFxg7Fg44IOnIRKQkZTb7uPvn7v4acBDwRjSrdwGwNWCZDU+yXepM4dGjYcSI0Bdw4IGh0zeZGrAiUpY4bf6vA3XNbCtgImH0z0OZCEqqjqIzhfv2hXHjQm2gyy4L3wa0kLxI9omT/M3dfyIM77zH3Y8HdslMWFJVDB68YefuEUfABx/AbbeFkhF5eTBTDYQiWSVW8jezfYBTgPHRtprpD0mqAzO48MLQLLRyZVhN7LHHko5KRArFSf4XApcDY939QzPbHsgv/RDJdfvtB9Onh2Ggp54K554Lv/xS9nEikllxSzr3dPeh0eN57j4wc6FJdbHllvDKK6GJaNgw6NIFvvgi6ahEcluZyd/M/hX9fN7Mnit6y3iEUi3UqgVDh4ahoB9/HEpDTJyYdFQiuas84/wfjX5q1S3ZaEcfHeYBHHtsKAtx3XVwxRVh1rCIVJ4yk7+7T4t+xlm1S6REO+4YqoT27w9XXw1vvw2PPgqbbZZ0ZCK5ozzlHWYRFl0plrvvntaIJCc0aACPPAKdOoW6QDvvDBMmhOYgCKOE3nsv9BOISPqVp9mnsHLnedHPwmag3pRyUhApixn8+c9hFvD554fhoMOGQatWcOKJYfKYiGRGnGUc33f3PYpsm+7u7TMSWRFaxrF6e/bZUCZi9epQMG7ECDjllKSjEqn60rGMo5nZvikPOsU8XqRERx21ronHHc46C66/XnMCRDIlTvI+E7jHzOab2XzgHqDURddFyis/H+69N3QAN2oUmoCuuQZ2201DQkUyIc4kr2nu3hZoC7R193buPr3weTMrbrEXkTKlVga97joYNSosHD90aPgW0L176AP4+uukIxWpPmI327j7D6lLMqa4IA3xSA4qWhm0a9d1nb2zZoUTwrhxYeH4226DNWuSi1Wkuih3h2+ZL1RMh3A6qcM3t82dCwMHhuGgu+8O99wD++5b9nEiuS4dHb5l0bBPyZjWrUN56DFj4LvvQsG4M8+EJUuSjkykakpn8teqXpJRZqE8xEcfhZFBjzwS1gu+/3749dekoxOpWtKZ/P+TxtcSKVHDhqEzeMaMMBqoX78wU/iCC0Lncar8/LCcpIisr8w2fzO7uLTn3f3WtEZUArX5S3Hc4fHH4ZJLYPFiqFMndBb36LH+KKKiq42J5IqNafNvFN3ygD8DW0W3/kClzO4VKYkZ9O4Nn34aFor53/+gV6+wdrASv0jJ4pR3eB04wt1XRI8bAePdvUsG4/uNrvylPAoKQvL/5ptQOvrtt8OkMZFclY7RPlsAq1Ier4q2iWSNFStg1So44AD48MNQLVSLx4tsKE7yfwR418yuNbNrgXeAhzMSlUgFpLbx5+eHCWHffAN5eXDffaF/QESCOOUd/g70Bb6Lbn3d/R+ZCkwkrqIzhS+8EJ55Blq2hHPOCVVCV6xIMkKR7BF3qGd9YLm73w58ZWatMhCTSIUMHrxh5+4xx8Ann8Df/gYjR4ZvAWoGEomR/M3sL8BlwOXRpk2AxzIRlEg61agBV14Jr74arvz33jtMDFMzkOSyOFf+RwM9gZUA7v4NYQhoicysjZnNSLktN7MLzWwzM3vFzOZEP5tW/FcQKZ/99w8Twzp3DhPDevdWM5DkrjjJf5WHcaEOYGYNyjrA3T+NSj+3A/YEfgLGAkOAye6+AzA5eiyScb/7Hbz0UmgGeuopNQNJ7oqT/J82s3uBJmZ2NjAJuD/G8d2Aue7+OdCLdSOFHgaOivE6IhtFzUAi5Uz+ZmbASGA
UMBpoA1zj7nfGeK+TgCej+1u4+4Lo/kJKmC9gZv3MrMDMChYvXhzjrUTKpmYgyWVxZvjOcvfdKvQmZrWBb4Bd3H2RmX3v7k1Snv/O3Utt99cMX8mUX3+FG24Iy0b+4Q9heOjuuycdlUh6pGOG73Qz61DB9z8MmO7ui6LHi8ysRRRYC+DbCr6uyEYrbAaaPBmWL4c99wyF4lKvi1QdVKqbOMl/b+BtM5trZjPNbJaZlber7GTWNfkAPAcUrvnbBxgXIw6RjDjggNAM1LYt3HorHHxwaAYqnDncoaKXPiJZqFaMfbtX5A2iUUEHA+ekbL6R0IF8JvA5cEJFXlsk3bbYAt59F84+G0aMCLOD166FsWNVHVSql3In/2iUDmb2O6BujONWApsX2baUMPpHJOvUqAHDh8Pq1fDoo2HbQw/BrrtC8+aJhiaSNnFm+PY0sznA/wFTgPnAixmKSyRR+fnw4oswZAjUqxcWjGnTBh54QEtGSvUQp83/eqAj8F93b0W4cp+akahEEpRaHfSGG2D8eGjcGLbeOjQHde4Ms2YlHaXIxomT/FdHzTU1zKyGu+cTVvcSqVaKVgft2hVGjw5VQR98MKwatsceoZDcypXJxipSUXHG+U8izMS9AWhGGJ7Zwd07ZSy6FBrnL9li6VK47LLQL7DttnDXXWHNYJFslI5x/r2An4GLgJeAuYD+5CXnbL55aPt/442wRGTPnnD00fDFF0lHJlJ+cRZzWenua919jbs/7O53RM1AIjlpv/3g/fdh6FB4+eWwZOQtt4RRQiLZLs5onxVRSeblZvY/M1trZsszGZxItttkk9D2/9FHoW9g0KBQKXSqhkJIlotz5d/I3Ru7e2OgHnAscE/GIhOpQlq2hOeegzFjYNky6NQJOnYM21KpTIRki7jLOALgwbNUcNavSHVkFtr+P/oILroojBo66ii44opQJ0hlIiSbxBntc0zKwxqEYZ77u/s+mQisKI32kapmxgw4+eSwhnDLlqFo3KhRKhMhlSsdo316pNy6AysII4BEpBjt2sGHH8Lhh8P8+fDjjzBvnhaNkewQp7ZP30wGIlIdTZkSCsWdfz78+99w1lmhSNz990OLFklHJ7ms3MnfzO4o7Xl3H7jx4YhUH6llIrp2hV69wm3iRNhlF7jnHjjppKSjlFwVp9mnLtAemBPd2gG1gWnRTURSFC0T0a0bPP88XHAB7Lhj6A848URYsiTZOCU3xenwnQrs5+5rosebAG+4e8cMxvcbdfhKdbJmTRjyee21sNlmYcbwkUcmHZVUR+no8G0KNE553DDaJiIx1aoVhoC+915YQKZHDzjzzDAiSKQyxEn+NwLvm9lDZvYwMB34R2bCEskNbduGE8AVV4QFY3bbDV59NemoJBfEmeH7IGEd37HAGGAfd384U4GJ5IrateHvf4e33oK6dUPfwMCB8NNPSUcm1Vmc2j77AivcfRzQCBhsZttlLDKRHLP33qFQ3AUXwJ13hnkCb7+ddFRSXcVp9hkG/GRmbYGLCSWdH8lIVCI5qn59+Ne/QtPPqlWhcmjXrqFqaCrVCJKNFSf5r/EwNKgXcLe73034BiAiada1K8ycCX37wmuvwRFHhBFBoBpBkh5xkv8KM7sc6A2MN7MawCaZCUtEGjcOCf+FF8L9s88OJ4XUiWMiFRUn+Z8I/AKc6e4Lga2BmzMSlYj85ogjYM6cMCv4tdegRo1wMhDZGHFG+yx091vd/Y3o8Rfu/lubv5mpa0okQ2bOhEWLwlX/kiWhyWfwYI0IkoqrUD3/EtRN42uJSCS1RtDIkfDss1CnDtx8c5gn8NprSUcoVVE6k78K1YpkQNEaQT16wIQJ0K9fKA/dtSv07w8//JBsnFK1lLu2T5kvZDbd3dun5cWKodo+Ihv66Sf4y1/g1ltDiehhw8LJQaRQhWv7mFmd8r5H7KhEZKPUrx+af6ZODQXievYM1UK//TbpyCTblafZ520AM3u0jP1O3fhwRKQ
iOnSAggK47rqwiPzOO8Njj2nVMClZeZJ/bTP7E9DJzI4peivcyd1nZy5MESlL7dpw9dWhRMSOO8Kpp4Zhol98kXRkko3Kk/z7A52BJqy/jm8PQBXIRbLMzjvDG2/A7bfD66+H+QF33w2//pp0ZJJNykz+7v6mu/8ZGOzufYvczijreDNrYmajzOwTM/vYzPYxs2vN7GszmxHdDk/LbyMiANSsGSqDzp4NnTrBgAHQujU8XKQOr2oE5a44Qz0fNbOBUSIfZWbnR6t5leV24CV33wloC3wcbb/N3dtFtwlxAxeRsrVsCS+9FNYKWLIETj89LCK/erVqBOW6OMn/HmDP6Oc9hPV8h5V2gJltCnQBhgO4+yp3/75CkYpIhZhBnz6hRMT++8Pw4bDddnDssaoRlMviJP8O7t7H3V+Nbn2Bsq4ZWgGLgQfN7H0ze8DMGkTPDTCzmWY2wsyKXQ7SzPqZWYGZFSxevDhGqCJS1JZbhtnAxx0HCxbAypWwcGHSUUlS4iT/tWbWuvCBmW0PrC3jmFpE3xDcfQ9gJTCE8I2hNdAOWADcUtzB7n6fu+e5e17z5s1jhCoixcnPDyeAgQNDB/Cf/hTWDl65MunIpLLFSf6XAvlm9pqZTQFeBS4p45ivgK/c/Z3o8Sigvbsvcve17v4rcD+wV9zARSSe1BpBt98OL74I9erBiBGh3X+2BmvnlDhVPScDOwADgfOBNu6eX/i8mR1czDELgS/NrE20qRvwkZm1SNntaEB/diIZVrRG0EEHwfjxoQN42bJwArjvPk0MyxUZr+1jZu2AB4DawDygL3AHocnHgfnAOe6+oLTXV20fkcxZtAhOOw0mToTjj4f774dNN006KkmHkmr71ErnexS30d1nAEXfWKUgRLLIFluEZqCbb4YrrwylIp56CvZSg2y1pZLOIgKEFcIuuyzMDl67FvbdF265RTODq6t0Jn8RqQb22QdmzAiloQcNCj810rr6SWfyn5/G1xKRBDVtCqNHh5pAkydDu3ZaMay6KXfyN7OaZtYzKvFwceGt8Hl3P6a040WkajGDc88NawU0bAgHHhgWjllb1uweqRLiXPk/D5wObA40SrmJSDXWrh1MmxZKRF93HeywQxgymkoF4qqeOKN9tnb33TMWiYhkrYYNQ0XQbt3gnHPgpJPgs8/giivWnzwmVUecK/8XzeyQjEUiIlnvtNNCZ/D224choe3bh1pBKhBX9cRJ/lOBsWb2s5ktN7MVZrY8U4GJSHZq0yaUgth777Bq2A8/wLhxYaKYVB1xkv+twD5AfXdv7O6N3L1xhuISkSz29tswd24oELfJJnDnneHbwJAhsHRp0tFJecRJ/l8Csz1d9SBEpEoqWiBuwoRQCmKffUKnb6tWcO214RuBZK84yX8e8JqZXV7cUE8RyQ1FC8R17RrmBBxyCMyaFX7+9a/hm8DQoSoXna3KXdjNzP5S3HZ3/2taIyqBCruJVB3TpsE114RvBb/7XRgVdM45ULdu0pHlnpIKu6WtqmemKfmLVD1vvQVXXw2vvgpbbRXu9+0LtWsnHVnuKCn5x5nhm29mrxa9pTdMEalOOnUK5SEmT4Ztt4X+/WGnneCRR+DGG0P/QSpNFqs8cdr8BxFW87oUuBqYAehSXETKdOCB8J//hGagpk3DgvL33AO9eoUTA6zrSO5Q1srgkhYb1exjZu+6e6VU/Fazj0j14A5jx4Y+gQ8/hJo1wwIykyZpslgmpKPZZ7OUWzMzOxTQWj8iEosZHHMMfPABPP44NG4cFo5p1AiaNUs6utwRp9lnGqGZpwB4C7gYODMTQYlI9VezJrRoEX4ecgjMnw9t20K/frBwYdLRVX9lJn8z62BmW7p7K3ffHvgr8El0+yjTAYpI9ZQ6Wezll0NTUN26MHx4qBx6ww3w889JR1l9lefK/15gFYCZdQFuAB4GfgDuy1xoIlKdFZ0s1qsXjB8PF18cqodecUUYGfTkk6GfQNKrzA5fM/v
A3dtG9+8GFrv7tdHjGe7eLtNBgjp8RXJNfn44EcyYEYrI3XprGDoq8WxMh29NMyus+98NSB3bH2c9ABGRcuvaFQoK4MEH4YsvwoLyJ54Y+gZk45Un+T8JTDGzccDPwBsAZvYHQtOPiEhG1KwJp58O//1vGBr6/POhKWjIEBWO21jlGudvZh2BFsBEd18ZbdsRaOju0zMbYqBmHxH56quwiMwjj0Dz5mFZye+/D81CqfMD8vNDn8LgwYmFmjU2apy/u09197GFiT/a9t/KSvwiIgBbbx2Wk3zvvfAN4M9/hnvvhaOPXlcqQjOFyyfOOH8RkayQlwdTpoRS0jVqhCag7t3hvPPWDR/VTOHSKfmLSJVUOFP4o4/gllvCSeCee2D33aFz56Sjy35K/iJSpdWpA3vsAQ0ahPWFX30VdtklLCwjJVPyF5EqrbCNf9Qo+PjjsGbAnDnhhPDXv8KqVUlHmJ2U/EWkSkudKWwWRgCNGQO77RbWEs7LC/MFZH1K/iJSpQ0evGHn7lFHwfvvw7hxsGRJGAo6ZIhqBaVS8heRaqtnz9Ah3LdvWEy+XbuwqIxUQvI3syZmNsrMPjGzj81sn2hNgFfMbE70s2mm4xCR3NSkCTzwQKgc+ssvYSTQBRfAypVlHlqtVcaV/+3AS+6+E9AW+BgYAkx29x2AydFjEZGMOeSQMALo3HPhjjtCn0DhEpK5KKPJ38w2BboAwwHcfZW7fw/0IpSFJvp5VCbjEBGBsFrYXXeFCWI1a8JBB4XFY3KxTlCmr/xbAYuBB83sfTN7wMwaAFu4+4Jon4XAFsUdbGb9zKzAzAoWL16c4VBFJFd06QIzZ8Kll4bFY3bZJfQLFJaIKJSfDzfdlEyMmZbp5F8LaA8Mc/c9gJUUaeLxUFmu2Opy7n6fu+e5e17z5s0zHKqI5JJ69UJif/vt0C/w0ENw2GHw7LPh+epeIyjTyf8r4Ct3fyd6PIpwMlhkZi0Aop/fZjgOEZFi7bUXTJsWJoetXh1KRnTtCscdV71rBGU0+bv7QuBLM2sTbepGWPf3OaBPtK0PMC6TcYiIlKZOnTA5bPp02HZbeO01WLYMrroKhg2DpUuTjjD9KmO0z/nA42Y2E2gH/AO4ETjYzOYAB0WPRUQStWxZGAI6cGCoFfT112F00JZbhjkDTz9dfSaKZXwZRnefAWywkADhW4CISFYobOMvbOo56qjw+L77wkpiTzwRVhJr1AiOPRZ694YDDgijhqoizfAVEWH9GkEQfj79NHz3Hdx8c1hHeNKk0BcwenQYJrrttjBoUFhkvnBRxJtuqhqjhsq1jGM20DKOIpItfv4ZXngBHnsMJkyANWvCcNFTToGWLUOzUeGJpOg3ispW0jKOSv4iIhth6VJ45plwIiisG7T77jBvHgwYEEpLJDlqaKPW8BURkeJtvjn07w9vvhkS/t/+FtYQ+PFHuPHGUFE0G1cWU/IXEUmTVq3gyivh7rth001hq61g/Piw/Zln4Ndfk45wHSV/EZE0ys+HE0+EsWPhyy/h+uthwYJ1s4UnTlzXOZwkJX8RkTQqurLYVVfBSy+F5L90KXTvDt26wTvvlP1amaTkLyKSRsWtLHbQQTByJHz6aSgnPXs2dOwYSkl8/HEycSr5i4hUkjp14PzzYe7cUE5i0iTYdVc444wwj6AyKfmLiFSyRo1CIbl58+DCC8Ps4R12gIsvhsqqXq/kLyKSkGbN4JZbQvmI3r3h9tuhdeuw6tj48evvm+5Zwkr+IiIJ23bbsKjM7Nkh8b/yCvToESaJ/fJLZtYW0AxfEZEs8+67YeLY++9D48ZQowaMGVOxWcKa4SsiUkXstVdYW+CUU2D5cjj77PSXh1DyFxHJQvn58PLLoWP4wQc3rBS6sZT8RUSyTGol0OuuCz9POCG9JwAlfxGRLFPS2gLvvZe+91CHr4hINaYOXxER+Y2Sv4hIDlLyFxHJQUr+IiI5SMlfRCQHVZnRPma2GPg8g2/RDFi
SwdfPBMWceVUtXlDMlaWqxLyduzcvurHKJP9MM7OC4oZDZTPFnHlVLV5QzJWlKsacSs0+IiI5SMlfRCQHKfmvc1/SAVSAYs68qhYvKObKUhVj/o3a/EVEcpCu/EVEcpCSv4hIDsqp5G9m25hZvpl9ZGYfmtkFxexzgJn9YGYzots1ScRaJKb5ZjYrimeD0qYW3GFmn5nZTDNrn0ScUSxtUj67GWa23MwuLLJP4p+xmY0ws2/NbHbKts3M7BUzmxP9bFrCsX2ifeaYWZ+EY77ZzD6J/t3HmlmTEo4t9W+okmO+1sy+Tvn3P7yEYw81s0+jv+shCcc8MiXe+WY2o4RjE/mcK8Tdc+YGtADaR/cbAf8Fdi6yzwHAC0nHWiSm+UCzUp4/HHgRMKAj8E7SMUdx1QQWEiaZZNVnDHQB2gOzU7bdBAyJ7g8BhhZz3GbAvOhn0+h+0wRjPgSoFd0fWlzM5fkbquSYrwUGleNvZy6wPVAb+KDo/9XKjLnI87cA12TT51yRW05d+bv7AnefHt1fAXwMbJVsVGnRC3jEg6lAEzNrkXRQQDdgrrtncmZ2hbj768CyIpt7AQ9H9x8Gjirm0O7AK+6+zN2/A14BDs1UnKmKi9ndJ7r7mujhVGDryoilvEr4nMtjL+Azd5/n7quApwj/PhlXWsxmZsAJwJOVEUsm5VTyT2VmLYE9gHeKeXofM/vAzF40s10qN7JiOTDRzKaZWb9int8K+DLl8Vdkx0ntJEr+T5JtnzHAFu6+ILq/ENiimH2y9bMGOIPwDbA4Zf0NVbYBUVPViBKa17L1c+4MLHL3OSU8n22fc4lyMvmbWUNgNHChuy8v8vR0QjNFW+BO4NlKDq84+7l7e+Aw4Dwz65J0QGUxs9pAT+CZYp7Oxs94PR6+w1eZcdBmdiWwBni8hF2y6W9oGNAaaAcsIDSjVBUnU/pVfzZ9zqXKueRvZpsQEv/j7j6m6PPuvtzdf4zuTwA2MbNmlRxm0Zi+jn5+C4wlfCVO9TWwTcrjraNtSToMmO7ui4o+kY2fcWRRYXNZ9PPbYvbJus/azE4HjgROiU5aGyjH31ClcfdF7r7W3X8F7i8hlmz8nGsBxwAjS9onmz7nsuRU8o/a64YDH7v7rSXss2W0H2a2F+EzWlp5UW4QTwMza1R4n9DBN7vIbs8Bp0WjfjoCP6Q0XySlxCukbPuMUzwHFI7e6QOMK2afl4FDzKxp1FxxSLQtEWZ2KDAY6OnuP5WwT3n+hipNkf6oo0uI5T1gBzNrFX2LPInw75Okg4BP3P2r4p7Mts+5TEn3OFfmDdiP8FV+JjAjuh0O9Af6R/sMAD4kjC6YCnRKOObto1g+iOK6MtqeGrMBdxNGR8wC8hKOuQEhmW+asi2rPmPCiWkBsJrQnnwmsDkwGZgDTAI2i/bNAx5IOfYM4LPo1jfhmD8jtI0X/j3/O9r398CE0v6GEoz50ejvdCYhobcoGnP0+HDCiLy5ScccbX+o8G84Zd+s+JwrclN5BxGRHJRTzT4iIhIo+YuI5CAlfxGRHKTkLyKSg5T8RURykJK/SAWZWcvUyo8iVYmSv4hIDlLyF0kDM9vezN43sw5JxyJSHrWSDkCkqjOzNoSSw6e7+wdJxyNSHkr+IhunOaEG0DHu/lHSwYiUl5p9RDbOD8AXhLpRIlWGrvxFNs4qQmXKl83sR3d/IumARMpDyV9kI7n7SjM7EnglOgEkXXpYpEyq6ikikoPU5i8ikoOU/EVEcpCSv4hIDlLyFxHJQUr+IiI5SMlfRCQHKfmLiOSg/wd6STphem9oYAAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ],
   "source": [
    "# Elbow method: fit KMeans for a range of cluster counts on the dev TF-IDF\n",
    "# matrix and plot the inertia (sum of squared distances to the closest\n",
    "# centroid) against k.\n",
    "RANDOM_STATE = 42  # fixed seed so the curve is the same on every run\n",
    "\n",
    "Sum_of_squared_distances = []\n",
    "K = range(2, 20)\n",
    "for k in K:\n",
    "    km = KMeans(n_clusters=k, max_iter=200, n_init=10, random_state=RANDOM_STATE)\n",
    "    km = km.fit(text_tf)\n",
    "    Sum_of_squared_distances.append(km.inertia_)\n",
    "\n",
    "plt.plot(K, Sum_of_squared_distances, 'bx-')\n",
    "plt.xlabel('k')\n",
    "plt.ylabel('Sum_of_squared_distances')\n",
    "plt.title('Elbow Method For Optimal k')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 161,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEWCAYAAAB8LwAVAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAyoElEQVR4nO3dd5iU5dnG4d8FWLFgQWMXG1ETRQQUNSo2sIXYSzS22GKLGrESsRvsRiX22AXBglGxsWKJDSMWQCNRFFQUY1CwgOD9/fG8++2wDrszMMPszl7nccyx87aZ+92BuffpigjMzMzqa1XpAMzMrGlygjAzs7ycIMzMLC8nCDMzy8sJwszM8nKCMDOzvJwgrGCSDpb0fM52SFqrkjGVSinvRdJ4SduV4rUqTdJvJT1Rptd+RtLv53Csn6Q7y/G+VjgnCJtN9uX2naRpOY9rKh0X/H+CCklX1NvfO9v/9wJfZ45fTOUm6e+SZtT7/e5TotdeSNJFkj7KPsP3JJ0iSQVev3r2e2xTuy8i7oqIHUoRnzU/bRo/xVqgXSPiqUoHMQf/AfaWdEpEzMz2HQT8u4IxFat/RJw1txdLapNz77nuA34G7AS8A3QB7gBWAY6f2/ezlsslCJtXO0l6X9IXki6R1ApAUitJZ0n6UNLnkm6XtGR27DZJJ2fPV8r+aj0m215T0pe1r5PHJOAtoGd2/tLAZsDQ3JMkbSrpn5KmSHpD0tbZ/guAXwHX5CkdbZf91T1F0rW1f3k3dC/Z8QOzY/+VdObc/iIlHS5pXHb/QyWtmHMsJB0j6T3gvTzXbgvsAOwREW9HxMyIeAk4ADimtvosKz1dJOkVSV9Leij7HQI8m/2ckv1uus+hWvEP2e9pqqTzss/sn9nrDZK0YHbuUpL+IWmypP9lz1eei9/LApLukTSk9rVt/nCCsHm1G+kv1c5Ab+DQbP/B2aMHsAawGFD7ZTwC2Dp7vhXwPrBlzvZzEfFjA+95O/C77Pm+wEPA9NqDklYCHgHOB5YG/gQMkdQ+Is4EngOOjYjFIuLYnNfdBegKbADsTZaEGroXSesBA4ADgRWBZYC5+RLcBrgoe98VgA+Be+ud9htgE2C9PC+xPfByREzI3RkRLwMTgW1zdv+O9DmtAMwErs72134G7bLfzYtzCLcnsDGwKdAHuIGUiFYBfgHsl53XCrgVWA1YFfiOun8DBZG0CPAg6fPdOyJmFHO9zRsnCMvnweyv6NrH4Q2c+5eI+DIiPgKupO7L4bfA5RHxfkRMA04H9s3qt0cAW2SlhC2B/sDm2XVbZccb8gCwdfZX/O9ICSPXAcCjEfFoRPwYEU8CI0lVLw25OCKmZPdSA3Qq4F72BP4REc9GxHSgL9BQcgP4U87v9ouc97glIv6Vvc7pQHdJq+dcd1H2u
/4uz2suC3w6h/f7NDte646slPFNFu/eklo3EnOu/hHxdUSMBt4Gnsh+N18BjwEbAUTEfyNiSER8GxFTgQtIn2+hlgCGkaoVD4mIWUVcayXgBGH5/CYi2uU8bmzg3Ny/WD8k/RVN9vPDesfaAMtHxH+Ab0hfwL8C/gF8IqkjBSSI7AvyEeAsYJmIeKHeKasBe+UmOWAL0l/MDZmU8/xbUkmhwXvJjv3/7yD70v1vI+9zac7vtvaLe7b3yBLRf4GVcq6brXRQzxfM+f5WyI7ne50PgQWYPYE05rOc59/l2V4MQNKikq7Pqt++JlVhtSsiGW1KKs1dHJ5VtCKcIGxerZLzfFXgk+z5J6Qv6txjM6n7MhlB+ut7wYj4ONs+CFgKGFXA+94OnAzk6wo5gfRXcm6SaxsRF2fHi/2yaehePiXndyBpUVI1U7Fmew9JbbPX+TjnnIbifgrYRFLu54GkTbL4hufsrv+Z/UBKIKX+Ej4Z6AhsEhFLUFeFVVCvKuAJUrXb05KWL3FsVgAnCJtXp2SNkasAJwADs/33ACdK6iBpMeBCYGBO75sRwLHUNYw+k20/X2BVwghSvftf8xy7E9hVUk9JrSUtLGnrnAbSz0htCYVq6F4GA7tI2iJrQD2Xuft/dQ9wiKROkhbK3uPliBhfyMVZr7OnSW0t62f3vSnpdzEgInIbtg+QtF6WzM4FBme/88mk6rFifjcNWZxUopiSNYSfXewLRER/4G5SkiimlGMl4ARh+Tys2fvpP9DAuQ8Br5H+6n8EuDnbfwupi+WzwAfA98BxOdeNIH2B1CaI54FFc7YbFMnTEfFlnmMTSA3mZ5C+9CYAp1D37/0qYM+sZ83V9a/PY473ktXDH0P6EvsU+B+pUbgo2Rd8X2BI9jprkhrgi7EHqe1kGDCNlBxuZvbfO9m9/J1UpbYwWRfYiPiW1E7wQlY1t2mx91HPlcAipNLJS1lcRYuI80gN1U/l9Liy+UCu2jNrOSQ9A9wZETdVOhZr+lyCMDOzvJwgzMwsL1cxmZlZXi5BmJlZXs1+sr5ll102Vl999UqHYWbWrLz22mtfRET7hs5p9gli9dVXZ+TIkZUOw8ysWZH0YWPnuIrJzMzycoIwM7O8nCDMzCwvJwgzM8vLCcLMzPJqcQmif3+oqZl9X01N2m9mZnVaXILo2hX23rsuSdTUpO2uXSsbl5lZU9Psx0EUq0cPGDQI9twTOnaE995L2z16VDoyM7OmpcWVICAlg65d4cUXYemlYZNNKh2RmVnT0yITRE0NvPYa7Lgj/Pvf0K0bfPmTZWfMzFq2FpcgatscBg2CRx+Fs8+G0aOhc2eYWPQ6YGZm1avFJYhXX529zaFfP7jsMpg0CTbbDMaMqWh4ZmZNRrNfD6JLly5Risn6Ro1KVU7Tp8M//pGShZlZtZL0WkR0aeicFleCmJNOneCf/4RlloHttoOHH650RGZmlVX2BCGpnaTBkt6RNFZSd0kDJY3KHuMljco5/3RJ4yS9K6lnuePL1aEDvPACrL8+7LYb3Hrr/Hx3M7OmZX6UIK4ChkXEz4ENgbERsU9EdIqITsAQ4H4ASesB+wLrA72A6yS1ng8x/r/lloPhw2GbbeDQQ1O10/Dhs5/jkddm1hKUNUFIWhLYErgZICJmRMSUnOMC9gbuyXb1Bu6NiOkR8QEwDuhWzhjzWXzx1A6x//4wbBjsvDM8/XQ65pHXZtZSlLsE0QGYDNwq6XVJN0lqm3P8V8BnEfFetr0SMCHn+MRs32wkHSFppKSRkydPLkvgCy4Id9wBJ54I33+fksSZZ9Z1kfXIazOrduVOEG2AzsCAiNgI+AY4Lef4ftSVHgoWETdERJeI6NK+fYNLqs6TVq1SF9j+/VPvpgsvhKOPdnIws5ah3AliIjAxIl7OtgeTEgaS2gC7AwNzzv8YWCVne+VsX8VI0KULLLxwev7Xv/50Nlgzs2pU1gQREZOACZI6Zru2BWqHom0HvBMRueOXh
wL7SlpIUgdgbeCVcsbYmNo2h4EDUwN2+/azzwZrZlat5sdsrscBd0laEHgfOCTbvy/1qpciYrSkQaQkMhM4JiJmzYcY5yh35PWMGbDXXnDkkWm/q5rMrJp5JHURItL4iCeegLfegjXXnC9va2ZWch5JXWISXHstLLAAHHFEShhmZtXKCaJIK62UejUNH+6R1mZW3Zwg5sLhh8OWW8LJJ6dZYM3MqpETxFxo1QpuuAG++w6OO67S0ZiZlYcTxFzq2BH+/GcYPBgefLDS0ZiZlZ4TxDw45RTYYAM45hj46qtKR2NmVlpOEPNggQXg5ptTO8Spp1Y6GjOz0nKCmEdduqQJ/a6/Hp59ttLRmJmVjhNECZxzTlps6PDD08yvZmbVwAmiBNq2Tb2a/v1vOO+8SkdjZlYaThAlst12sPHGcPHF8MYbdfu9+pyZNVdOECXUt2+afmPvvWHmTK8+Z2bNmxNECfXuDWedlaqaNt/cq8+ZWfPmBFFi55yTqppeeSWNuG7XrtIRmZnNHSeIEnvmGfjww1R6+OKL1A22b9+0ZKmZWXPiBFFCtW0OgwalFegeeCANpjv//JQo5tOyFWZmJeEEUUK5q88B/PrX8NhjcPDB8OWXsOmmcMYZHithZs2DV5SbT6ZMSdOD33ILrLsubLMN7LHH7A3YNTUpyfTpU7EwzayF8IpyTUi7dmnepsceg6lT4brrYOed4fHH03F3iTWzpqbgBCFpc0lts+cHSLpc0mrlC6069eoFb78Nhx2W1pPYeWc49FB3iTWzpqeYEsQA4FtJGwInA/8Bbi9LVFVuySXhxhtT6aFt27R06S67ODmYWdNSTIKYGanBojdwTURcCyze2EWS2kkaLOkdSWMldc/2H5ftGy2pf875p0saJ+ldST2LvaHmZIEF0mPppeG22+Dvf690RGZmddoUce5USacDBwK/ktQKWKCA664ChkXEnpIWBBaV1IOUaDaMiOmSlgOQtB6wL7A+sCLwlKR1ImJWEXE2C7VtDvfdB6utlgbXHXZYKlHstVelozMzK64EsQ8wHTg0IiYBKwOXNHSBpCWBLYGbASJiRkRMAY4GLo6I6dn+z7NLegP3RsT0iPgAGAd0KyLGZiO3S+waa8BTT8GCC8Lxx3t1OjNrGgpOEFlSGAIslO36Anigkcs6AJOBWyW9LummrKF7HVIp5GVJIyTV9t1ZCZiQc/3EbN9sJB0haaSkkZMnTy70FpqUPn1mb3PYeGN46KE0+nr33T3y2swqr5heTIcDg4Hrs10rAQ82clkboDMwICI2Ar4BTsv2Lw1sCpwCDJKkQmOJiBsioktEdGnfvn2hlzV5O+yQxkkMH54G1/34Y6UjMrOWrJgqpmOAzYGvASLiPWC5Rq6ZCEyMiJez7cGkhDERuD+SV4AfgWWBj4FVcq5fOdvXYhx4IFx0Edx7rwfMmVllFZMgpkfEjNoNSW2ABodhZ9VSEyR1zHZtC4whlTx6ZK+zDrAgqcpqKLCvpIUkdQDWBl4pIsaqcOqpcOyxcNllcMUVlY7GzFqqYnoxjZB0BrCIpO2BPwAPF3DdccBdWQ+m94FDSFVNt0h6G5gBHJR1oR0taRApicwEjqnGHkyNkeDKK+HTT+Gkk2CFFWDffSsdlZm1NAXPxZR1az0M2AEQ8DhwU1R4MqfmMhfT3Pj++9Qu8fLLMGyYB9KZWemUei6mRYBbImKviNgTuCXbZ2Wy8MKpZ1O7dmmk9Ztv1h3zWtdmVm7FJIinmT0hLAI8VdpwrL6lloKrr06liW22gY8+8sR+ZjZ/FNMGsXBETKvdiIhpkhYtQ0xWzz77wLRpcPjhabxERBqB7SonMyunYkoQ30jqXLshaWPgu9KHZPkcdljqAvvFF6kRu2PHxq8xM5sXxSSIPwL3SXpO0vPAQODYskRlP1FTA48+Cr/7XUoSXbumta/NzMql4CqmiHhV0s+B2r9d342IH8oTluXKXeu6Rw/o1g2OOy79fOEFWGutS
kdoZtWo2BXlugIbkEZD7yfpd6UPyeqrv9b1McfAgAGpXWLLLWHs2MrGZ2bVqZhxEHcAawKjgNrBaxERx5cntMJU8ziIxoweDdtum+ZsevJJ2HDDSkdkZs1FIeMgiunF1AVYr9ID46zO+uvDs8+mJNGjR1qhzl1fzaxUiqliehv4WbkCsbmzzjrw3HNpMN2228Lzz1c6IjOrFsUkiGWBMZIelzS09lGuwKxwq6+eksSKK0LPnvD005WOyMyqQTFVTP3KFYTNu5VWghEjYPvtU5I47zw4/fS64zU1qbHbU4ibWaGK6eY6opyB2LxbfvmUCLp3hzPOgBkz4OyzZ+8ma2ZWqGJWlNtU0quSpkmaIWmWpK/LGZwVb5ll4JVXYL31oF8/WHtt2HlnOOIIWHrpny5l2r9/SiC5PBGgmUFxbRDXAPsB75Em6vs9cG05grJ5065dmiJ8441h3LiUFC68EDp1grZtU/LYZ59UDTV9OuyxR127hScCNLNaxbRBEBHjJLXOFvG5VdLrwOmNXWfz36uvpqk4+vZNg+r694dFF4W33kqPkSNnr3LabjvYaCOYMGH2QXlm1nIVkyC+zVaFGyWpP/ApxY/Etvmg/tQcPXrUbZ9/ft1506bBmDEpYVxzDbz+Oqy6aippmJkV8wV/YHb+saQlQ1cBdi9HUDZv6k/N0aNH2n711dnPW2yxNJ/TGmvAxImw225pvYlOnTwRoJkVN9XGCRFxVWP75reWPNVGKdQvbVxxBZx8cmrHePLJ1I5hZtWn1EuOHpRn38FFRWRNTv3Sxoknws03p0WJttoqTTFuZi1ToyUISfsB+wNbAM/lHFoCmBUR25YvvMa5BFEen36a1sEeNSo1ch9xRKUjMrNSKtVkff8kNUgvC1yWs38q8Obch2dN2QorpJHZ++wDRx4J48enBu5W7pZg1mI0+t89Ij6MiGeA7YDnshHVnwIrA2rsekntJA2W9I6ksZK6S+on6WNJo7LHTjnnny5pnKR3JfWc+1uzebXYYvDQQylBXHQRHHDATwfamVn1KubvwWeBhSWtBDxB6tX09wKuuwoYFhE/BzYEape3uSIiOmWPRwEkrQfsC6wP9AKuk9S6iBitxNq0SVVMF18M99yTej397391xz3q2qx6FZMgFBHfkrq2XhcRe5G+yOd8gbQksCVwM0BEzIiIKQ1c0hu4NyKmR8QHwDigWxExWhlIcOqpcOaZ8OabqRvs+PEedW1W7YpKEJK6A78FHsn2NfbXfQdgMtmoa0k3SWqbHTtW0puSbpG0VLZvJWBCzvUTs331AzlC0khJIydPnlzELdi8OP/81A12wgT45S9hzz096tqsmhWTIP5ImlbjgYgYLWkNoKbhS2hDWr96QERsRBpgdxowgLR8aSdSe8Zlc3qBfCLihojoEhFd2rdvX8ylNo/++MfUo2naNGjd2uMkzKpZwQkiIkZExK8j4i/Z9vsFrEc9EZgYES9n24OBzhHxWUTMiogfgRupq0b6mDRCu9bK2T5rImpqYMgQ+O1vYfLkNIfTzJmVjsrMyqHRBCHpyuznw7kryRWyolxETAImSOqY7dqWtCrdCjmn7UZazhRgKLCvpIUkdQDWBl4p7pasXHJHXd95Zxpx/eqraTZYr1RuVn0KGQdxR/bz0rl8j+OAu7KJ/t4HDgGultQJCGA8cCRAVnU1CBgDzASOyWaOtSag/qjrSy9NczgNHAhXXplGYZtZ9Sh4LqamyiOpK+vHH2GvveCBB+DBB+HXv650RGZWiJKMpJb0Fukv/bwiYoO5iM2qRKtWcMcdsPXWsN9+8Nxz0LlzpaMys1IopIppl+znMdnP2iqnA2ggcVjLseiiMHQobLIJ7LprWs1u5ZUrHZWZzatCp9r4ENg+IvpExFvZ41Rgh/KHaM3Bz34GjzwCU6emSf6mTq10RGY2r4odKLd5zsZmRV5vVe4Xv4D77oO3307VTbPcvcCsWSvmC/4w0txI4yWNB64DDi1LV
NZs9ewJf/1rKk2cdFKlozGzeVHwmtQR8RqwYTa/EhHxVe5xSQdFxG0ljs+aoaOPhvfeS9NyRMDVV9cdq6lJ3WX79KlcfGZWmKKriCLiq/rJIXNCCeKxKnHJJbDZZqk0cdFFaZ8n9zNrXkrZhtDo2hDWcrRuDU88AWuvDWecAR07ws47w4EHpuMTJ6YxFLX6908JJJenEjerrFImCHd5tdm0bQvPPAPrrw///jfMmJGqnbbZBlZZJS1ItOGGaVbYUaPSILurr05zO7m0YVZ5BbdBFMAlCPuJd9+Fzz6Dvn3TwkN//Su0b58Sxnvvpcdbb8H776fEcMIJcM01aVEiTyVuVlmlTBAvlPC1rArkTu7Xo0d61G4fffTs586cCR9+mKYSHz48TSO+9dYVCdvMMoVMtdFgZ8WIuDz7eWypgrLqUH9yvx490varr/60ZNCmDXz0UVqxrnt3ePFFOOoouP76+R+3mSWFlCAWz352BLqSpuQG2BVPxW0NyNeVtbYkUV9uaWOrrWDbbeGGG2D55eHcc8sfq5n9VKMJIiLOAZD0LGmxn6nZdj/qlh41myf1SxuPPZYaqC+8EHbYAbbYorLxmbVExfRiWh6YkbM9I9tnNs/69Jm9ZLHwwqkH1JprQu/eqbHbzOavYhLE7cArkvplpYeXAY+ctrJZZhl49NE0pmKnneDzzysdkVnLUsya1BeQVoP7X/Y4JCIuLFdgZpBKEEOHwiefpHES335b6YjMWo5iB8otCnwdEVcBE7N1o83KatNN4e674ZVX4IADPEus2fxScIKQdDZwKnB6tmsB4M5yBGVW3267pVHYDzwAf/pTpaMxaxmKGSi3G7AR8C+AiPhE0uINX2JWOiecAB98AFdeCR06wPHHVzois+pWTBXTjIgIsjmXJLUtT0hmc3bZZWlupxNOgAcfrNvvif3MSq+YBDFI0vVAO0mHA08BNzZ2kaR2kgZLekfSWEndc46dLCkkLZttS9LVksZJelNS52JvyKpb69ZpKvE2bWCffdL6157Yz6w8CkoQkgQMBAYDQ0ijqv8cEX8t4PKrgGER8XNgQ2Bs9pqrkNa0/ijn3B2BtbPHEcCAwm7DWpIdd4SBA9P8TVtskXo3DRxYmon9PO24WZ2CEkRWtfRoRDwZEadExJ8i4snGrstWn9sSuDl7nRkRMSU7fAXQh9mnCe8N3B7JS6TSygqF3461FLvvDscdl5LEtGlw3XXw5Zfz/rpdu6bSyP33p22XTqwlK6aK6V+Siv1v0gGYDNwq6XVJN0lqK6k38HFEvFHv/JWACTnbE7N9ZrOpqYG77oKzzkrrTjzwQFpb4pln5u1127eHddaBPfZIJZXc2WjNWppiEsQmwIuS/pO1D7wl6c1GrmkDdAYGRMRGwDdAP+AM4M9zEzCApCMkjZQ0cvLkyXP7MtZM5U7sd9558PDDsMQSaf3rbbZJK9j98ENxrzl+PBx0EGywAYweDcsuC8OGpXEXTg7WUinVHhVworRavv0R8WED1/wMeCkiVs+2f0VKEL8EasfErgx8AnQDzgGeiYh7svPfBbaOiE/n9B5dunSJkSNHFnQPVh36909VPrlf3DU18PzzaU2Jm29Ox+++G9Zaq+HX+vxzuOCCtJhR69ap6+xmm8Ehh8DXX6fG8Mcec5Kw6iPptYjo0uBJEVHUA1gOWLX2UcD5zwEds+f9gEvqHR8PLJs93xl4jLQ63abAK429/sYbbxxmuQYPjlhqqYjFFou49daIH3/86TlffRVx9tnpnNatIw4/PGLChIjhwyOWXTb9PO+8CIhYcsm0bVZNgJHRyPdrMSOpfy3pPeADYET2xf5YAZceB9yVVUd1Ahqav+lR4H1gHKkL7R8Kjc+s1h57wBtvpFXpDjkkVTtNmZKOff89HHMMrLACnHNOamcYPTqtPbHyyrNPO37KKdCxY2rj+Oc/K3pLZhVRzEjq80h/1T8VERtJ6gEc0NhFETEKmGMxJrLqp+x5AMcUE
ZNZXqusAk8/DUcemaqcOnaEk06Cyy9P1Uobbwx/+xt0qfcvM3eRo4UWSr2jtt0Wpk+fv/GbNQXFNFL/EBH/BVpJahURNTTwxW9Waa1bw003pS/5L76A005LXWEvvRRGjvxpcshnm21SQ/XFF3tNCmt5ikkQUyQtBjxLqjK6itQryaxJO/roVF0EKUmcfHJx1196aapm+sMfUk8ps5aimATRG/gOOBEYBvyHtC61WZNWU5Oqmfr2TdVK9UdKN2b55eGii2D48NQzyqylKLiba1Plbq7WkNwxEz16/HS7UD/+mLq/fvABvPMOLLVU+WI2mx8K6eZaTC+mqZK+zh7fS5ol6et5D9OsfHJ7JUH6OWhQ2l+MVq3SWIkvvoAzzyx9nGZN0VyVILLJ+3oDm0bEaSWPqgguQdj8dOKJcNVV8OKLsMkmlY7GbO6VtASRKxtn8SDQc26uN2uuzj0XVlwRjjoqTRRoVs0KHgchafeczVakLq7flzwisyZs8cXTinZ77QXXXpsWLjKrVsWUIHbNefQEppKqmcxalNqZXs86Cz7+uLBrvM6ENUcFJ4iIOCTncXhEXBARn5czOLOmSIJrrklVTH/8Y2HX1K4zUZskvM6ENQfFVDFd3dDxiPAS8tZirLEGbLUVDB6cZnvdcce0v6Ym9ZCqnbLju+9St9iJE9OUHb16wZprwqRJMGSIZ4m1pq2YuZgWBtYjLT0KsBcwBnix1EGZNQcnnghPPQWHHpqm4RgyJLVJ/OY3acW70aNh3Lg0hgJggQXS+ImxY9P2rbdChw6w+uqVugOzhhWzHsRLwBYRMTPbXgB4LiI2LWN8jXI3V6ukyy//6dQdrVrB2mvDL36RHuuvn35OnAj7759mmK2topLS7LJnnJEWKTKbX0rdzXUpYImc7cWyfWYt1kknpWojSCWH11+Hb75J1UqDB0O/fqnH06RJKTkMGpQaph95JPWI2m67NK5izTXhwgvTtWZNRTEJ4mLgdUl/l3Qb8C8aXtvBrOrV1KSZYfv2TSva/e9/sPDCPz0v34juwYNTO8bbb6dZY888M5U89tgjVV3Vfx/3eLL5rrEVhXIfwM9IXVt7Az8r5tpyPbyinFVK7upz+baL9cILEZtvnlaxa906ol+/tBrevL6uWT6UeEW5zYGpEfEQsDjQZ07rVJu1BKWa56nWZpvBc8/B0KFpdbt+/dLCR7vvXvzkgmalUEwj9ZvAhsAGwK3AzcDeEbFV+cJrnBuprRrNmgW77QYPP5y2Dz00TTm+3HKVjcuqR6kbqWdmxZLewLURcS2pJGFmJfbss2lCwD59YJFF4LbbYJ114OqrPQeUzT/FJIipkk4nrUP9iKRWwALlCcus5cpds+Ivf0k9npZYAtZaK42z6NwZRoyodJTWEhSTIPYBpgOHRcQkYGXgkrJEZdaC5WvbGDIkdZe9/374+mvYeuvUbbbQuaDM5kbJVpST9GJEdC/JixXBbRDW0nz7beryevHF0KYNbLllmhNqhx3qzqk/5YdZfWVbD2IO8vT+BkntJA2W9I6ksZK6SzpP0puSRkl6QtKK2bmSdLWkcdnxziWMz6wqLLpo6uE0ZkwaaPfYY7DTTnXjJDwRoJVKKRPEnIoiVwHDIuLnpF5QY4FLImKDiOgE/AP4c3bujsDa2eMIYEAJ4zOrKmusAQ8+mBLEz34Gp54Kv/xlqopyt1grhVImiJ+QtCSwJalLLBExIyKmRETuWtZtqUsuvYHbs3EcLwHtJK1QzhjNmrteveA//6kblT19OsyYUemorBo0miAkLVTgaynPvg7AZOBWSa9LuklS2+x1L5A0AfgtdSWIlYAJOddPzPbVj+kISSMljZw8eXKB4ZlVr3/+M80e+/vfpynGe/WCo4+GadMqHZk1Z4WUIF4EkHRHI+cdmGdfG6AzMCAiNgK+AU4DiIgzI2IV4C7g2IIjTtfeEBFdIqJL+/bti7nUrOrkdou98cbULXaRReBvf4MNN4QXXqh0h
NZcFZIgFpS0P7CZpN3rP2pPioi381w7EZgYES9n24NJCSPXXcAe2fOPgVVyjq2c7TOzOajfLbZnz5QkjjoKIuBXv0rtE9OnVzZOa34KWTDoKFI1UDvSetS5Arh/ThdGxCRJEyR1jIh3gW2BMZLWjoj3stN6A+9kz4cCx0q6F9gE+CoiPi34bsxaoHxdWXv0SI+pU+FPf0o9nB59NK1q17v37A3Y7hJrc1LMXEyHRcTNRb+B1Am4CVgQeB84JNvuCPwIfAgcFREfSxJwDdAL+BY4JCIaHOTgcRBmjXv00dQ+8dlnaTryhx5KXWRzq6fc66llKWQcRDEJYkFSaWLLbNcI4G8R8cM8RTmPnCDMCvPf/6bV6wYOTAPsjjgiJQYnh5ap1APlrgM2zn5eR9b4PPfhmdn8tMwycO+9cM89KUFcdx1065am7TDLp5A2iFpdI2LDnO3hkt4odUBmVl7LL59GY6+wQqp62nzzVOXkDoFWXzEliFmS1qzdkLQGMKv0IZlZudS2OQweDOPGwR/+kKYV79gxJQuzXMUkiFOAGknPSBoBDAdOLk9YZlYOuV1iW7WCa6+Fm26CBReEnXeGY49NA+3MoMjZXLNR1R2zzXcjYnrOse0j4skSx9coN1Kbzbvvv4czzoArroB114W77oKNNqp0VFZOJZ/NNSKmR8Sb2aP+sJu/FB2hmTUJCy8Ml18OTzwBU6bAJpukGWKffnr282pq6maNtepXysn68s3FZGbNyPbbw1tvwa67pllid9wxdYsFTyPeEs2P6b7NrBlZZpnUiH3LLdC6Ney3Xxp97QF1LU9Zp/s2s+ZJgkMOSdOHr7giDB2aZoYdOBCefx5+/LHSEdr8UMoEMb6Er2VmTcBHH6VJ/vbbL0389/e/p8n/1lwTzjoL3nmn7tz+/VM1VC63WTRvBScISa0l/VrS8ZJOqn3UHo+I3Ru63syal9x5mu6+O7VJLLYYnH56Gjdx0UWpx1OXLnDllSlp7L13XZJwm0XzV8xI6oeB74G3SJPsmVkVqz+NeI8ecN99af+wYTBpUpq648474cQT07iKzp1Te8Xxx8P117vNorkrZrK+NyNigzLHUzSPgzCrvLFj09iJO++EDz9M+/74xzSuwpqmUo+DeEzSDvMYk5lVoXXXhfPPh5tvTtVQbdrAVVfBJZdUOjKbF8UkiJeAByR9J+lrSVMlfV2uwMyseampgX33TT2e3ngDVl01LUJ04IEwc2alo7O5UUyCuBzoDiwaEUtExOIRsUSZ4jKzZia3zWK99WD0aOjVK1U7bbcdfPJJpSO0YhWTICYAb0cxkzeZWYvRp8/sDdJt26aeT7ffnpJHp07w+OMVC8/mQjEJ4n3gGUmn5+vmamaWz4EHwsiRaR2KXr3gzDNd5dRcFJMgPgCeJq0tvXjOw8ysQeuuCy+/nNbFvvBCWHvtVB2Vy4Pqmp6Cx0FExDnlDMTMqtuii8KNN6YlTn//+9Sg/cEHcOqpsw/Ks6aj4AQhqYY8E/JFxDYljcjMqtpvf5tGX++0E5x2Wppi/M03PaiuKSpmJPWfcp4vDOwBuCbRzIrWsWOaCLBrVxg+PJUqnByanoLbICLitZzHCxFxErB1Y9dJaidpsKR3JI2V1F3SJdn2m5IekNQu5/zTJY2T9K6knnN1V2bW5L30Enz2GWywATzzDBx8cKUjsvqKmaxv6ZzHspJ6AUsWcOlVwLCI+DmwITAWeBL4RTZ1x7+B07P3WA/YF1gf6AVcJ6l1UXdkZk1ebpvDv/4FPXvCbbfB736XZo21pqGYKqbXqGuDmEma3vuwhi6QtCSwJXAwQETMAGYAT+Sc9hKwZ/a8N3BvtpzpB5LGAd2AF4uI08yauPoTAT76KOyyC9xxB6y2Gpx7blqTwiqr0QQhqSswISI6ZNsHkdofxgNjGrm8AzAZuFXShqQkc0JEfJNzzqFAtqghK5ESRq2J2b76MR0BHAGw6qqrNnYLZtbE9Okz+3arVvCPf
8CRR6Y5nWbNggsucJKotEKqmK4n/dWPpC2Bi4DbgK+AGxq5tg3QGRgQERsB3wCn1R6UdCapNHJXMUFHxA0R0SUiurRv376YS82siWrVKk0RfuSRaa2J005zdVOlFVLF1Doivsye7wPcEBFDgCGSRjVy7URgYkS8nG0PJksQkg4GdgG2zZm+42NglZzrV872mVkL0KoVXHdd+tm/fypJXHKJSxKVUlCCkNQmImYC25JV7RRyfURMkjRBUseIeDe7fkzWwN0H2Coivs25ZChwt6TLgRWBtYFXirgfM2vmWrWCa6+F1q3hssvS+teXXeYkUQmFJIh7gBGSvgC+A54DkLQWqZqpMccBd0lakDSf0yHAq8BCwJNKn/pLEXFURIyWNIjUtjETOCYiZhV5T2bWzElw9dUpWVxxRVqEaPDguiRRU5Mauuu3ZfTvn8ZW5I6pmNO51rhGE0REXCDpaWAF4Imc6qBWpC//xq4fBdRftWitht4PuKCx1zWz6ialta4//hiGDIHdd4f7709jJuY0LUfXrnXHevTwFB7zquAlR5sqLzlqVt0iYJ990nrYK64Ikyenta+XXz5VQ9V/fP45jBgBm2yS1qTwFB75FbLkaDHjIMzM5jsJBg6EL75IJYIVV4QffkjVTrNm5X+0aZOSROfOaRoPmzvFTPdtZlYRzzwDb70FffvCjBlw6aUwalTaN2YMvPsujBuXZoe97TZYeGHYbLM0Svv44ysdffPlBGFmTVpuO8K556afe++d9jd07nPPweabwzXXwF/+Mv/jrgZOEGbWpNWflqNHj7T96qsNn9uqFQwbBmuuCWefnUoaVhw3UptZVZswAbp1g0UWgVdegWWXrXRETUMhjdQuQZhZVVtlFXjoIfj009RVdvr08r9n//4/rQJrjkuqOkGYWdXr1g1uvTW1Sxx9dPnneKodj1GbJGrbRrp2Le/7lpq7uZpZi7DvvvDOO3DOObDuunDKKeV7rx494J57YNddU1fbsWOb53gMlyDMrMX485/TX/KnngpDh5bvfcaMgTPPhG++SaWWHXdsfskBnCDMrAVp1SpVNW28Mey/P7zxRmlff+ZMuPhi2GijVFpZfHFYYolUmhg+vLTvNT84QZhZi7LooqnRul27VAU0aVJpXnf0aOjeHU4/PU3z0aZNep+rrkqJ4ze/yT92oylzgjCzFmfFFVMV06RJsM028P33dceK7W00cyZceGFqaxg/PrU17LJLmn22Rw848EBYf/1UmnjppUZfrklxgjCzFqlz59ROMHZs+kKfMaP43kZvvQWbbppe5ze/SW0Pe+2VphavbXNo3TpVO33ySSq1NCdOEGbWYp19Nhx2GDz9dJq/afvt01/7zz6bSgBjx6aJAWH2sQ0//JDWzq5ta7jvvjSh4JxWQN55Z/jVr1IPqmnT5s+9lYJHUptZi5Y7nfg666TZYN9/v26sxAILpP3t26eR2CecAI8/niYCXGih1AC9226Nv8+LL6YJBM89N006WGkeSW1m1ohnnkklg7594csv4cYb01/5r70Gt98OJ50EHTqk6cW//RYuuiiVGpZYAh57rLDkAKkBe/fdU0lk8uSy3lLJeKCcmbVYubO/9uiRHrnbnTvPfv4338CJJ6Yk0rdv8WMbLrww9Ww6//zUu6mpcwnCzFqsYmaKhVTF9MADKTkMGFB8t9WOHVObx4ABqRqrqXMbhJlZAeqXNupvF+qTT2CttVKvp7vvLlu4jXIbhJlZiRRb2piTFVdM1VT33JMaupuyspcgJLUDbgJ+AQRwKLAy0A9YF+gWESNzzj8dOAyYBRwfEY839PouQZhZc/PVV7DGGmnKjyeeqEwMTaUEcRUwLCJ+DmwIjAXeBnYHns09UdJ6wL7A+kAv4DpJredDjGZm882SS8JZZ8GTT6ZHU1XWBCFpSWBL4GaAiJgREVMiYmxEvJvnkt7AvRExPSI+AMYB3coZo5lZJfzhD7DaanDaafDjj5WOJr9ylyA6A
JOBWyW9LukmSW0bOH8lYELO9sRs32wkHSFppKSRk5tLh2IzsxwLLQTnnZfaIQYNKvy6+blaXbkTRBugMzAgIjYCvgFOm9cXjYgbIqJLRHRpP6ex7WZmTdz++8MGG6S5nGbMKOya+blaXbkTxERgYkS8nG0PJiWMOfkYWCVne+Vsn5lZ1amdyO/99+GGGwq7Zuut4coroXfvVE01N11tC1XWBBERk4AJkjpmu7YFxjRwyVBgX0kLSeoArA28Us4Yzcwq6c03YcMN0xxNU6emfblVRt9/Dy+8AJdckqb1WGEFOOCAdO6AAWmN7XKtVjc/pto4DrhL0oLA+8AhknYD/gq0Bx6RNCoiekbEaEmDSElkJnBMRMyaDzGamVVEt26pFDFlClx2WZpN9rDDoGfPNH/Ta6/VzSi75pqwww6wzDJw222pBDFgQN00IaXmkdRmZhVWUwO9es3eDrHQQqldYbPN0qN7d1huudKN6G4q4yDMzKwBPXrAUUel59tvn1ae+/preO45+MtfUnvDcsul46Ua0V0Iz+ZqZlZhNTVpXqbaSQC//RYWXDD/uX36/HRfuaqYXIIwM6ug3Cqic89NP3O7sVaSE4SZWQXNzyqjYrmR2sysBXIjtZmZzTUnCDMzy8sJwszM8nKCMDOzvJwgzMwsr2bfi0nSZODDSscxj5YFvqh0EGVW7ffo+2v+qv0e69/fahHR4HoJzT5BVANJIxvrbtbcVfs9+v6av2q/x7m5P1cxmZlZXk4QZmaWlxNE01DgWlLNWrXfo++v+av2eyz6/twGYWZmebkEYWZmeTlBmJlZXk4QFSZpvKS3JI2S1OynpZV0i6TPJb2ds29pSU9Kei/7uVQlY5xXc7jHfpI+zj7HUZJ2qmSM80LSKpJqJI2RNFrSCdn+qvgcG7i/qvgMJS0s6RVJb2T3d062v4OklyWNkzRQ0hyWJMp5LbdBVJak8UCXiKiKATqStgSmAbdHxC+yff2BLyPiYkmnAUtFxKmVjHNezOEe+wHTIuLSSsZWCpJWAFaIiH9JWhx4DfgNcDBV8Dk2cH97UwWfoSQBbSNimqQFgOeBE4CTgPsj4l5JfwPeiIgBDb2WSxBWUhHxLPBlvd29gduy57eR/jM2W3O4x6oREZ9GxL+y51OBscBKVMnn2MD9VYVIpmWbC2SPALYBBmf7C/r8nCAqL4AnJL0m6YhKB1Mmy0fEp9nzScDylQymjI6V9GZWBdUsq1/qk7Q6sBHwMlX4Oda7P6iSz1BSa0mjgM+BJ4H/AFMiYmZ2ykQKSIpOEJW3RUR0BnYEjsmqL6pWpDrNaqzXHACsCXQCPgUuq2g0JSBpMWAI8MeI+Dr3WDV8jnnur2o+w4iYFRGdgJWBbsDP5+Z1nCAqLCI+zn5+DjxA+jCrzWdZvW9t/e/nFY6n5CLis+w/5Y/AjTTzzzGrux4C3BUR92e7q+ZzzHd/1fYZAkTEFKAG6A60k9QmO7Qy8HFj1ztBVJCktlkjGZLaAjsAbzd8VbM0FDgoe34Q8FAFYymL2i/OzG40488xa+S8GRgbEZfnHKqKz3FO91ctn6Gk9pLaZc8XAbYntbPUAHtmpxX0+bkXUwVJWoNUagBoA9wdERdUMKR5JukeYGvS1MKfAWcDDwKDgFVJU7PvHRHNtpF3Dve4NalqIoDxwJE59fXNiqQtgOeAt4Afs91nkOrpm/3n2MD97UcVfIaSNiA1QrcmFQIGRcS52ffNvcDSwOvAARExvcHXcoIwM7N8XMVkZmZ5OUGYmVleThBmZpaXE4SZmeXlBGFmZnk5QZiVgaTVc2d7NWuOnCDMzCwvJwizMpO0hqTXJXWtdCxmxWjT+ClmNrckdSSNXj04It6odDxmxXCCMCuf9qT5bnaPiDGVDsasWK5iMiufr4CPgC0qHYjZ3HAJwqx8ZpBmBX1c0rSIuLvSAZkVwwnCrIwi4htJuwBPZkliaKVjMiuUZ3M1M7O83AZhZmZ5OUGYmVleThBmZ
paXE4SZmeXlBGFmZnk5QZiZWV5OEGZmltf/AdpEv42sln9PAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "Sum_of_squared_distances = []\n", "K = range(2,30)\n", "for k in K:\n", " km = KMeans(n_clusters=k, max_iter=200, n_init=10)\n", " km = km.fit(text_test_tf)\n", " Sum_of_squared_distances.append(km.inertia_)\n", "plt.plot(K, Sum_of_squared_distances, 'bx-')\n", "plt.xlabel('k')\n", "plt.ylabel('Sum_of_squared_distances')\n", "plt.title('Elbow Method For Optimal k')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 179, "metadata": {}, "outputs": [], "source": [ "true_k_dev = 10\n", "model_dev = KMeans(n_clusters=true_k_dev, init='k-means++', max_iter=200, n_init=10)\n", "model_dev.fit(text_tf)\n", "labels_dev=model_dev.labels_\n", "clusters_dev=pd.DataFrame(list(labels_dev),columns=['cluster'])" ] }, { "cell_type": "code", "execution_count": 162, "metadata": {}, "outputs": [], "source": [ "true_k_test = 28\n", "model_test = KMeans(n_clusters=true_k_test, init='k-means++', max_iter=200, n_init=10)\n", "model_test.fit(text_test_tf)\n", "labels_test=model_test.labels_\n", "clusters_test=pd.DataFrame(list(labels_test),columns=['cluster'])" ] }, { "cell_type": "code", "execution_count": 180, "metadata": {}, "outputs": [], "source": [ "clusters_dev.to_csv(\"dev-0\\out.tsv\", sep=\"\\t\",index=False,header=None)" ] }, { "cell_type": "code", "execution_count": 163, "metadata": {}, "outputs": [], "source": [ "clusters_test.to_csv(\"test-A\\out.tsv\", sep=\"\\t\",index=False,header=None)" ] }, { "cell_type": "code", "execution_count": 181, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cluster
06
15
22
38
46
......
822
836
844
856
865
\n", "

87 rows × 1 columns

\n", "
" ], "text/plain": [ " cluster\n", "0 6\n", "1 5\n", "2 2\n", "3 8\n", "4 6\n", ".. ...\n", "82 2\n", "83 6\n", "84 4\n", "85 6\n", "86 5\n", "\n", "[87 rows x 1 columns]" ] }, "execution_count": 181, "metadata": {}, "output_type": "execute_result" } ], "source": [ "clusters_dev" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.3" } }, "nbformat": 4, "nbformat_minor": 4 }