diff --git a/Untitled.ipynb b/Untitled.ipynb index 963a355..bc2f14b 100644 --- a/Untitled.ipynb +++ b/Untitled.ipynb @@ -2,34 +2,33 @@ "cells": [ { "cell_type": "code", - "execution_count": 118, + "execution_count": 150, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", - "from sklearn.feature_extraction.text import CountVectorizer\n", - "from nltk.tokenize import RegexpTokenizer\n", "from many_stop_words import get_stop_words\n", - "from sklearn.model_selection import train_test_split\n", "from sklearn.feature_extraction.text import TfidfVectorizer\n", "from unidecode import unidecode\n", "from nltk.tokenize import word_tokenize\n", - "import string" + "import string\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.cluster import KMeans" ] }, { "cell_type": "code", - "execution_count": 119, + "execution_count": 141, "metadata": {}, "outputs": [], "source": [ "data=pd.read_csv('dev-0/in.tsv', sep='\\t', header=None)\n", - "expected_data=pd.read_csv('dev-0/expected.tsv', sep='\\t', header=None)" + "data_test=pd.read_csv('test-A/in.tsv', sep='\\t', header=None)" ] }, { "cell_type": "code", - "execution_count": 120, + "execution_count": 142, "metadata": {}, "outputs": [], "source": [ @@ -41,112 +40,87 @@ }, { "cell_type": "code", - "execution_count": 121, + "execution_count": 143, "metadata": {}, "outputs": [], "source": [ "data[0] = data[0].str.lower()\n", + "data_test[0] = data_test[0].str.lower()\n", "stop_words = get_stop_words('pl')" ] }, { "cell_type": "code", - "execution_count": 122, + "execution_count": 144, "metadata": {}, "outputs": [], "source": [ "data[0] = data[0].apply(unidecode)\n", + "data_test[0] = data_test[0].apply(unidecode)\n", "uni_stop_words = [unidecode(x) for x in stop_words]" ] }, { "cell_type": "code", - "execution_count": 123, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 opowiesc prawdziwa... olsztyn, akademik, 7 pie...\n", - "1 ja podejrzewam ze o polowaniu nie bylo mowy, p...\n", - "2 smutne. przypomina mi to historie z balwankami...\n", - "3 mam kumpla ktory zdawal w walentynki i polozyl...\n", - "4 przypomniala mi sie jedna z krakowskich urban ...\n", - " ... \n", - "82 wczoraj w popoludniowej audycji w trojce prowa...\n", - "83 sluchajcie! uwielbiam opowiadacv i sluchac jak...\n", - "84 wczoraj na probie (do koncertu czwartkowego) n...\n", - "85 zuzanna mala byla taka jedna historia ze przys...\n", - "86 koszmar zaczyna sie od niewinnego spotkania w ...\n", - "Name: 0, Length: 87, dtype: object" - ] - }, - "execution_count": 123, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data[0]" - ] - }, - { - "cell_type": "code", - "execution_count": 124, + "execution_count": 146, "metadata": {}, "outputs": [], "source": [ - "data[0] = data[0].apply(remove_punctuations)" + "data[0] = data[0].apply(remove_punctuations)\n", + "data_test[0] = data_test[0].apply(remove_punctuations)" ] }, { "cell_type": "code", - "execution_count": 125, + "execution_count": 147, "metadata": {}, "outputs": [], "source": [ - "data[0] = data[0].apply(lambda x: ' '.join([item for item in x.split() if item not in uni_stop_words]))" + "data[0] = data[0].apply(lambda x: ' '.join([item for item in x.split() if item not in uni_stop_words]))\n", + "data_test[0] = data_test[0].apply(lambda x: ' '.join([item for item in x.split() if item not in uni_stop_words]))" ] }, { "cell_type": "code", - "execution_count": 126, + "execution_count": 148, "metadata": {}, "outputs": [], "source": [ "tf=TfidfVectorizer()\n", - "text_tf= tf.fit_transform(data[0])" + "text_tf= tf.fit_transform(data[0])\n", + "text_test_tf= tf.fit_transform(data_test[0])" ] }, { "cell_type": "code", - "execution_count": 127, + "execution_count": 149, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "<87x5203 sparse matrix of type ''\n", - "\twith 8407 stored elements in Compressed Sparse Row format>" + "<691x15352 sparse matrix of type ''\n", + "\twith 42571 stored elements in Compressed Sparse Row format>" ] }, - "execution_count": 127, + "execution_count": 149, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "text_tf" + "text_tf\n", + "text_test_tf" ] }, { "cell_type": "code", - "execution_count": 128, + "execution_count": 160, "metadata": {}, "outputs": [ { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAEWCAYAAACOv5f1AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAtPklEQVR4nO3debxV8/7H8deniUZJCUWRe7uGq9RJSSLdJFO4RDJkyhyJzD9xEZmHRCRlKCmZS+LIlHQaUMkQZYqboRmpPr8/vuvcdumc9q69z9rn7Pfz8diPc/ba0/vs+Ky1vus7mLsjIiK5pVzcAUREpOSp+IuI5CAVfxGRHKTiLyKSg1T8RURykIq/iEgOUvGXvzCz7mb2TsJ9N7Nd48yULun8W8xsnpn9Kx3vFTcz62Zm4zP03m+a2ZlFPNbXzJ7IxOdK8VT8c1RUuH4zs2UJt/vjzgX/2/m4md213vbO0fbHknyfIotOppnZY2a2cr3v9/g0vfcWZtbPzL6O/g0/N7PLzMySfH3D6HusULjN3Z9094PTkU9Khwobf4qUYUe4+4S4QxRhLtDFzC5z91XRtlOBz2LMlKr+7n7Npr7YzCok/O2JngG2Aw4F5gB5wOPAjkDPTf08yS068pdkHWpmX5rZT2Z2m5mVAzCzcmZ2jZnNN7P/mtkwM9sqemyomfWOfq8XHW2eH91vZGa/FL7PBvwAfAx0jJ5fC2gNvJD4JDNrZWbvmdkiM/vQzA6Mtt8E7A/cv4Gzmn9FR8uLzGxA4RFzcX9L9PjJ0WM/m9nVm/pFmtlZZvZF9Pe/YGY7JDzmZna+mX0OfL6B17YHDgb+7e4z3X2Vu78PnAScX9ikFZ319DOzD8xsiZk9H32HAG9FPxdF382+RTT1nRd9T0vN7D/Rv9l70fuNNLNK0XO3NrOXzGyhmf0a/V5/E76XimY23MxGF763ZI6KvyTraMIRZjOgM3B6tL17dGsH7AJUAwoL7UTgwOj3A4AvgbYJ99929zXFfOYw4JTo9xOA54E/Ch80s3rAy8CNQC3gUmC0mdVx96uBt4EL3L2au1+Q8L6HAy2AvYAuRDuY4v4WM9sdGAicDOwAbANsSoE7COgXfe72wHxgxHpPOwpoCey+gbfoAEx2928SN7r7ZOBboH3C5lMI/07bA6uAe6Pthf8GNaPvZlIRcTsCzYFWQB9gEGEnsyOwJ9A1el45YAjQANgJ+I21/w0kxcwqA88R/n27uPvKVF4vqVPxz23PRUe/hbezinnure7+i7t/DdzN2v/xuwF3uvuX7r4MuBI4IWpPngi0iY7u2wL9gf2i1x0QPV6cMcCB0dH3KYSdQaKTgFfc/RV3X+PurwEFhOaQ4tzi7ouivyUfaJrE33Is8JK7v+XufwDXAsXtuAAuTfhuf0r4jEfdfVr0PlcC+5pZw4TX9Yu+69828J61gQVFfN6C6PFCj0dnB8ujvF3MrPxGMifq7+5L3H0WMBMYH303i4GxwN4A7v6zu4929xXuvhS4ifDvm6wawDhCU99p7r46hdfKJlLxz21HuXvNhNvDxTw38UhzPuHol+jn/PUeqwDUdfe5wHJCcd0feAn43swak0Txj4rfy8A1wDbu/u56T2kAHJe4AwPaEI50i/NDwu8rCEf4xf4t0WP/+w6igvrzRj7n9oTvtrAor/MZ0U7mZ6BewuvWOapfz08U/fdtHz2+ofeZD1Rk3Z3DxvyY8PtvG7hfDcDMqpjZQ1GT2BJCs1LNFHY0rQhnYbe4ZposMSr+kqwdE37fCfg++v17QhFOfGwVawvFRMJRcyV3/y66fyqwNTAjic8dBvQGNtQd8BvC0W3iDqyqu98SPZ5qISnub1lAwndgZlUITT+pWuczzKxq9D7fJTynuNwTgJZmlvjvgZm1jPK9kbB5/X+zPwk7h3QX2N5AY6Clu9dgbbNSUr2PgPGEprDXzaxumrNJEVT8JVmXRRf2dgQuAp6Otg8HepnZzmZWDbgZeDqhl8pE4ALWXmR8M7r/TpKn9xMJ7dz3beCxJ4AjzKyjmZU3sy3N7MCEi40/Etruk1Xc3zIKONzM2kQXI29g0/7/GQ6cZmZNzWyL6DMmu/u8ZF4c9c56nXBtY4/o725F+C4GunviReKTzGz3aEd1AzAq+s4XEpqsUvluilOdcCawKLqofF2qb+Du/YGnCDuAVM5OZBOp+Oe2F23dfuhjinnu88BUwtH6y8DgaPujhG6GbwFfAb8DFya8biKhOBQW/3eAKgn3i+XB6+7+ywYe+4Zw8fkqQkH7BriMtf9d3wMcG/VAuXf9129AkX9L1O59PqFALQB+JVxgTUlUvK8FRkfv04hwMTsV/yZcqxgHLCMU/sGs+70T/S2PEZq5tiTqBuruKwjt8u9GzWWtUv071nM3UJlwVvF+lCtl7v4fwkXfCQk9kyRDTE1sImWPmb0JPOHuj8SdRbKTjvxFRHKQir+ISA5Ss4+ISA7Skb+ISA4qNRO71a5d2xs2bBh3DBGRUmXq1Kk/uXud9beXmuLfsGFDCgoK4o4hIlKqmNn8DW1Xs4+ISA5S8RcRyUEq/iIiOUjFX0QkB6n4i4jkoDJb/Pv3h/z8dbfl54ftIiK5rswW/xYtoEuXtTuA/Pxwv0WLeHOJiGSDUtPPP1Xt2sHIkXDUUXDAATBpUrjfrl3cyURE4ldmj/whFPoddoAXX4SttoJdd407kYhIdijTxT8/H376CTp2hLlzoXFjGDQINJediOS6Mlv8C9v4R46EcePgiSdg1So4+2zo0AHmzYs7oYhIfMps8Z8yZd02/m7dYOxYOPpomDwZ9twTBgyANWvizSkiEodSM59/Xl6ep2tit/nz4ayz4LXXwsXgwYOhUaO0vLWISFYxs6nunrf+9jJ75F+cBg3g1VfhkUdg+nT45z/h7rth9eq4k4mIlIycLP4AZnDGGTBrVmga6tUL2raFTz+NO5mISOblbPEvVL8+vPQSDB0Ks2dD06Zw2206CxCRsi3niz+Es4BTTgnFv2NH6NMHWrcO90VEyiIV/wTbbw9jxsBTT4VxAXvvDTffHLqIioiUJSr+6zGDrl3DtYAjj4Srr4aWLeGjj+JOJiKSPir+RahbF555Jty++Qby8uCGG+DPP+NOJiKy+VT8N+LYY0Pb/7HHwnXXhVlBp0+PO5WIyOZR8U9C7drhOsCYMfDDD2EHcM018McfcScTEdk0Kv4pOOqocBZw4olw003QvHmYRkJEpLRR8U9RrVowbFgYG/Drr9CqFVxxBfz+e9zJRESSp+K/iQ47LPQIOu00uPXW0C100qS4U4mIJEfFfzPUrBnmBxo3DpYvh/32g969YcWKuJOJiBRPxT8NOnaEmTOhRw+4805o0gTefjvuVCIiRVPxT5MaNeDBB+H118OI4AMOgJ49wxmBiEi2UfFPs4MOgo8/hgsugPvuC9NF5+fHnUpEZF0q/hlQrRrcey9MnAjlyoUdwrnnwtKlcScTEQlU/DOobdswJ1CvXvDQQ2HpyPHj404lIqLin3FVqoSLwO+8A5Urh4vDZ54JixfHnUxEcpmKfwlp3TrMCdSnDwwZAnvsAa+8EncqEclVGS/+ZtbLzGaZ2UwzG25mW5rZzmY22cy+MLOnzaxSpnNkg8qVw4CwSZNgq63CQLFTTw0jhUVESlJGi7+Z1QN6AnnuvidQHjgBuBW4y913BX4Fzshkjmyzzz4wbVpYK+DJJ2H33cNOYP1eQfn50L9/PBlFpGwriWafCkBlM6sAVAEWAAcBo6LHhwJHlUCOrLLFFnDjjfDBB7DttmG+oE6d4LnnwuP5+dClS5hBVEQk3TJa/N39O+B24GtC0V8MTAUWuXvh4ojfAvU29Hoz62FmBWZWsHDhwkxGjU2zZmFm0L59w0IxxxwT1g7o0gVGjoR27eJOKCJlUaabfbYGOgM7AzsAVYFDkn29uw9y9zx3z6tTp06GUsavUqWwUMz06WEFsdGjoU6dcFFYRCQTMt3s8y/gK3df6O5/As8C+wE1o2YggPrAdxnOUSr8/HOYGqJdO/jkE/jb32D4cHCPO5mIlDWZLv5fA63MrIqZGdAemA3kA8dGzzkVeD7DObJeYRv/yJHwxhvw6KNhdtATT4Sjj4YFC+JOKCJlSabb/CcTLuxOAz6OPm8QcDlwiZl9AWwDDM5kjtJgypR12/hPOw3GjoVDDw1TRu+xR7gorLMAEUkH81JSTfLy8rygoCDuGLH49FM4/XR4772wM3joIahfP+5UIlIamNlUd89bf3vSR/5mtp+ZVY1+P8nM7jSzBukMKRvWuDG89RbcfXdoHtpjDxg8WGcBIrLpUmn2GQisMLMmQG9gLjAsI6nkL8qXh4suCtNF7713mB/okEPg66/jTiYipVEqxX+VhzaizsD97j4AqJ6ZWFKURo3CBeEBA+Ddd8NZwIMPwpo1cScTkdIkleK/1MyuBE4GXjazckDFzMSS4pQrB+edF5aObNUqrBXwr3/Bl1/GnUxESotUiv/xwB/A6e7+A6F//m0ZSSVJadgwrA8waBAUFIRVw+67T2cBIrJxSRf/qOCPBraINv0EjMlEKEmeGZx1FsyaFRaP6dkTDjwQPv887mQiks1S6e1zFqHP/kPRpnrAcxnIJJtgxx3D+gBDhoTVw/baKywis3p13MlEJBul0uxzPmFqhiUA7v45sG0mQsmmMYPu3WH2bOjQAXr3hjZtYM6cuJOJSLZJpfj/4e4rC+9Ec/Oop3kW2mEHeP55eOIJ+OwzaNo0LCKzatVGXyoiOSKV4j/RzK4izM3fAXgGeDEzsWRzmUG3buFawGGHwRVXhKUkZ86MO5mIZINUiv8VwELCHD1nA68A12QilKTPdtvBqFHw9NPw1Vdh/YAbbwxrB4hI7kql+FcGHnX349z9WODRaJtkObMwY+js2WGxmGuvDUtJzpgRdzIRiUsqxf911i32lYEJ6Y0jmVSnDowYAc8+G6aIbtEC/u//YOXKjb9WRMqWVIr/lu6+rPBO9HuV9EeSTDv66HAW0LUr/Oc/kJcHU6fGnUpESlIqxX+5mTUrvGNmzYHf0h9JSkKtWmF9gBdfDCuItWwJV10Fv/8edzIRKQmpFP+LgWfM7G0zewd4GrggI6mkxBx+eOgRdOqp0K9fuCA8eXLcqUQk01KZ3mEK8A/gXOAcYDd3V2NBGVCzZlgfYNw4WLYsdAm99FL4Ted1ImVWqss4tgD2ApoBXc3slPRHkrh07BjGAZx1FtxxBzRpAu+8E3cqEcmEVOb2eRy4HWhD2Am0AP6yNJiUbjVqhPUBJkwIYwHatg2LyCxfHncyEUmnCik8Nw/Y3UvLor+yWdq3D6uGXXkl3HsvvPRSaBo68MC4k4lIOqTS7DMT2C5TQST7VKsW1geYODEMFGvXDs4/H5YujTuZiGyuVIp/bWC2mb1qZi8U3jIVTLJH27ZhmuhevWDgQNhpJ7j99nWfk58P/fvHk09EUpdKs0/fTIWQ7FelSlgf4Nhj4YQT4LLLQsF/6imYNi1MHzFyZNwpRSRZSRd/d5+YySBSOrRuDZ9+CqedFiaL22mn0CQ0ZkxoFhKR0iGV3j6tzGyKmS0zs5VmttrMlmQynGSnypXDHEGnnw5LlsDixWG+oBUr4k4mIslKpc3/fqAr8DlhUrczgQGZCCXZLz8fXnghrBNQuTLcfz/svbdGB4uUFikN8nL3L4Dy7r7a3YcAh2QmlmSz/Py1bfz9+sHLL8NWW8GiRaFZ6JprNFOoSLZL5YLvCjOrBMwws/7AAlIfISxlwJQpofAXtvG3axfa/N9+G+bNg5tuCjuEYcPgn/+MNaqIFMGSHbNlZg2AH4FKQC9gK2CAu8/NXLy18vLyvKCgoCQ+SjbTCy+EKSIWLQpTRvfuDeXLx51KJDeZ2VR3/8tsDKkcuR/l7r+7+xJ3v97dLwEOT19EKSuOPDLMEXTEEXD55XDAATC3RA4RRCRZqRT/UzewrXtxLzCzxmY2I+G2xMwuNrOmZvZ+tK3AzPZJKbVkvTp14Jln4Iknwo6gSZMwZ5AmBxHJDhst/mbW1cxeBHZOHNlrZm8CvxT3Wnf/1N2buntToDmwAhgD9Aeuj7b/X3Rfyhgz6NYtFP/WreHcc6FTJ/juu7iTiUgyF3zfI1zcrQ3ckbB9KfBRCp/VHpjr7vPNzIEa0fatgO9TeB8pZerXh1dfDUf+l14Ke+4JAwaEZSTN4k4nkptSueBbFfjN3deY2d8JC7uMdfc/k3z9o8A0d7/fzHYDXgWMcPbR2t3nb+A1PYAeADvttFPz+fP/8hQpZT7/PKwaNmlSmCpi4ECoXTvuVCJlVzou+L4FbGlm9YDxwMnAY0l+eCXgSOCZaNO5QC9335HQc2jwhl7n7oPcPc/d8+rUqZNCVMlWf/tb6BLarx88/3w4C3jppbhTieSeVIq/ufsK4BjgAXc/Dtgjydd2Ihz1/xjdPxV4Nvr9GUAXfHNI+fJhZHBBAdStG3oFnXlmmCpCREpGSsXfzPYFugEvR9uS7b3dFRiecP974IDo94MIU0ZIjtlrL/jgg7BgzJAh4f6bb8adSiQ3pFL8LwauBMa4+ywz2wXI39iLomsFHVh7pA9wFnCHmX0I3EzUri+5Z4st4Oabw1rBFSuG0cK9emnxeJFMS/qCb9w0wrfsW748DAobMAD+8Q94/HHI0yrRIptlky/4mtnd0c8X1+vnr5W8JK2qVg2zg44fH5aKbNUK+vYNC8mLSHol08//8ejn7cU+SyRNOnQIA8N69oTrrw+9gYYNg913jzuZSNmx0SN/d58a/Zy4oVvmI0ouqlkzFPzRo2H+fGjWLCwjuWZN3MlEyoZkmn0+NrOPirqVREjJXcccE84CDjkkzA7arh189VXcqURKv2R6+xwOHAGMi27dottY4JXMRRMJ6tYN6wUMGQLTp4cuoY88okniRDZHMs0+86OpFzq4ex93/zi6XQ4cnPmIImEOoO7d4eOPoUWLsF7AEUfAggVxJxMpnVId5LVfwp3WKb5eZLM1aAATJsA998Drr4fpIUaOjDuVSOmTSvE+A3jAzOaZ2TzgAeD0jKQSKUa5cqEn0PTp0KgRHH98mCH0l2InGBeRREkXf3ef6u5NgCZAk2ie/mmFj5vZhhZ7EcmYf/wD3nsvLBU5alQ4Cxg7Nu5UIqVDys027r7Y3Rdv4KGL0pBHJCUVKsA118DkyVCrFhx6KJxzDixbFncykeyWzjZ7LcshsWnWLMwSetllMGhQWDbynXfiTiWSvdJZ/NXxTmK15ZbQvz9MnBi6gbZtC336wE03Qf56UxDm54fniuQqHflLmbP//vDhh6E76G23hTEBxxyzdgeQnw9duoQuoyK5Kp3F/900vpfIZqleHR56CF5+Gf74IywUc/jhcPXVofCPHBlGC4vkqo1O6WxmlxT3uLvfmdZERdCUzrKpfvkFzj8fRowI9087DR59NN5MIiVlc9bwrR7d8ghr79aLbucAzdIZUiQTatWCHj2gRo1wRjBkSBgd/OuvcScTiU8y0ztc7+7XA/WBZu7e2917A82BnTIdUGRzFbbxP/ccfPcdHHtsmCa6USMYPlxzBEluSqXNvy6wMuH+ymibSFabMmVtG3/16vDMM/Dgg1ClCpx4YpgxdO7cuFOKlKxUiv8w4AMz62tmfYHJwNCMpBJJoz59/npx9+yzwzoB994LkyaF0cE33QQrV274PUTKmlSmd7gJOA34Nbqd5u43ZyqYSKaVLw8XXgiffBJ6Al1zDTRtCm+/HXcykcxLtatnFWCJu98DfGtmO2cgk0iJqlcvNAW99BKsWBEGh51xBvz8c9zJRDIn6eJvZtcBlwNXRpsqAk9kIpRIHA47DGbNCs1EQ4eGieOGDdMFYSmbUjnyPxo4ElgO4O7fE7qAipQZVavCrbfCtGmw665w6qnQvj18+mncyUTSK5Xiv9LDiDAHMLOqmYkkEr+99oJ33w29gqZNC/f79oXff487mUh6pFL8R5rZQ0BNMzsLmAA8nJlYIvErVy70CpozB/79b7j++rATeOONuJOJbL6kir+ZGfA0MAoYDTQG/s/d78tgNpGssN128NRT8OqrsGZNaAY6+WT473/jTiay6ZIq/lFzzyvu/pq7X+bul7r7axnOJpJVDj44LCB/9dXw9NPhgvAjj4Qdgkhpk0qzzzQz0yS4ktMqV4Ybb4QZM8LAsLPOggMOCL2EREqTVIp/S2CSmc01s4/M7GMz+yhTwUSy2e67w5tvwuDBMHt2GBx21VVhnIBIaZBK8e8INAIOAo4ADo9+iuSkcuXg9NPDBeFu3aBfv3A2MG5c3MlENi6V6R3mu/t84DdCd8//dfsUyWV16sBjj4VeQBUrQqdOcMIJsGBB3MlEipbKCN8jzexz4CtgIjAPGLuR1zQ2sxkJtyVmdnH02IVmNsfMZpmZVlOVUq9dO/joo9AldMyYcEF44EBdEJbslEqzz3+AVsBn7r4z0B54v7gXuPun7t7U3ZsS5v9fAYwxs3ZAZ6CJu+8B3L4p4UWyzRZbwP/9X+gVlJcH550HrVuHNYVFskkqxf9Pd/8ZKGdm5dw9n7C6V7LaA3OjpqNzgVvc/Q8Ad1ePaSlT/v53mDAhzA305ZfQvDlceiksWxZ3MpEgleK/yMyqAW8BT5rZPUTz/CTpBGB49Pvfgf3NbLKZTSyqC6mZ9TCzAjMrWLhwYQofJRI/szAYbM6csG7wHXfAHnvAiy/GnUwkteLfmXCxtxcwDphLkr19zKwSYVK4Z6JNFYBahGakywhTR9j6r3P3Qe6e5+55derUSSGqSPaoVQsefjisE1CtGhx5ZJgu4ttv404muSyV3j7L3X21u69y96Hufm/UDJSMTsA0d/8xuv8t8KwHHwBrgNqpRRcpXdq0genT4eab4ZVXYLfd4J57YPXquJNJLkqlt8/SqLfOEjP73cxWm9mSJF/elbVNPgDPAe2i9/07UAn4KdksIqVVpUpw5ZVhRPB++8HFF8M++8DUqXEnk1yTypF/dXev4e41gMrAv4EHNva6aOrnDsCzCZsfBXYxs5nACODUaP4gkZywyy4wdiyMGAHffx92ABddBDfcAPn56z43Px/6qzO0pJltTs01s+nuvnca8xQpLy/PCwoKSuKjRErUokVhaogHHwzXB/78M4wTOOigUPi7dIGRI/+6CL1IMsxsqrv/pWdmhRTe4JiEu+UI3Ty1tIXIZqpZEx54IKwa1qNHGCjWqROcc06YSlqFXzIhld4+RyTcOgJLCT2ARCQNWraEggK47bYwKvjee6FjRxV+yYykj/zd/bRMBhGRMDdQ8+ZQvTqULw9PPglbbx12BH/tDC2y6VJp9rm3uMfdvefmxxHJbYVt/KNHh53AIYfA/ffD3LnhOsAWW8SdUMqKVJp9tgSaAZ9Ht6aELppTo5uIbKYpU9a28deoAe+8E6aLHjs2XAD+4Ye4E0pZkXRvHzN7H2jj7qui+xWBt929VQbz/Y96+0guGzkSuneHbbaB554LZwUiySiqt08qR/5bAzUS7leLtolIhnXpAu++G9r927QJ4wNENkcqxf8WYLqZPWZmQ4FpwM2ZiSUi69t779As1Lw5dO0axgZorQDZVKmM8B1CWMd3DGG07r7uPjRTwUTkr+rWDSuGnXlmWDayc2dYkuwkKyIJUpnbZz9gqbs/D1QH+phZg4wlE5ENqlQJBg2C++4LF4JbtYIvvog7lZQ2qTT7DARWmFkT4BLClM7DMpJKRIplBhdcAOPHw48/hrmBJkyIO5WUJqkU/1XR5GudgQHuPoBwBiAiMTnooHAdYIcdwmjge+4BTZEoyUil+C81syuBk4CXzawcUDEzsUQkWbvsApMmwRFHhCmizzgD/vgj7lSS7VIp/scDfwBnuPsPQH3gtoykEpGUVK8Ozz4L114LQ4aEQWIaECbFSaW3zw/ufqe7vx3d/9rd/9fmb2aTMhFQRJJTrlxYD2DkSPjwQ8jLCxPFiWxIKkf+G7NlGt9LRDbRcceFAWHly8P++4dpoUXWl87ir8tMIlmiadNwIbhFizA30BVXaK1gWVc6i7+IZJFttw3dP3v0gFtvDQPCFi+OO5Vki40WfzNLdhJZzTYukmUqVQrLQw4YAOPGhQFhn38edyrJBskc+U8CMLPHN/K8kzc/joikmxmcdx689hosXBgGhI0fH3cqiVsyxb+SmZ0ItDazY9a/FT7J3WdmLqaIbK527cJ1gB13DGsE33WXBoTlsmRW8joH6AbUJKzfm8gJk7yJSCmw887w3ntwyilwySVhsfgHH9QKYbloo8Xf3d8B3jGzAncfXAKZRCSDqlWDUaPCmIDrr4c5c8IAse23jzuZlKRUevs8bmY9zWxUdLswWs1LREqZcuWgb9+wE/joozAgbMqUuFNJSUql+D8ANI9+PkBYz3dgJkKJSMn4979DM1DFimFA2JNPxp1ISkoybf6FWrh7k4T7b5jZh+kOJCIlq0mTcNR/7LFw0knhTODmm8MIYSm7UjnyX21mjQrvmNkugMYMipQBdeqErqDnnAP9+8ORR2pAWFmXypH/ZUC+mX1JGNDVADgtI6lEpMRVqgQDB8Jee0HPntCyJbzwAvz973Enk0xIZVbP14G/AT2BC4HG7p5f+LiZdUh/PBEpaeeeG6aF+PnnMCDs1VfjTiSZkNLcPu7+h7t/FN3WXy7i1jTmEpEYHXBAuA7QoAEceijceacGhJU16ZzY7S9z+5hZYzObkXBbYmYXJzze28zczGqnMYeIpEHDhmFq6KOPht69oXt3+P33uFNJumR0Smd3/9Tdm7p7U0I30RXAGAAz2xE4GPg6jRlEJI2qVQuLw/TtC8OGwYEHwvffx51K0qEkp3RuD8x19/nR/buAPmgdAJGsVq4cXHcdjB4NM2eGNQI0IKz0S2fxn7eRx08AhgOYWWfgO3cvdpyAmfUwswIzK1i4cGF6UorIJjnmmDAgrFKlMCDsiSfiTiSbwzzJqzhmVh44DGhIQhdRd78ziddWAr4H9gCWAvnAwe6+2MzmAXnu/lNx75GXl+cFWpBUJHY//RQGhE2cCF26hGUiCweE5eeHs4I+feLNKGuZ2VR3z1t/eypH/i8C3YFtgOoJt2R0Aqa5+49AI2Bn4MOo8NcHppnZdilkEZGY1K4dBoR17hyuB+y7LyxaFAp/ly6hWUiyXyqDvOq7+16b+DldiZp83P1jYNvCB5I98heR7FGxIjz3XJgW+q67oFEjWLMmzA7arl3c6SQZqRz5jzWzg1P9ADOrCnRA8/6LlDl33hnWBvjll3D0P2JE+F2yXyrF/31gjJn9FvXXX2pmSzb2Indf7u7buPsGZwpx94Y66hcpnfLz4ZVXQht/5crw8MPwj3/A0KEaFJbtUin+dwL7AlXcvYa7V3f3GhnKJSJZrrCNf+RIuPVWePll2GqrcE2ge/cwJmD27LhTSlFSKf7fADM92e5BIlKmTZkSCn9hG3+7dqHN/9RTYdAg+PjjMF30FVfA8uXxZpW/SqWr52PALsBY4H/z+iTT1TMd1NVTpHRZuDA0Bz32WJgj6N57w1TRUrLS0dXzK+B1oBKpd/UUkRxTpw4MGQJvvRWmiejcGY46Cr7WhC5ZIemunu5+fSaDiEjZtP/+MH166BJ6/fWw225huohevUKXUYlH0kf+ZpZvZm+sf8tkOBEpGypWDE1As2dDhw5w+eXQtGk4K5B4pNLscylhNa/LgGuBGYAa4UUkaQ0ahMFhL7wQLgIfcEDoGaSpu0peKit5TU24vevulwAHZi6aiJRVRxwBs2aFnkBPPgmNG4ceQmvWxJ0sd6TS7FMr4VbbzA4BtspgNhEpw6pWhX794MMP4Z//hLPPhjZtwn3JvFSafaYSmnkKgPeAS4AzMhFKRHLH7rvDm2+GUcFffAHNm4c5g5YujTtZ2bbR4m9mLcxsO3ff2d13Aa4H5kQ3jd8Tkc1mFuYImjMHzjwT7r47TBMxapSmiciUZI78HwJWAphZW6AfMBRYDAzKXDQRyTW1asGDD4ZFY7bdFo47LiwgP3du3MnKnmSKf3l3L5yn73hgkLuPdvdrgV0zF01EclWrVmH6iLvvDovI77EH3HAD/PHHRl8qSUqq+JtZ4WCw9kBi3/5U1gMQEUlahQpw0UXwySdhdPB114ULwxMmxJ2sbEim+A8HJprZ88BvwNsAZrYroelHRCRj6tWDp5+GV18NXUE7dICuXWHBgriTlW4bLf7ufhPQG3gMaJMwq2c54MLMRRMRWevgg2HmzHAG8Oyz4YLw/ffD6tVxJyudkurq6e7vu/sYd1+esO0zd5+WuWgiIuvackvo2zfsBFq2hAsvDD+nTIk7WemTSj9/EZGs8Le/hWagESPg++/DDuD888NSkpIcFX8RKZXM4Pjjw9iACy8MXUQbN4YnntDYgGSo+ItIqVajBtxzT2j6adgQTj4Z2rcPOwUpmoq/iJQJzZqFwWEDB4b1A/baC665Bn77Le5k2UnFX0TKjPLl4ZxzwlH/CSfATTeFAWKvvBJ3suyj4i8iZU7dujBsGOTnhx5Chx0WBog9/fS6z8vPh/7948kYNxV/ESmzDjwQZsyAm2+Gzz4LZwPnngt//hkKf5cu0KJF3CnjYV5KLovn5eV5QYEWDhORTfPVV3DiifD++2ECuZUrw2CxDh3iTpZZZjbV3fPW364jfxHJCTvvHC4IH388/PILLFsWFpAZNCg3J4xT8ReRnPHmm/D666EXUI0aUKlS2AHsumuYKiKXegap+ItITihs4x85Ev7zn7CQ/M8/w623hoXlL7wQdtkF7rgjLC5f1qn4i0hOmDIlFP527cL9du3CfYC33w47h913h0svDYPF+vWDJUtii5txuuArIpLgvffCmcG4cbD11mFNgZ49w++lUSwXfM2ssZnNSLgtMbOLzew2M5tjZh+Z2Rgzq5nJHCIiyWrdGsaOhQ8+gLZtwyyiDRrAVVfBTz/FnS59Mlr83f1Td2/q7k2B5sAKYAzwGrCnu+8FfAZcmckcIiKpatEiXBeYMQMOOQRuuSXsBC69FH74Ie50m68k2/zbA3Pdfb67j3f3VdH294H6JZhDRCRpTZqEawOzZsExx8Bdd4Vuoz17wrffxp1u05Vk8T+BsCTk+k4HxpZgDhGRlO22Gzz+OHz6aRgsNnAgNGoU5hKaNy/udKkrkeJvZpWAI4Fn1tt+NbAKeLKI1/UwswIzK1i4cGHmg4qIbMSuu8LgwfD553D66TBkSFhc5vTTw7bSoqSO/DsB09z9x8INZtYdOBzo5kV0OXL3Qe6e5+55derUKZmkIiJJaNgwHP1/+WVYRWz48LCucLduMHt23Ok2rqSKf1cSmnzM7BCgD3Cku68ooQwiImlXrx7cfXdo+undG55/HvbcE447Dj78MO50Rct48TezqkAH4NmEzfcD1YHXoi6gD2Y6h4hIJtWtG6aHnjcvdAsdPx6aNoXOnbNzgfmMF393X+7u27j74oRtu7r7joXdQN39nEznEBEpCbVrw403wvz5cMMNYfTwPvuE7qLvvht3urU0vYOISAbUrAnXXht2ArfcAtOmQZs2cNBBYSqJuCdXUPEXEcmg6tXh8svDegJ33hmWmDzoINh//zCFRFw7ARV/EZESULUq9OoVegcNGABffw2dOkHLlvDCCyW/E1DxFxEpQVtuCeedB198AQ8/HKaV7twZ9t4bRo2CNWtKJoeKv4hIDCpVgjPPDCOGhw6F338P3UP33BO6doUJE9Z9froXm1fxFxGJUYUKcMopYe6gESOgfPnws2NH6NMnc4vNaz5/EZEssmZNGCjWp09oGtpqKzALi80XLkSTCi3gLiJSCpQrB0cfDZ99BiecAIsXQ48em1b4i/2c9L6diIikw5tvhnb/a6+FRx8NTT/ppOIvIpJlEhebv+GG8LNLl/TuAFT8RUSyTFGLzadzjiBd8BURKcN0wVdERP5HxV9EJAep+IuI5CAVfxGRHKTiLyKSg0pNbx8zWwjM38SX1wZ+SmOcTCtNeUtTVihdeZU1c0pT3s3N2sDd66y/sdQU/81hZgUb6uqUrUpT3tKUFUpXXmXNnNKUN1NZ1ewjIpKDVPxFRHJQrhT/QXEHSFFpyluaskLpyqusmVOa8mYka060+YuIyLpy5chfREQSqPiLiOSgMl38zWxHM8s3s9lmNsvMLoo7U1HMbEsz+8DMPoyyXh93po0xs/JmNt3MXoo7y8aY2Twz+9jMZphZ1k8Pa2Y1zWyUmc0xs0/MbN+4M22ImTWOvtPC2xIzuzjuXEUxs17R/18zzWy4mW0Zd6bimNlFUdZZ6f5ey3Sbv5ltD2zv7tPMrDowFTjK3WfHHO0vzMyAqu6+zMwqAu8AF7n7+zFHK5KZXQLkATXc/fC48xTHzOYBee5eKgb2mNlQ4G13f8TMKgFV3H1RzLGKZWblge+Alu6+qQMyM8bM6hH+v9rd3X8zs5HAK+7+WLzJNszM9gRGAPsAK4FxwDnu/kU63r9MH/m7+wJ3nxb9vhT4BKgXb6oN82BZdLdidMvaPbOZ1QcOAx6JO0tZY2ZbAW2BwQDuvjLbC3+kPTA3Gwt/ggpAZTOrAFQBvo85T3F2Aya7+wp3XwVMBI5J15uX6eKfyMwaAnsDk2OOUqSoGWUG8F/gNXfP2qzA3UAfYE3MOZLlwHgzm2pmPeIOsxE7AwuBIVGz2iNmVjXuUEk4ARged4iiuPt3wO3A18ACYLG7j483VbFmAvub2TZmVgU4FNgxXW+eE8XfzKoBo4GL3X1J3HmK4u6r3b0pUB/YJzrtyzpmdjjwX3efGneWFLRx92ZAJ+B8M2sbd6BiVACaAQPdfW9gOXBFvJGKFzVNHQk8E3eWopjZ1kBnws51B6CqmZ0Ub6qiufsnwK3AeEKTzwxgdbrev8wX/6j9fDTwpLs/G3eeZESn+PnAITFHKcp+wJFRO/oI4CAzeyLeSMWLjvpw9/8CYwjtqNnqW+DbhDO/UYSdQTbrBExz9x/jDlKMfwFfuftCd/8TeBZoHXOmYrn7YHdv7u5tgV+Bz9L13mW6+EcXUQcDn7j7nXHnKY6Z1TGzmtHvlYEOwJxYQxXB3a909/ru3pBwqv+Gu2ftEZSZVY0u+BM1nxxMOKXOSu7+A/CNmTWONrUHsq6Twnq6ksVNPpGvgVZmViWqDe0J1wGzlpltG/3cidDe/1S63rtCut4oS+0HnAx8HLWlA1zl7q/EF6lI2wNDox4T5YCR7p71XShLibrAmPD/OxWAp9x9XLyRNupC4MmoOeVL4LSY8xQp2qF2AM6OO0tx3H2ymY0CpgGrgOlk/zQPo81sG+BP4Px0Xvgv0109RURkw8p0s4+IiGyYir+ISA5S8RcRyUEq/iIiOUjFX0QkB6n4i2wiM2toZlk7XkCkOCr+IiI5SMVfJA3MbJdoErYWcWcRSUZZH+ErknHRNAwjgO7u/mHceUSSoeIvsnnqAM8Dx2TjIkEiRVGzj8jmWUyYMKxN3EFEUqEjf5HNsxI4GnjVzJa5e9pmXRTJJBV/kc3k7sujBW5ei3YAL8SdSWRjNKuniEgOUpu/iEgOUvEXEclBKv4iIjlIxV9EJAep+IuI5CAVfxGRHKTiLyKSg/4fqToEXnp1tHgAAAAASUVORK5CYII=\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAEWCAYAAACOv5f1AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAtwUlEQVR4nO3debxV8/7H8denEiKVdA2JTLeEJAdFhpAxlZC6hoxxfyRE4nIN11TXPBdR5qGkDCEajOEUKoVC3bihRKUQ+fz++K5z253OsNdp77P2Ofv9fDz24+y99lp7f87u9NlrfYfP19wdERHJLzWSDkBERCqfkr+ISB5S8hcRyUNK/iIieUjJX0QkDyn5i4jkISV/WYOZnWJmb6U8djPbPsmYMiWTv4uZzTGzgzPxWkkzsxPM7NUsvfYEMzujlOeuMrNHs/G+UjYl/zwVJa5fzOznlNtdSccF//vycTO7tdj2ztH2oWm+TqlJJ9vMbKiZrSj2+R6fodde18xuMLP/RP+Gs8zsYjOzNI9vGn2OtYq2uftj7n5IJuKTqqFW+btINXaUu7+WdBCl+ALoZmYXu/sf0baewOcJxhTXQHe/vKIHm1mtlN891TPAZsARwKdAAfAI0AQ4r6LvJ/lFZ/6SriPM7EszW2hm/zazGgBmVsPMLjezuWb2vZk9bGb1oueGmVnf6H7j6GzznOjxdma2qOh1SvAtMA04NNp/Y2BvYHTqTmbWxszeMbOfzOxjMzsg2n4dsC9wVwlXNQdHZ8s/mdndRWfMZf0u0fMnRc/9YGb/qOgHaWZnmtns6PcfbWZbpDznZnaOmc0CZpVw7EHAIcAx7j7d3f9w90nAicA5RU1a0VXPDWb2vpktMbNR0WcI8Eb086fos2lbSlPf/0Wf01Iz+1f0b/ZO9HpPm1ntaN8GZvaCmS0wsx+j+1tW4HNZx8yeMLMRRa8t2aPkL+k6mnCG2RroDJwWbT8lurUHtgU2BIoS7UTggOj+/sCXwH4pj9909z/LeM+HgZOj+92BUcBvRU+aWWPgReBaYGPgImCEmTVy938AbwLnuvuG7n5uyut2BPYAWgLdiL5gyvpdzKwFcC9wErAF0BCoSII7ELghet/NgbnAk8V26wLsBbQo4SU6AO+5+7zUje7+HvA1cFDK5pMJ/06bA38Ad0Tbi/4N6kefzbulhHsosDvQBugHDCZ8yTQBdgZ6RPvVAB4Ctga2An5h1d9AWsxsfeA5wr9vN3dfEed4iU/JP789F539Ft3OLGPfAe6+yN3/A9zGqv/4JwC3uPuX7v4zcCnQPWpPngi0i87u9wMGAvtEx+0fPV+WkcAB0dn3yYQvg1QnAi+5+0vu/qe7jwUKCc0hZbnR3X+KfpfxQKs0fpdjgRfc/Q13/w24AijriwvgopTPdmHKezzo7lOi17kUaGtmTVOOuyH6rH8p4TU3AeaX8n7zo+eLPBJdHSyL4u1mZjXLiTnVQHdf4u6fANOBV6PPZjEwBtgNwN1/cPcR7r7c3ZcC1xH+fdO1EfAyoanvVHdfGeNYqSAl//zWxd3rp9zuL2Pf1DPNuYSzX6Kfc4s9VwvY1N2/AJYRkuu+wAvAf82sGWkk/yj5vQhcDjR097eL7bI1cFzqFxjQjnCmW5ZvU+4vJ5zhl/m7RM/97zOIEuoP5bzPTSmfbVFSXu09oi+ZH4DGKcetdlZfzEJK//02j54v6XXmAuuw+pdDeb5Luf9LCY83BDCzOmY2KGoSW0JoVqof44umDeEq7EZXpclKo+Qv6WqScn8r4L/R/f8SknDqc3+wKlFMJJw113b3b6LHPYEGwEdpvO/DQF+gpOGA8whnt6lfYBu4+43R83ETSVm/y3xSPgMzq0No+olrtfcwsw2i1/kmZZ+y4n4N2MvMUv89MLO9ovjGpWwu/m/2O+HLIdMJti/QDNjL3TdiVbNSWqOPgFcJTWGvm9mmGY5NSqHkL+m6OOrYawL0AZ6Ktj8BXGBm25jZhsD1wFMpo1QmAueyqpNxQvT4rTQv7ycS2rnvLOG5R4GjzOxQM6tpZuuZ2QEpnY3fEdru01XW7zIc6Ghm7aLOyGuo2P+fJ4BTzayVma0bvcd77j4nnYOj0VmvE/o2dop+7zaEz+Jed0/tJD7RzFpEX1TXAMOjz3wBockqzmdTlrqEK4Gfok7lK+O+gLsPBB4nfAHEuTqRClLyz2/P2+rj0EeWse8oYDLhbP1FYEi0/UHCMMM3gK+AX4HeKcdNJCSHouT/FlAn5XGZPHjd3ReV8Nw8QufzZYSENg+4mFV/17cDx0YjUO4ofnwJSv1donbvcwgJaj7wI6GDNZYoeV8BjIheZztCZ3YcxxD6Kl4GfiYk/iGs/rkT/S5DCc1c6xENA3X35YR2+bej5rI2cX+PYm4D1idcVUyK4orN3f9F6PR9LWVkkmSJqYlNpPoxswnAo+7+QNKxSG7Smb+ISB5S8hcRyUNq9hERyUM68xcRyUNVprDbJpts4k2bNk06DBGRKmXy5MkL3b1R8e1VJvk3bdqUwsLCpMMQEalSzGxuSdvV7CMikoeU/EVE8pCSv4hIHlLyFxHJQ0r+IiJ5qNom/4EDYfz41beNHx+2i4jku2qb/PfYA7p1W/UFMH58eLzHHsnGJSKSC6rMOP+42reHp5+GLl1g//3h3XfD4/btk45MRCR51fbMH0Ki32ILeP552G472HffpCMSEckN1Tr5jx8PCxfCnnvCe+9Bmzbw449JRyUikrxqm/yL2viffjok/osvhsmTYeedYebMpKMTEUlWtU3+H3ywehv/wIFwxx2weDHstRe8+GKy8YmIJKnaJv9+/dbs3O3dO5z177ADHHUUDBgAWs5ARPJRtU3+pWnSBN58MzQJ9e8PJ54Iv/ySdFQiIpUr75I/QJ068MQTcN118PjjsN9+8M03SUclIlJ58jL5A5jBZZfBqFHw6adQUACTJiUdlYhI5cjb5F+kU6eQ9OvUCZPBhg1LOiIRkezL++QPsNNO8P770K4dnHIK9O0Lf/yRdFQiItmj5B9p2BBefjmMCLrlFujYEX76KemoRESyQ8k/xTrrhLkAgwfDuHFhPsBnnyUdlYhI5in5l+DMM+H110MpiFat4MYbV39epaFFpKpT8i/FvvuGWcKNG8Oll8LZZ4cJYSoNLSLVQdaTv5ldYGafmNl0M3vCzNYzs23M7D0zm21mT5lZ7WzHURFbbw0ffxzmAQwaBLvsAscdp9LQIlL1ZTX5m1lj4DygwN13BmoC3YEBwK3uvj3wI3B6NuNYGxtsABMmwAEHwCefwMqVYZuISFVWGc0+tYD1zawWUAeYDxwIDI+eHwZ0qYQ4KmzCBJg+HXr2hCVLoG3b0Ob/559JRyYiUjFZTf7u/g1wE/AfQtJfDEwGfnL3opH0XwONSzrezHqZWaGZFS5YsCCboZYqtTT00KEwciTUqgWXXAKHHgrz5ycSlojIWsl2s08DoDOwDbAFsAFwWLrHu/tgdy9w94JGjRplKcqyFS8N3akTjBkDxxwDb78NLVvCSy8lEpqISIVlu9nnYOArd1/g7r8DzwL7APWjZiCALYGcLatWUmnoAw+E4cPD4jBbbAFHHgkXXAC//ZZMjCIicWU7+f8HaGNmdczMgIOAGcB44Nhon57AqCzHkRU77hhWCevdG267LSwTqUlhIlIVZLvN/z1Cx+4UYFr0foOBS4ALzWw20BAYks04smm99cKs4NGjYd48aN0aHnpIi8SISG4zryJZqqCgwAsLC5MOo0zffAMnnRQ6ibt3h/vug3r1ko5KRPKZmU1294Li2zXDN4MaN4axY8MiMc88E0pDvPtu0lGJiKxJyT/DatYMi8S89VZ4vO++4ctg5cpk4xIRSaXknyVt2sBHH8Gxx8Lll8Nf/xqGjKZSgTgRSYqSfxbVqxfWCn7wwdAf0L17uAoAFYgTkWQp+WeZGZx6aigQt9124Spg991VIE5EkqXkX0maNQv1gdq0gSlTwoSwJUuSjkpE8pWSfyV65x2YPRtOPx1++QW6dAlXAN9+m3RkIpJvlPwrSWqBuAceCPWB6tSBUaPCTOEhQzQxTEQqj5J/JSleIK5DB3jhBejTJxSHO+MMOOigcGUgIpJtmuGbA/78M1wNXHwxrFgBV18NF14YSkeLiKwNzfDNYTVqQK9eMHMmHHZYWCtgzz1Dx7CISDaknfzNbB8z2yC6f6KZ3WJmW2cvtPyzxRZhsZgRI8IiMXvuGUpKL1+edGQiUt3EOfO/F1huZrsCfYEvgIezElWe69o1XAWcdhr8+99h4fjXX086KhGpTuIk/z88dBB0Bu5y97uButkJS+rXh8GDYdy40Cx08MHhy+Cqq8LIoVQqEyEiccVJ/kvN7FLgJOBFM6sBrJOdsKRI+/YwdSr07w8PPxzWDujcOXwpgMpEiEjFxEn+xwO/Aae5+7eE5Rf/nZWoZDXrrw833ACFhbDttrB0KRx+OJx//qq5AyoTISJxpJ38o4Q/Alg32rQQGJmNoKRkrVrBpElw001heOjtt4f1hJX4RSSuOKN9ziQsyTgo2tQYeC4LMUkZatUKS0XWrQubbRbO+jt3DuUiRETSFafZ5xxgH2AJgLvPAv6SjaCkdEVt/CNGwJw5oTbQ6NHQogVMm5Z0dCJSVcRJ/r+5+4qiB2ZWC6ga04OrkdQyEeuuG+4PGAALF4ZO37vuUo0gESlfnOQ/0cwuA9Y3sw7AM8Dz2QlLStOv35pt/P36wRdfhPb/3r1DM9DChcnEJyJVQ5zk3x9YAEwDzgJeAi7PRlAS31/+Ai++CLfdBq+8EorFvfZa0lGJSK6Kk/zXBx509+Pc/VjgwWib5AizUCX0/ffDEpKHHBLqBK1YUf6xIpJf4iT/11k92a8P6NwyB+26K0yeDGeeGWb+7rOPSkWLyOriJP/13P3nogfR/TqZD0kyoU4dGDQIhg8P/QG77QbDhqkzWESCOMl/mZm1LnpgZrsDGl2e4445Jiwe37o1nHIKnHACLF6cdFQikrQ4yf984Bkze9PM3gKeAs7NSlSSUU2ahFpA//pXGBraqhWcc44KxInkszjlHT4AmgN/B84GdnT3ydkKTDKrZk24/HJ4883w+L77oGPHVSOCVCBOJL/EXclrD6Al0BroYWYnZz4kyaa2beGjj0KiX74cjjgijBBSgTiR/BKnts8jwE1AO8KXwB7AGutCSu6rVw8efxyGDg2P77gjNAXtu2+SUYlIZYqzRHgB0MKryorvUiYz2GqrUCCuYcPQ/NO8OTzxhJp+RPJBnGaf6cBmcV7czJqZ2UcptyVmdr6ZXWVm36RsPyJe2LK2itr4hw+Hzz4LK4R99RXstVdoBlq6NOkIRSSb4iT/TYAZZvaKmY0uupV1gLt/5u6t3L0VsDuwnFVrANxa9Jy7v1Sh6KXCUgvEmcGVV8Jzz0GbNnDnnbDjjuGxiFRPcZp9rlrL9zoI+MLd55rZWr6UrK1+/dbcdtRR4TZpEvTqBUcfDV26hC+DLbes9BBFJIviDPWcWNItxnt1B55IeXyumU01swfNrEFJB5hZLzMrNLPCBQsWxHgrWRtt2oTyEAMGhCJxO+4YOoVXrkw6MhHJlDijfdqY2Qdm9rOZrTCzlWa2JM1jawOdCGWgAe4FtgNaAfOBm0s6zt0Hu3uBuxc0atQo3VAlA9ZZJ1wdTJ8eagP16RO+FD78MOnIRCQT4rT53wX0AGYRirqdAdyd5rGHA1Pc/TsAd//O3Ve6+5/A/cCeMeKQSrTttjBmTBgFNG9eGAl00UWwbFnSkYnI2og1ycvdZwM1o8T9EHBYmof2IKXJx8w2T3nuaMJIIslRZtC9O8ycCaefDjffHJaNfPHFpCMTkYqKk/yXR803H5nZQDO7IJ3jzWwDoAPwbMrmgWY2zcymAu2BC+IELclo0CBUCn3zTdhww1AeomXLMFw0lWoEieS+OMn/pGj/c4FlQBOga3kHufsyd2/o7otTtp3k7ru4e0t37+Tu8+MGLslp1y60/V97LXz6aZgvcP758OefqhEkUlVYuhN2zayPu99e3rZsKSgo8MLCwsp4K4lh9uyQ7D/8MMwQXrhQNYJEcomZTXb3NUrxxDnz71nCtlMqHJFUC9tvH4aFdugQrgJq1w5DQ0Ukt6XTZt/DzJ4Htkmd2WtmE4BFWY9Qct6ECeHMv1s3+O9/wzKS09WFL5LT0pnh+w5hLP4mrD4efykwNRtBSdVR1MZf1NRz4IHwf/8XagSNHBkWkReR3FPumb+7z3X3CcDBwJvRrN75wJaA6jTkudQaQQBnnRXKRW+0UVgrYNCgZOMTkZLFafN/A1jPzBoDrxJG/wzNRlBSdfTrt2bn7vHHw+efh7P+s88Ok8JUGkIkt8RJ/ubuywnDO+9x9+OAnbITllR1devC6NFw7rlhUtixx2pWsEguiZX8zawtcAJQNLezZuZDkuqiVq1QEfT222HUKNh/f5ivGR0iOSFO8j8fuBQY6e6fmNm2wPisRCXVynnnheT/6aehI3iqhgmIJC5uSedO7j4gevylu5+XvdCkOjnqqFAWYuXKUCV0zJikIxLJb+mM878t+vl8sXH+5a7kJZJqt93g/ffDxLCOHeGee5KOSCR/pTPO/5Ho503ZDETyQ+PG4QqgRw845xyYNQtuuglqqvdIpFKVm/zdfXL0M86qXSKl2nDDsD5w375w223w5Zfw2GNhu4hUjnKTv5lNA0qt/ubuLTMakeSFmjVD4t9hB+jdOzQJTZgQrgwgzBz+4IOS1xoWkbWXTrNPx+jnOdHPomagEynjS0EkHeecA0uWwGWXQatWMHYs/PjjqpIRIpId6TT7zAUwsw7uvlvKU5eY2RSgf7aCk/xw6aXQqFEoDbHnnmH94MceU1lokWyKO8lrn5QHe8c8XqRUZ5wR5gP8/jssXw4nnBAWiJk3L+nIRKqnOMn7dOAeM5tjZnOAe4DTshKV5J3x4+HRR+GKK8Jyke3awV13wXbbhXWDP/886QhFqpc4k7wmu/uuwK7Aru7eyt2nFD1vZiUt9iJSrtSy0NdcAyNGwJQp8Mgjq6qENm8Oxx0XtovI2ovdbOPui1PX403RJwPxSB4qXha6ffvweN68UBtozhzo3x9efRV23x0OPRQmToQ0VyAVkRKkvYZvuS9k9mGxDuGM0hq+sngx3Hsv3HorfP89tG0bOouPPBJqqPdJpESZWMO3PDoPk6yqVy9cAcyZA3ffHZaM7NQpLBv5+ONw442hCSnV+PEwcGAi4YrktEwmf63qJZVi/fXDUpGzZsHDD8Off4bRQXfcEb4MXnkl7FfUl7DHHsnGK5KLMpn8387ga4mUa5114KSTYNq0UC6iSRP4+Wc4/HDo0GH1tYVFZHXltvmb2YVlPe/ut2Q0olKozV/K4x7O9s84A776CrbeOlQR/ctfko5MJDlr0+ZfN7oVAH8HGke3s4HWmQxSZG2YhdvSpdC5M8ydCzvtBO+9l3RkIrmn3OTv7le7+9XAlkBrd+/r7n2B3YGtsh2gSLpS5ws89xwMGgSLFoUJY/ffn3R0IrklTpv/psCKlMcrom0iOaH4fIFevcKEsW23DffPPBN+/TXZGEVyRTpVPYs8DLxvZiOjx12AYRmPSKSCSir/3KVLWELyyivhuuvg44/DF0KTJpUenkhOiVPe4TrgVODH6Haqu1+frcBEMqVmTbj2Wnj22bCI/O67rzkfQCTfxB3qWQdY4u63A1+b2TZZiEkkK44+Ooz+adgwDAW9+WaViJD8lXbyN7MrgUuAS6NN6wCPlnNMMzP7KOW2xMzON7ONzWysmc2Kfjao+K8gkr7mzcMXQJcucNFF0L17mBsgkm/inPkfDXQClgG4+38JQ0BL5e6fRdU/WxFGBy0HRhIWgHnd3XcAXkcLwkglqlsXnnkmlIMYPjzUCJo1K+moRCpXnOS/wsOMMAcwsw1ivtdBwBfRymCdWdVZPIzQeSxSaczgkkvg5Zdh/vxQAuKFF5KOSqTyxEn+T5vZIKC+mZ0JvAbEGT3dHXgiur+pu8+P7n9LKUNGzayXmRWaWeGCBQtivJVIejp0gMLCsGjMUUfBVVeFWkEi1V1ayd/MDHgKGA6MAJoB/3T3O9M8vjahyeiZ4s+lXk2U8Nxgdy9w94JGjRql81YisTVtCm+9BT17wtVXQ4sWMHr06vuoOqhUN2kl/yhBv+TuY939Yne/yN3Hxnifw4Ep7v5d9Pg7M9scIPr5fayoRTJs/fXhoYfgnntg9uwwMmjIkPCcqoNKdRSn2WeKmVX0z78Hq5p8AEYDRcs+9gRGVfB1RTLGDP7+d3jjDahfPxSI69pV1UGleoqT/PcC3jWzL8xsqplNM7Op5R0UdQx3AJ5N2Xwj0MHMZgEHR49FcsLee8Mnn4RZwCNHwkYbweabJx2VSGbFKe9waEXewN2XAQ2LbfuBMPpHJCfNnAm//AKHHAJjx4bqoH36wD//Ga4KRKq6OOUd5kbDNH8hdNCW2lErUpWlVgd95ZVQC6h27bB28F//GiqErlyZdJQiayfODN9OUTPNV8BEYA4wJktxiSSmeHXQo4+Gl16C884LM4R79Qqdv2++mWycImsjTpv/v4A2wOfuvg2h2WZSVqISSVC/fmt27rZvD7ffDhMnwpNPwsKFsN9+0KMHzJuXTJwiayNO8v89aquvYWY13H08YXUvkbxhBscfH6qDXnllWDSmWTO45prQRyBSVcRJ/j+Z2YbAG8BjZnY7UZ0fkXxTp06YDfzpp6vWC2jePNQMUqVQqQriJP/OhM7eC4CXgS+Ao7IRlEhVsfXW8NRTMGECNGgQOorbtw+LxojksjijfZa5+0p3/8Pdh7n7HVEzkEje239/mDwZ7rsPpk+H1q2hTZvQLJRKZSIkV8QZ7bM0qse/xMx+NbOVZrYkm8GJVCU1a8JZZ4Xy0L17h1FDXbuG+7//rjIRklvMK9BAGRV66wy0cfdKqcVfUFDghYWFlfFWIhkxYwacfHK4ImjUCFasCDOGVSZCKpOZTXb3NQbnxF3GEQiF3tz9OSo461ckH7RoEc7+u3WDBQtg8WJ45JEwTFQkaXGafbqm3I41sxuBX7MYm0iVN2ECjBsH/fuHyqHDhoWhoUOGaN0ASVacM/+jUm6HAksJTT8iUoLUMhE33AAvvgj16kHjxqFi6H77hc5hkSSkXdjN3U/NZiAi1U3xMhHt24c6Qe+/D5tuGhaQ3203uPDCUDBug7gLo4qshbQ7fM3sjrKed/fzMhJRKdThK9XNDz+EdYSHDIGttoK77goTxkQyKRMdvusBrYFZ0a0VUBuYHN1EJIaGDeGBB0KBuLp1oVOnUETuP/9JOjLJB3GSf0vgAHe/M1q79yCgVTTha1h2whOp/tq1gylT4MYbQwnpFi3g5pvD3ACRbImT/BsAG6U83jDaJiJrqXbt0AQ0Y0boG7joIigogHffTToyqa7iJP8bgQ/NbKiZDQOmANdnJyyR/NS0KYweHSaDLVoUlpTcay8YVWyVa5WJkLUVp7bPQ4R1fEcS1uNtq+Yekcwzgy5dwlKSfftCYWHoC7j00lAxVGUiJBPiTPLaB1jq7qOAukA/M9s6a5GJ5LkNN4Sbbgr9Ac2bhz6BbbaBY45ZfQipSEXEafa5F1huZrsCFxJKOj+clahE5H923TVMBjviCJg7F5YuhWnTNENY1k6c5P+Hh0kBnYG73f1uwhWAiGTZxIlhclifPqFZqE+fUEZ61qykI5OqKk7yX2pmlwInAi+aWQ1gneyEJSJFUstE3HYbvPxyaBL68ENo2TIMC125MukopaqJk/yPB34DTnf3b4EtgX9nJSoR+Z/iZSIOPDCMCDr/fOjQIQwLbdcudBCLpKtC9fxLfCGzd929bUZerAQq7yCyJnd4/HE47zxYtgyuvjqMEKqVdtUuqe4yWs+/FOtl8LVEJA1mcMIJ8MkncOSRoXT03nurWqiUL5PJPzOXECIS22abwfDhYTH5r74Kawhfe61KREjpMpn8RSRBZqFjeMaMsHbwFVeE2cEff5x0ZJKLyk3+ZrZumq9laxmLiGRAo0bw5JNh7YBvvgk1gq68MqwhLFIknTP/dwHM7JFy9jtp7cMRkUzp2jVcBRx/PFxzDWy9NQwatPo+qhGUv9JJ/rXN7G/A3sXW8e1qZl2LdnJ3dTGJ5JiGDeHRR8PQ0N9/h7PPDh3Ev/2mGkH5Lp0BYWcDJwD1Cev3pnJCkbdSmVl94AFg52j/0whrAJ8JLIh2u8zdX0o3aBGJ56ijwmzgHj3C0NCXX4Y//gjVQ1UjKD+Vm/zd/S3gLTMrdPchFXiP24GX3f1YM6sN1CEk/1vd/aYKvJ6IVECDBiHpn3BC+AIAGDAgjBRq0SLZ2KTyxRnt84iZnWdmw6NbbzMrs7yDmdUD9gOGALj7Cnf/qeLhisjaGD8eXn0VLrssLBj/9tuhRETv3mFNYckfcZL/PcDu0c97COv53lvOMdsQmnYeMrMPzewBM9sgeu5cM5tqZg+aWYkrgplZLzMrNLPCBQsWlLSLiKQptUbQddfB88/DeutBx45wzz2www5wxx2aG5Av4iT/Pdy9p7uPi26nAuV1FdUi+pJw992AZUB/wpfGdoRF4OcDN5d0sLsPdvcCdy9o1KhRjFBFpLjiNYLat4dnngkzgj/+GHbfPVQLbdkSxoxJNlbJvjjJf6WZbVf0wMy2BcqrJfg18LW7vxc9Hg60dvfv3H2lu/8J3A/sGSdoEYmvX781O3fbtw/bd945NAeNHh0qhB5xRLh9+mkysUr2xUn+FwPjzWyCmU0ExgF9yzogqv45z8yaRZsOAmaY2eYpux0NaJioSMLMwqig6dNDmeh33oFddglXA4sWJR2dZFqsqp7RbN+iRP6Zu/+W8lwHdx9bwjGtCEM9awNfAqcCdxCafByYA5zl7vPLem9V9RSpXAsWhBIR998P9euHiWJnnaWKoVVNaVU9M1nSeYq7t87Ii5VAyV8kGVOnwgUXwLhxYUho27ZhuGhqE9L48aFPoV+/5OKUklVGSWfV9hGphlq2hNdeCxPCfvsNhgyBww6Dh6MVvDVTuGpSSWcRKZcZdOkS1g0YOBBq1oSePcNVwHHHrT6KSKoGlXQWkbStuy5cfHFYM2C33WDSpHA18O23YVUxqToymfznZPC1RCSHzZgB8+bBaafBr7/C3/4Wzvw/+STpyCRdaSd/M6tpZp2iEg8XFt2Knnf3rmUdLyLVQ+pM4SFDwoSwDTeEKVNg113DGsJLliQdpZQnzqCt54FfgWnAn9kJR0RyXfGZwgcfHCaHTZgQmn9uvRWeeAJuuilUETUNBclJaQ/1NLOp7t4yy/GUSkM9RaqGDz6A//s/KCyE/feHu+4KM4glGZkY6jnGzA7JYEwiUg3tsUfoCB40CKZNg1at4MIL1RSUa+Ik/0nASDP7xcyWmNlSM9M/p4isoWZN6NULPv8cTj8dbrsNmjWDxx7TqKBcESf53wK0Beq4+0buXtfdN8pSXCJSDTRsGK4A3nsPmjSBE0+EAw4I9YMkWXGS/zxgumeqHoSI5I2ipqDBg0Pib9UK9t0XXnhh9f20oHzliZP8vwQmmNmlJQ31FBEpS40acOaZoSnojDPgrbegUyf4xz9CU5DKRFSuOMn/K+B1QnXOuik3EZG0NWwI990XRgX99a9w/fXQtCkcc4zKRFSmtMf5u/vV2QxERPJLQUGYKdypE7z4YpgP8MILYUWxjdSbmHVxZviON7NxxW/ZDE5EqreJE0NncN++oW7QLbdA8+Zhkph6F7Mrzgzfi1LurwccA/yR2XBEJF+klolo3x6OPBK6doW6dUOtoPvvDxPEWrRIOtLqKU6zz+Rim942s/czHI+I5ImSFpR/9tlwJVC/Plx2WagVdMEF8M9/hvpBkjlxyjtsnPKwBlAA3O7uzUo5JKNU3kEkvyxYAP37w4MPQuPGoWbQsceqVlBcmSjvMBkojG7vABcCp2cmPBGR1TVqFKqGvvNOuN+tGxxyCHz2WdKRVQ/lJn8z28PMNnP3bdx9W+Bq4NPoNiPbAYpIfmvbNhSJu/PO0FS0yy6hSWjZsqQjq9rSOfMfBKwAMLP9gBuAYcBiYHD2QhMRCWrWhHPPDWf9f/sb3HBD6AgeOVKjgioqneRf090XRfePBwa7+wh3vwLYPnuhiYisbtNNYehQeOMNqFcvjA5q3hweeWT1/VQmonxpJX8zKxoVdBCQOrY/zlBREZGM2HffsHLYbbeF5SRPPhlOOgmWL1eZiHSlk/yfACaa2SjgF+BNADPbntD0IyJS6WrVgj594Isv4KCD4NFHw6igLl1UJiId5SZ/d78O6AsMBdqlVPWsAfTOXmgiIuXbfHN47bVw5v/TT2HRmLvvhq+/Tjqy3JbWUE93n+TuI919Wcq2z919SvZCExFJz/jxYSH5yy6DOnXCmsI77hjmBvyhOgQlijPOX0Qk56SWibjuulAcrm5d2GmnsHxkQUFYS0BWp+QvIlVaSWUihg+Ho4+GESPghx9g773hrLNg0aKyXyufpF3eIWkq7yAiFbF0KVx1Fdx+O2y8Mdx0U+gfyJcyEZko7yAiUuXUrQs33wyTJ8P220PPnuHqYObMpCNLlpK/iOSFXXcNS0cOHgxTp4bHl10W5gbko6wnfzOrb2bDzexTM5tpZm3NbGMzG2tms6KfDbIdh4hI0TrCqWUidtoprCSWbyrjzP924GV3bw7sCswE+gOvu/sOhHWB+1dCHCIiQKgSOnRoWEmsTh3o2BF23hmeemr1/apzmYisJn8zqwfsBwwBcPcV7v4T0JlQHI7oZ5dsxiEiUpL99oMPPwxXALNmQffu8Pe/w++/V/8yEVkd7WNmrQiVP2cQzvonA32Ab9y9frSPAT8WPS52fC+gF8BWW221+9y5c7MWq4jktzlzoEePMCdgk03CF8DIkVW/TERSo31qAa2Be919N2AZxZp4onIRJX4Duftgdy9w94JGjRplOVQRyWdNm4aFY7p1g4ULYfHisIbAV18lHVl2ZDv5fw187e7vRY+HE74MvjOzzQGin99nOQ4RkXJNmADjxsGll4a+gDFjQpmIK66ofovHZDX5u/u3wDwzK1rn9yBCE9BooGe0rScwKptxiIiUJ7VMxPXXhzIRdepAu3Zw7bVh3YAnn6w+i8dUxmif3sBjZjYVaAVcD9wIdDCzWcDB0WMRkcSUVibikEPC/IC//CX0Cey/P3z0UaKhZoTKO4iIpGHlSnjoodAktGhRmC9w7bWhcziXqbyDiMhaqFkTzjgjDAk97zx44AHYYYfQKVwVy0Yr+YuIxFC/flgnYOrUUC76vPNgt91CR3FVouQvIlIBLVrAq6+GuQDLloWlJI85Bvr3D53HqXJxprCSv4hIBZmFNYNnzAgLybz8MtxySygXMWZM2CdXZwor+YuIrKX11gsVQj/7DI47LlQK7dgxLChTNHw012YKK/mLiGTIllvCY4/Bm2+GoaHPPQfrrBOqieaaHAxJRKRq+/33MAKoSxf47js44IBw/7PPEg4shZK/iEgGpc4UHjkyrBVQp07oHN5pJzj3XFiwIOkolfxFRDKq+Ezhww4LpSIuuigsIn/ffWE5yQED4Ndfk4tTM3xFRCrRzJlwySXw/POw1VahjlCPHtnrF9AMXxGRHLDjjjB6dJgUtskmcOKJsNdeYVWxyqTkLyKSgPbtQxPRww/Dt99Wfqewkr+ISEJq1ICTToLPPw/NP+PGhbWE99kndBanyvQsYSV/EZGErb9+qBY6e3aoFjppUigV0atX6BTOxixhdfiKiOSYmTPh9NPh3Xdho43CFcKzz1ZslrA6fEVEqogddwzrCZ94IixZEq4GMl0eQslfRCQHjR8fCsVdcUVYRKZ4pdC1peQvIpJjUmcJX3NN+NmtW2a/AJT8RURyTEnrCT/9dNieKerwFRGpxtThKyIi/6PkLyKSh5T8RUTykJK/iEgeUvIXEclDVWa0j5ktAOZm8S02ARZm8fWzQTFnX1WLFxRzZakqMW/t7o2Kb6wyyT/bzKywpOFQuUwxZ19VixcUc2WpijGnUrOPiEgeUvIXEclDSv6rDE46gApQzNlX1eIFxVxZqmLM/6M2fxGRPKQzfxGRPKTkLyKSh/Iq+ZtZEzMbb2YzzOwTM+tTwj4HmNliM/souv0ziViLxTTHzKZF8axR2tSCO8xstplNNbPWScQZxdIs5bP7yMyWmNn5xfZJ/DM2swfN7Hszm56ybWMzG2tms6KfDUo5tme0zywz65lwzP82s0+jf/eRZla/lGPL/Buq5JivMrNvUv79jyjl2MPM7LPo77p/wjE/lRLvHDP7qJRjE/mcK8Td8+YGbA60ju7XBT4HWhTb5wDghaRjLRbTHGCTMp4/AhgDGNAGeC/pmKO4agLfEiaZ5NRnDOwHtAamp2wbCPSP7vcHBpRw3MbAl9HPBtH9BgnGfAhQK7o/oKSY0/kbquSYrwIuSuNv5wtgW6A28HHx/6uVGXOx528G/plLn3NFbnl15u/u8919SnR/KTATaJxsVBnRGXjYg0lAfTPbPOmggIOAL9w9mzOzK8Td3wAWFdvcGRgW3R8GdCnh0EOBse6+yN1/BMYCh2UrzlQlxezur7r7H9HDScCWlRFLukr5nNOxJzDb3b909xXAk4R/n6wrK2YzM6Ab8ERlxJJNeZX8U5lZU2A34L0Snm5rZh+b2Rgz26lyIyuRA6+a2WQz61XC842BeSmPvyY3vtS6U/p/klz7jAE2dff50f1vgU1L2CdXP2uA0whXgCUp72+osp0bNVU9WErzWq5+zvsC37n7rFKez7XPuVR5mfzNbENgBHC+uy8p9vQUQjPFrsCdwHOVHF5J2rl7a+Bw4Bwz2y/pgMpjZrWBTsAzJTydi5/xajxcw1eZcdBm9g/gD+CxUnbJpb+he4HtgFbAfEIzSlXRg7LP+nPpcy5T3iV/M1uHkPgfc/dniz/v7kvc/efo/kvAOma2SSWHWTymb6Kf3wMjCZfEqb4BmqQ83jLalqTDgSnu/l3xJ3LxM458V9RcFv38voR9cu6zNrNTgI7ACdGX1hrS+BuqNO7+nbuvdPc/gftLiSUXP+daQFfgqdL2yaXPuTx5lfyj9rohwEx3v6WUfTaL9sPM9iR8Rj9UXpRrxLOBmdUtuk/o4JtebLfRwMnRqJ82wOKU5ouklHqGlGufcYrRQNHonZ7AqBL2eQU4xMwaRM0Vh0TbEmFmhwH9gE7uvryUfdL5G6o0xfqjji4llg+AHcxsm+gqsjvh3ydJBwOfuvvXJT2Za59zuZLuca7MG9COcCk/Ffgouh0BnA2cHe1zLvAJYXTBJGDvhGPeNorl4yiuf0TbU2M24G7C6IhpQEHCMW9ASOb1Urbl1GdM+GKaD/xOaE8+HWgIvA7MAl4DNo72LQAeSDn2NGB2dDs14ZhnE9rGi/6e74v23QJ4qay/oQRjfiT6O51KSOibF485enwEYUTeF0nHHG0fWvQ3nLJvTnzOFbmpvIOISB7Kq2YfEREJlPxFRPKQkr+ISB5S8hcRyUNK/iIieUjJX6SCzKxpauVHkapEyV9EJA8p+YtkgJlta2YfmtkeScciko5aSQcgUtWZWTNCyeFT3P3jpOMRSYeSv8jaaUSoAdTV3WckHYxIutTsI7J2FgP/IdSNEqkydOYvsnZWECpTvmJmP7v740kHJJIOJX+RteTuy8ysIzA2+gJIuvSwSLlU1VNEJA+pzV9EJA8p+YuI5CElfxGRPKTkLyKSh5T8RUTykJK/iEgeUvIXEclD/w/GZx7DmCbvFAAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] @@ -158,10 +132,8 @@ } ], "source": [ - "import matplotlib.pyplot as plt\n", - "from sklearn.cluster import KMeans\n", "Sum_of_squared_distances = []\n", - "K = range(2,10)\n", + "K = range(2,20)\n", "for k in K:\n", " km = KMeans(n_clusters=k, max_iter=200, n_init=10)\n", " km = km.fit(text_tf)\n", @@ -175,151 +147,78 @@ }, { "cell_type": "code", - "execution_count": 129, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " cluster\n", - "50 0\n", - "81 0\n", - "24 0\n", - "7 0\n", - "75 0\n", - ".. ...\n", - "55 8\n", - "3 9\n", - "39 9\n", - "78 9\n", - "43 9\n", - "\n", - "[87 rows x 1 columns]\n" - ] - } - ], - "source": [ - "true_k = 10\n", - "model = KMeans(n_clusters=true_k, init='k-means++', max_iter=200, n_init=10)\n", - "model.fit(text_tf)\n", - "labels=model.labels_\n", - "clusters=pd.DataFrame(list(labels),columns=['cluster'])\n", - "print(clusters.sort_values(by=['cluster']))" - ] - }, - { - "cell_type": "code", - "execution_count": 130, + "execution_count": 161, "metadata": {}, "outputs": [ { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
cluster
01
17
23
39
44
......
823
834
842
857
864
\n", - "

87 rows × 1 columns

\n", - "
" - ], + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEWCAYAAAB8LwAVAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAyoElEQVR4nO3dd5iU5dnG4d8FWLFgQWMXG1ETRQQUNSo2sIXYSzS22GKLGrESsRvsRiX22AXBglGxsWKJDSMWQCNRFFQUY1CwgOD9/fG8++2wDrszMMPszl7nccyx87aZ+92BuffpigjMzMzqa1XpAMzMrGlygjAzs7ycIMzMLC8nCDMzy8sJwszM8nKCMDOzvJwgrGCSDpb0fM52SFqrkjGVSinvRdJ4SduV4rUqTdJvJT1Rptd+RtLv53Csn6Q7y/G+VjgnCJtN9uX2naRpOY9rKh0X/H+CCklX1NvfO9v/9wJfZ45fTOUm6e+SZtT7/e5TotdeSNJFkj7KPsP3JJ0iSQVev3r2e2xTuy8i7oqIHUoRnzU/bRo/xVqgXSPiqUoHMQf/AfaWdEpEzMz2HQT8u4IxFat/RJw1txdLapNz77nuA34G7AS8A3QB7gBWAY6f2/ezlsslCJtXO0l6X9IXki6R1ApAUitJZ0n6UNLnkm6XtGR27DZJJ2fPV8r+aj0m215T0pe1r5PHJOAtoGd2/tLAZsDQ3JMkbSrpn5KmSHpD0tbZ/guAXwHX5CkdbZf91T1F0rW1f3k3dC/Z8QOzY/+VdObc/iIlHS5pXHb/QyWtmHMsJB0j6T3gvTzXbgvsAOwREW9HxMyIeAk4ADimtvosKz1dJOkVSV9Leij7HQI8m/2ckv1uus+hWvEP2e9pqqTzss/sn9nrDZK0YHbuUpL+IWmypP9lz1eei9/LApLukTSk9rVt/nCCsHm1G+kv1c5Ab+DQbP/B2aMHsAawGFD7ZTwC2Dp7vhXwPrBlzvZzEfFjA+95O/C77Pm+wEPA9NqDklYCHgHOB5YG/gQMkdQ+Is4EngOOjYjFIuLYnNfdBegKbADsTZaEGroXSesBA4ADgRWBZYC5+RLcBrgoe98VgA+Be+ud9htgE2C9PC+xPfByREzI3RkRLwMTgW1zdv+O9DmtAMwErs72134G7bLfzYtzCLcnsDGwKdAHuIGUiFYBfgHsl53XCrgVWA1YFfiOun8DBZG0CPAg6fPdOyJmFHO9zRsnCMvnweyv6NrH4Q2c+5eI+DIiPgKupO7L4bfA5RHxfkRMA04H9s3qt0cAW2SlhC2B/sDm2XVbZccb8gCwdfZX/O9ICSPXAcCjEfFoRPwYEU8CI0lVLw25OCKmZPdSA3Qq4F72BP4REc9GxHSgL9BQcgP4U87v9ouc97glIv6Vvc7pQHdJq+dcd1H2u/4uz2suC3w6h/f7NDte646slPFNFu/eklo3EnOu/hHxdUSMBt4Gnsh+N18BjwEbAUTEfyNiSER8GxFTgQtIn2+hlgCGkaoVD4mIWUVcayXgBGH5/CYi2uU8bmzg3Ny/WD8k/RVN9vPDesfaAMtHxH+Ab0hfwL8C/gF8IqkjBSSI7AvyEeAsYJmIeKHeKasBe+UmOWAL0l/MDZmU8/xbUkmhwXvJjv3/7yD70v1vI+9zac7vtvaLe7b3yBLRf4GVcq6brXRQzxfM+f5WyI7ne50PgQWYPYE05rOc59/l2V4MQNKikq7Pqt++JlVhtSsiGW1KKs1dHJ5VtCKcIGxerZLzfFXgk+z5J6Qv6txjM6n7MhlB+ut7wYj4ONs+CFgKGFXA+94OnAzk6wo5gfRXcm6SaxsRF2fHi/2yaehePiXndyBpUVI1U7Fmew9JbbPX+TjnnIbifgrYRFLu54GkTbL4hufsrv+Z/UBKIKX+Ej4Z6AhsEhFLUFeFVVCvKuAJUrXb05KWL3FsVgAnCJtXp2SNkasAJwADs/33ACdK6iBpMeBCYGBO75sRwLHUNYw+k20/X2BVwghSvftf8xy7E9hVUk9JrSUtLGnrnAbSz0htCYVq6F4GA7tI2iJrQD2Xuft/dQ9wiKROkhbK3uPliBhfyMVZr7OnSW0t62f3vSnpdzEgInIbtg+QtF6WzM4FBme/88mk6rFifjcNWZxUopiSNYSfXewLRER/4G5SkiimlGMl4ARh+Tys2fvpP9DAuQ8Br5H+6n8EuDnbfwupi+WzwAfA98BxOdeNIH2B1CaI54FFc7YbFMnTEfFlnmMTSA3mZ5C+9CYAp1D37/0qYM+sZ83V9a/PY473ktXDH0P6EvsU+B+pUbgo2Rd8X2BI9jprkhrgi7EHqe1kGDCNlBxuZvbfO9m9/J1UpbYwWRfYiPiW1E7wQlY1t2mx91HPlcAipNLJS1lcRYuI80gN1U/l9Liy+UCu2jNrOSQ9A9wZETdVOhZr+lyCMDOzvJwgzMwsL1cxmZlZXi5BmJlZXs1+sr5ll102Vl999UqHYWbWrLz22mtfRET7hs5p9gli9dVXZ+TIkZUOw8ysWZH0YWPnuIrJzMzycoIwM7O8nCDMzCwvJwgzM8vLCcLMzPJqcQmif3+oqZl9X01N2m9mZnVaXILo2hX23rsuSdTUpO2uXSsbl5lZU9Psx0EUq0cPGDQI9twTOnaE995L2z16VDoyM7OmpcWVICAlg65d4cUXYemlYZNNKh2RmVnT0yITRE0NvPYa7Lgj/Pvf0K0bfPmTZWfMzFq2FpcgatscBg2CRx+Fs8+G0aOhc2eYWPQ6YGZm1avFJYhXX529zaFfP7jsMpg0CTbbDMaMqWh4ZmZNRrNfD6JLly5Risn6Ro1KVU7Tp8M//pGShZlZtZL0WkR0aeicFleCmJNOneCf/4RlloHttoOHH650RGZmlVX2BCGpnaTBkt6RNFZSd0kDJY3KHuMljco5/3RJ4yS9K6lnuePL1aEDvPACrL8+7LYb3Hrr/Hx3M7OmZX6UIK4ChkXEz4ENgbERsU9EdIqITsAQ4H4ASesB+wLrA72A6yS1ng8x/r/lloPhw2GbbeDQQ1O10/Dhs5/jkddm1hKUNUFIWhLYErgZICJmRMSUnOMC9gbuyXb1Bu6NiOkR8QEwDuhWzhjzWXzx1A6x//4wbBjsvDM8/XQ65pHXZtZSlLsE0QGYDNwq6XVJN0lqm3P8V8BnEfFetr0SMCHn+MRs32wkHSFppKSRkydPLkvgCy4Id9wBJ54I33+fksSZZ9Z1kfXIazOrduVOEG2AzsCAiNgI+AY4Lef4ftSVHgoWETdERJeI6NK+fYNLqs6TVq1SF9j+/VPvpgsvhKOPdnIws5ah3AliIjAxIl7OtgeTEgaS2gC7AwNzzv8YWCVne+VsX8VI0KULLLxwev7Xv/50Nlgzs2pU1gQREZOACZI6Zru2BWqHom0HvBMRueOXhwL7SlpIUgdgbeCVcsbYmNo2h4EDUwN2+/azzwZrZlat5sdsrscBd0laEHgfOCTbvy/1qpciYrSkQaQkMhM4JiJmzYcY5yh35PWMGbDXXnDkkWm/q5rMrJp5JHURItL4iCeegLfegjXXnC9va2ZWch5JXWISXHstLLAAHHFEShhmZtXKCaJIK62UejUNH+6R1mZW3Zwg5sLhh8OWW8LJJ6dZYM3MqpETxFxo1QpuuAG++w6OO67S0ZiZlYcTxFzq2BH+/GcYPBgefLDS0ZiZlZ4TxDw45RTYYAM45hj46qtKR2NmVlpOEPNggQXg5ptTO8Spp1Y6GjOz0nKCmEdduqQJ/a6/Hp59ttLRmJmVjhNECZxzTlps6PDD08yvZmbVwAmiBNq2Tb2a/v1vOO+8SkdjZlYaThAlst12sPHGcPHF8MYbdfu9+pyZNVdOECXUt2+afmPvvWHmTK8+Z2bNmxNECfXuDWedlaqaNt/cq8+ZWfPmBFFi55yTqppeeSWNuG7XrtIRmZnNHSeIEnvmGfjww1R6+OKL1A22b9+0ZKmZWXPiBFFCtW0OgwalFegeeCANpjv//JQo5tOyFWZmJeEEUUK5q88B/PrX8NhjcPDB8OWXsOmmcMYZHithZs2DV5SbT6ZMSdOD33ILrLsubLMN7LHH7A3YNTUpyfTpU7EwzayF8IpyTUi7dmnepsceg6lT4brrYOed4fHH03F3iTWzpqbgBCFpc0lts+cHSLpc0mrlC6069eoFb78Nhx2W1pPYeWc49FB3iTWzpqeYEsQA4FtJGwInA/8Bbi9LVFVuySXhxhtT6aFt27R06S67ODmYWdNSTIKYGanBojdwTURcCyze2EWS2kkaLOkdSWMldc/2H5ftGy2pf875p0saJ+ldST2LvaHmZIEF0mPppeG22+Dvf690RGZmddoUce5USacDBwK/ktQKWKCA664ChkXEnpIWBBaV1IOUaDaMiOmSlgOQtB6wL7A+sCLwlKR1ImJWEXE2C7VtDvfdB6utlgbXHXZYKlHstVelozMzK64EsQ8wHTg0IiYBKwOXNHSBpCWBLYGbASJiRkRMAY4GLo6I6dn+z7NLegP3RsT0iPgAGAd0KyLGZiO3S+waa8BTT8GCC8Lxx3t1OjNrGgpOEFlSGAIslO36Anigkcs6AJOBWyW9LummrKF7HVIp5GVJIyTV9t1ZCZiQc/3EbN9sJB0haaSkkZMnTy70FpqUPn1mb3PYeGN46KE0+nr33T3y2swqr5heTIcDg4Hrs10rAQ82clkboDMwICI2Ar4BTsv2Lw1sCpwCDJKkQmOJiBsioktEdGnfvn2hlzV5O+yQxkkMH54G1/34Y6UjMrOWrJgqpmOAzYGvASLiPWC5Rq6ZCEyMiJez7cGkhDERuD+SV4AfgWWBj4FVcq5fOdvXYhx4IFx0Edx7rwfMmVllFZMgpkfEjNoNSW2ABodhZ9VSEyR1zHZtC4whlTx6ZK+zDrAgqcpqKLCvpIUkdQDWBl4pIsaqcOqpcOyxcNllcMUVlY7GzFqqYnoxjZB0BrCIpO2BPwAPF3DdccBdWQ+m94FDSFVNt0h6G5gBHJR1oR0taRApicwEjqnGHkyNkeDKK+HTT+Gkk2CFFWDffSsdlZm1NAXPxZR1az0M2AEQ8DhwU1R4MqfmMhfT3Pj++9Qu8fLLMGyYB9KZWemUei6mRYBbImKviNgTuCXbZ2Wy8MKpZ1O7dmmk9Ztv1h3zWtdmVm7FJIinmT0hLAI8VdpwrL6lloKrr06liW22gY8+8sR+ZjZ/FNMGsXBETKvdiIhpkhYtQ0xWzz77wLRpcPjhabxERBqB7SonMyunYkoQ30jqXLshaWPgu9KHZPkcdljqAvvFF6kRu2PHxq8xM5sXxSSIPwL3SXpO0vPAQODYskRlP1FTA48+Cr/7XUoSXbumta/NzMql4CqmiHhV0s+B2r9d342IH8oTluXKXeu6Rw/o1g2OOy79fOEFWGutSkdoZtWo2BXlugIbkEZD7yfpd6UPyeqrv9b1McfAgAGpXWLLLWHs2MrGZ2bVqZhxEHcAawKjgNrBaxERx5cntMJU8ziIxoweDdtum+ZsevJJ2HDDSkdkZs1FIeMgiunF1AVYr9ID46zO+uvDs8+mJNGjR1qhzl1fzaxUiqliehv4WbkCsbmzzjrw3HNpMN2228Lzz1c6IjOrFsUkiGWBMZIelzS09lGuwKxwq6+eksSKK0LPnvD005WOyMyqQTFVTP3KFYTNu5VWghEjYPvtU5I47zw4/fS64zU1qbHbU4ibWaGK6eY6opyB2LxbfvmUCLp3hzPOgBkz4OyzZ+8ma2ZWqGJWlNtU0quSpkmaIWmWpK/LGZwVb5ll4JVXYL31oF8/WHtt2HlnOOIIWHrpny5l2r9/SiC5PBGgmUFxbRDXAPsB75Em6vs9cG05grJ5065dmiJ8441h3LiUFC68EDp1grZtU/LYZ59UDTV9OuyxR127hScCNLNaxbRBEBHjJLXOFvG5VdLrwOmNXWfz36uvpqk4+vZNg+r694dFF4W33kqPkSNnr3LabjvYaCOYMGH2QXlm1nIVkyC+zVaFGyWpP/ApxY/Etvmg/tQcPXrUbZ9/ft1506bBmDEpYVxzDbz+Oqy6aippmJkV8wV/YHb+saQlQ1cBdi9HUDZv6k/N0aNH2n711dnPW2yxNJ/TGmvAxImw225pvYlOnTwRoJkVN9XGCRFxVWP75reWPNVGKdQvbVxxBZx8cmrHePLJ1I5hZtWn1EuOHpRn38FFRWRNTv3Sxoknws03p0WJttoqTTFuZi1ToyUISfsB+wNbAM/lHFoCmBUR25YvvMa5BFEen36a1sEeNSo1ch9xRKUjMrNSKtVkff8kNUgvC1yWs38q8Obch2dN2QorpJHZ++wDRx4J48enBu5W7pZg1mI0+t89Ij6MiGeA7YDnshHVnwIrA2rsekntJA2W9I6ksZK6S+on6WNJo7LHTjnnny5pnKR3JfWc+1uzebXYYvDQQylBXHQRHHDATwfamVn1KubvwWeBhSWtBDxB6tX09wKuuwoYFhE/BzYEape3uSIiOmWPRwEkrQfsC6wP9AKuk9S6iBitxNq0SVVMF18M99yTej397391xz3q2qx6FZMgFBHfkrq2XhcRe5G+yOd8gbQksCVwM0BEzIiIKQ1c0hu4NyKmR8QHwDigWxExWhlIcOqpcOaZ8OabqRvs+PEedW1W7YpKEJK6A78FHsn2NfbXfQdgMtmoa0k3SWqbHTtW0puSbpG0VLZvJWBCzvUTs331AzlC0khJIydPnlzELdi8OP/81A12wgT45S9hzz096tqsmhWTIP5ImlbjgYgYLWkNoKbhS2hDWr96QERsRBpgdxowgLR8aSdSe8Zlc3qBfCLihojoEhFd2rdvX8ylNo/++MfUo2naNGjd2uMkzKpZwQkiIkZExK8j4i/Z9vsFrEc9EZgYES9n24OBzhHxWUTMiogfgRupq0b6mDRCu9bK2T5rImpqYMgQ+O1vYfLkNIfTzJmVjsrMyqHRBCHpyuznw7kryRWyolxETAImSOqY7dqWtCrdCjmn7UZazhRgKLCvpIUkdQDWBl4p7pasXHJHXd95Zxpx/eqraTZYr1RuVn0KGQdxR/bz0rl8j+OAu7KJ/t4HDgGultQJCGA8cCRAVnU1CBgDzASOyWaOtSag/qjrSy9NczgNHAhXXplGYZtZ9Sh4LqamyiOpK+vHH2GvveCBB+DBB+HXv650RGZWiJKMpJb0Fukv/bwiYoO5iM2qRKtWcMcdsPXWsN9+8Nxz0LlzpaMys1IopIppl+znMdnP2iqnA2ggcVjLseiiMHQobLIJ7LprWs1u5ZUrHZWZzatCp9r4ENg+IvpExFvZ41Rgh/KHaM3Bz34GjzwCU6emSf6mTq10RGY2r4odKLd5zsZmRV5vVe4Xv4D77oO3307VTbPcvcCsWSvmC/4w0txI4yWNB64DDi1LVNZs9ewJf/1rKk2cdFKlozGzeVHwmtQR8RqwYTa/EhHxVe5xSQdFxG0ljs+aoaOPhvfeS9NyRMDVV9cdq6lJ3WX79KlcfGZWmKKriCLiq/rJIXNCCeKxKnHJJbDZZqk0cdFFaZ8n9zNrXkrZhtDo2hDWcrRuDU88AWuvDWecAR07ws47w4EHpuMTJ6YxFLX6908JJJenEjerrFImCHd5tdm0bQvPPAPrrw///jfMmJGqnbbZBlZZJS1ItOGGaVbYUaPSILurr05zO7m0YVZ5BbdBFMAlCPuJd9+Fzz6Dvn3TwkN//Su0b58Sxnvvpcdbb8H776fEcMIJcM01aVEiTyVuVlmlTBAvlPC1rArkTu7Xo0d61G4fffTs586cCR9+mKYSHz48TSO+9dYVCdvMMoVMtdFgZ8WIuDz7eWypgrLqUH9yvx490varr/60ZNCmDXz0UVqxrnt3ePFFOOoouP76+R+3mSWFlCAWz352BLqSpuQG2BVPxW0NyNeVtbYkUV9uaWOrrWDbbeGGG2D55eHcc8sfq5n9VKMJIiLOAZD0LGmxn6nZdj/qlh41myf1SxuPPZYaqC+8EHbYAbbYorLxmbVExfRiWh6YkbM9I9tnNs/69Jm9ZLHwwqkH1JprQu/eqbHbzOavYhLE7cArkvplpYeXAY+ctrJZZhl49NE0pmKnneDzzysdkVnLUsya1BeQVoP7X/Y4JCIuLFdgZpBKEEOHwiefpHES335b6YjMWo5iB8otCnwdEVcBE7N1o83KatNN4e674ZVX4IADPEus2fxScIKQdDZwKnB6tmsB4M5yBGVW3267pVHYDzwAf/pTpaMxaxmKGSi3G7AR8C+AiPhE0uINX2JWOiecAB98AFdeCR06wPHHVzois+pWTBXTjIgIsjmXJLUtT0hmc3bZZWlupxNOgAcfrNvvif3MSq+YBDFI0vVAO0mHA08BNzZ2kaR2kgZLekfSWEndc46dLCkkLZttS9LVksZJelNS52JvyKpb69ZpKvE2bWCffdL6157Yz6w8CkoQkgQMBAYDQ0ijqv8cEX8t4PKrgGER8XNgQ2Bs9pqrkNa0/ijn3B2BtbPHEcCAwm7DWpIdd4SBA9P8TVtskXo3DRxYmon9PO24WZ2CEkRWtfRoRDwZEadExJ8i4snGrstWn9sSuDl7nRkRMSU7fAXQh9mnCe8N3B7JS6TSygqF3461FLvvDscdl5LEtGlw3XXw5Zfz/rpdu6bSyP33p22XTqwlK6aK6V+Siv1v0gGYDNwq6XVJN0lqK6k38HFEvFHv/JWACTnbE7N9ZrOpqYG77oKzzkrrTjzwQFpb4pln5u1127eHddaBPfZIJZXc2WjNWppiEsQmwIuS/pO1D7wl6c1GrmkDdAYGRMRGwDdAP+AM4M9zEzCApCMkjZQ0cvLkyXP7MtZM5U7sd9558PDDsMQSaf3rbbZJK9j98ENxrzl+PBx0EGywAYweDcsuC8OGpXEXTg7WUinVHhVworRavv0R8WED1/wMeCkiVs+2f0VKEL8EasfErgx8AnQDzgGeiYh7svPfBbaOiE/n9B5dunSJkSNHFnQPVh36909VPrlf3DU18PzzaU2Jm29Ox+++G9Zaq+HX+vxzuOCCtJhR69ap6+xmm8Ehh8DXX6fG8Mcec5Kw6iPptYjo0uBJEVHUA1gOWLX2UcD5zwEds+f9gEvqHR8PLJs93xl4jLQ63abAK429/sYbbxxmuQYPjlhqqYjFFou49daIH3/86TlffRVx9tnpnNatIw4/PGLChIjhwyOWXTb9PO+8CIhYcsm0bVZNgJHRyPdrMSOpfy3pPeADYET2xf5YAZceB9yVVUd1Ahqav+lR4H1gHKkL7R8Kjc+s1h57wBtvpFXpDjkkVTtNmZKOff89HHMMrLACnHNOamcYPTqtPbHyyrNPO37KKdCxY2rj+Oc/K3pLZhVRzEjq80h/1T8VERtJ6gEc0NhFETEKmGMxJrLqp+x5AMcUEZNZXqusAk8/DUcemaqcOnaEk06Cyy9P1Uobbwx/+xt0qfcvM3eRo4UWSr2jtt0Wpk+fv/GbNQXFNFL/EBH/BVpJahURNTTwxW9Waa1bw003pS/5L76A005LXWEvvRRGjvxpcshnm21SQ/XFF3tNCmt5ikkQUyQtBjxLqjK6itQryaxJO/roVF0EKUmcfHJx1196aapm+sMfUk8ps5aimATRG/gOOBEYBvyHtC61WZNWU5Oqmfr2TdVK9UdKN2b55eGii2D48NQzyqylKLiba1Plbq7WkNwxEz16/HS7UD/+mLq/fvABvPMOLLVU+WI2mx8K6eZaTC+mqZK+zh7fS5ol6et5D9OsfHJ7JUH6OWhQ2l+MVq3SWIkvvoAzzyx9nGZN0VyVILLJ+3oDm0bEaSWPqgguQdj8dOKJcNVV8OKLsMkmlY7GbO6VtASRKxtn8SDQc26uN2uuzj0XVlwRjjoqTRRoVs0KHgchafeczVakLq7flzwisyZs8cXTinZ77QXXXpsWLjKrVsWUIHbNefQEppKqmcxalNqZXs86Cz7+uLBrvM6ENUcFJ4iIOCTncXhEXBARn5czOLOmSIJrrklVTH/8Y2HX1K4zUZskvM6ENQfFVDFd3dDxiPAS8tZirLEGbLUVDB6cZnvdcce0v6Ym9ZCqnbLju+9St9iJE9OUHb16wZprwqRJMGSIZ4m1pq2YuZgWBtYjLT0KsBcwBnix1EGZNQcnnghPPQWHHpqm4RgyJLVJ/OY3acW70aNh3Lg0hgJggQXS+ImxY9P2rbdChw6w+uqVugOzhhWzHsRLwBYRMTPbXgB4LiI2LWN8jXI3V6ukyy//6dQdrVrB2mvDL36RHuuvn35OnAj7759mmK2topLS7LJnnJEWKTKbX0rdzXUpYImc7cWyfWYt1kknpWojSCWH11+Hb75J1UqDB0O/fqnH06RJKTkMGpQaph95JPWI2m67NK5izTXhwgvTtWZNRTEJ4mLgdUl/l3Qb8C8aXtvBrOrV1KSZYfv2TSva/e9/sPDCPz0v34juwYNTO8bbb6dZY888M5U89tgjVV3Vfx/3eLL5rrEVhXIfwM9IXVt7Az8r5tpyPbyinFVK7upz+baL9cILEZtvnlaxa906ol+/tBrevL6uWT6UeEW5zYGpEfEQsDjQZ07rVJu1BKWa56nWZpvBc8/B0KFpdbt+/dLCR7vvXvzkgmalUEwj9ZvAhsAGwK3AzcDeEbFV+cJrnBuprRrNmgW77QYPP5y2Dz00TTm+3HKVjcuqR6kbqWdmxZLewLURcS2pJGFmJfbss2lCwD59YJFF4LbbYJ114OqrPQeUzT/FJIipkk4nrUP9iKRWwALlCcus5cpds+Ivf0k9npZYAtZaK42z6NwZRoyodJTWEhSTIPYBpgOHRcQkYGXgkrJEZdaC5WvbGDIkdZe9/374+mvYeuvUbbbQuaDM5kbJVpST9GJEdC/JixXBbRDW0nz7beryevHF0KYNbLllmhNqhx3qzqk/5YdZfWVbD2IO8vT+BkntJA2W9I6ksZK6SzpP0puSRkl6QtKK2bmSdLWkcdnxziWMz6wqLLpo6uE0ZkwaaPfYY7DTTnXjJDwRoJVKKRPEnIoiVwHDIuLnpF5QY4FLImKDiOgE/AP4c3bujsDa2eMIYEAJ4zOrKmusAQ8+mBLEz34Gp54Kv/xlqopyt1grhVImiJ+QtCSwJalLLBExIyKmRETuWtZtqUsuvYHbs3EcLwHtJK1QzhjNmrteveA//6kblT19OsyYUemorBo0miAkLVTgaynPvg7AZOBWSa9LuklS2+x1L5A0AfgtdSWIlYAJOddPzPbVj+kISSMljZw8eXKB4ZlVr3/+M80e+/vfpynGe/WCo4+GadMqHZk1Z4WUIF4EkHRHI+cdmGdfG6AzMCAiNgK+AU4DiIgzI2IV4C7g2IIjTtfeEBFdIqJL+/bti7nUrOrkdou98cbULXaRReBvf4MNN4QXXqh0hNZcFZIgFpS0P7CZpN3rP2pPioi381w7EZgYES9n24NJCSPXXcAe2fOPgVVyjq2c7TOzOajfLbZnz5QkjjoKIuBXv0rtE9OnVzZOa34KWTDoKFI1UDvSetS5Arh/ThdGxCRJEyR1jIh3gW2BMZLWjoj3stN6A+9kz4cCx0q6F9gE+CoiPi34bsxaoHxdWXv0SI+pU+FPf0o9nB59NK1q17v37A3Y7hJrc1LMXEyHRcTNRb+B1Am4CVgQeB84JNvuCPwIfAgcFREfSxJwDdAL+BY4JCIaHOTgcRBmjXv00dQ+8dlnaTryhx5KXWRzq6fc66llKWQcRDEJYkFSaWLLbNcI4G8R8cM8RTmPnCDMCvPf/6bV6wYOTAPsjjgiJQYnh5ap1APlrgM2zn5eR9b4PPfhmdn8tMwycO+9cM89KUFcdx1065am7TDLp5A2iFpdI2LDnO3hkt4odUBmVl7LL59GY6+wQqp62nzzVOXkDoFWXzEliFmS1qzdkLQGMKv0IZlZudS2OQweDOPGwR/+kKYV79gxJQuzXMUkiFOAGknPSBoBDAdOLk9YZlYOuV1iW7WCa6+Fm26CBReEnXeGY49NA+3MoMjZXLNR1R2zzXcjYnrOse0j4skSx9coN1Kbzbvvv4czzoArroB114W77oKNNqp0VFZOJZ/NNSKmR8Sb2aP+sJu/FB2hmTUJCy8Ml18OTzwBU6bAJpukGWKffnr282pq6maNtepXysn68s3FZGbNyPbbw1tvwa67pllid9wxdYsFTyPeEs2P6b7NrBlZZpnUiH3LLdC6Ney3Xxp97QF1LU9Zp/s2s+ZJgkMOSdOHr7giDB2aZoYdOBCefx5+/LHSEdr8UMoEMb6Er2VmTcBHH6VJ/vbbL0389/e/p8n/1lwTzjoL3nmn7tz+/VM1VC63WTRvBScISa0l/VrS8ZJOqn3UHo+I3Ru63syal9x5mu6+O7VJLLYYnH56Gjdx0UWpx1OXLnDllSlp7L13XZJwm0XzV8xI6oeB74G3SJPsmVkVqz+NeI8ecN99af+wYTBpUpq648474cQT07iKzp1Te8Xxx8P117vNorkrZrK+NyNigzLHUzSPgzCrvLFj09iJO++EDz9M+/74xzSuwpqmUo+DeEzSDvMYk5lVoXXXhfPPh5tvTtVQbdrAVVfBJZdUOjKbF8UkiJeAByR9J+lrSVMlfV2uwMyseampgX33TT2e3ngDVl01LUJ04IEwc2alo7O5UUyCuBzoDiwaEUtExOIRsUSZ4jKzZia3zWK99WD0aOjVK1U7bbcdfPJJpSO0YhWTICYAb0cxkzeZWYvRp8/sDdJt26aeT7ffnpJHp07w+OMVC8/mQjEJ4n3gGUmn5+vmamaWz4EHwsiRaR2KXr3gzDNd5dRcFJMgPgCeJq0tvXjOw8ysQeuuCy+/nNbFvvBCWHvtVB2Vy4Pqmp6Cx0FExDnlDMTMqtuii8KNN6YlTn//+9Sg/cEHcOqpsw/Ks6aj4AQhqYY8E/JFxDYljcjMqtpvf5tGX++0E5x2Wppi/M03PaiuKSpmJPWfcp4vDOwBuCbRzIrWsWOaCLBrVxg+PJUqnByanoLbICLitZzHCxFxErB1Y9dJaidpsKR3JI2V1F3SJdn2m5IekNQu5/zTJY2T9K6knnN1V2bW5L30Enz2GWywATzzDBx8cKUjsvqKmaxv6ZzHspJ6AUsWcOlVwLCI+DmwITAWeBL4RTZ1x7+B07P3WA/YF1gf6AVcJ6l1UXdkZk1ebpvDv/4FPXvCbbfB736XZo21pqGYKqbXqGuDmEma3vuwhi6QtCSwJXAwQETMAGYAT+Sc9hKwZ/a8N3BvtpzpB5LGAd2AF4uI08yauPoTAT76KOyyC9xxB6y2Gpx7blqTwiqr0QQhqSswISI6ZNsHkdofxgNjGrm8AzAZuFXShqQkc0JEfJNzzqFAtqghK5ESRq2J2b76MR0BHAGw6qqrNnYLZtbE9Okz+3arVvCPf8CRR6Y5nWbNggsucJKotEKqmK4n/dWPpC2Bi4DbgK+AGxq5tg3QGRgQERsB3wCn1R6UdCapNHJXMUFHxA0R0SUiurRv376YS82siWrVKk0RfuSRaa2J005zdVOlFVLF1Doivsye7wPcEBFDgCGSRjVy7URgYkS8nG0PJksQkg4GdgG2zZm+42NglZzrV872mVkL0KoVXHdd+tm/fypJXHKJSxKVUlCCkNQmImYC25JV7RRyfURMkjRBUseIeDe7fkzWwN0H2Coivs25ZChwt6TLgRWBtYFXirgfM2vmWrWCa6+F1q3hssvS+teXXeYkUQmFJIh7gBGSvgC+A54DkLQWqZqpMccBd0lakDSf0yHAq8BCwJNKn/pLEXFURIyWNIjUtjETOCYiZhV5T2bWzElw9dUpWVxxRVqEaPDguiRRU5Mauuu3ZfTvn8ZW5I6pmNO51rhGE0REXCDpaWAF4Imc6qBWpC//xq4fBdRftWitht4PuKCx1zWz6ialta4//hiGDIHdd4f7709jJuY0LUfXrnXHevTwFB7zquAlR5sqLzlqVt0iYJ990nrYK64Ikyenta+XXz5VQ9V/fP45jBgBm2yS1qTwFB75FbLkaDHjIMzM5jsJBg6EL75IJYIVV4QffkjVTrNm5X+0aZOSROfOaRoPmzvFTPdtZlYRzzwDb70FffvCjBlw6aUwalTaN2YMvPsujBuXZoe97TZYeGHYbLM0Svv44ysdffPlBGFmTVpuO8K556afe++d9jd07nPPweabwzXXwF/+Mv/jrgZOEGbWpNWflqNHj7T96qsNn9uqFQwbBmuuCWefnUoaVhw3UptZVZswAbp1g0UWgVdegWWXrXRETUMhjdQuQZhZVVtlFXjoIfj009RVdvr08r9n//4/rQJrjkuqOkGYWdXr1g1uvTW1Sxx9dPnneKodj1GbJGrbRrp2Le/7lpq7uZpZi7DvvvDOO3DOObDuunDKKeV7rx494J57YNddU1fbsWOb53gMlyDMrMX485/TX/KnngpDh5bvfcaMgTPPhG++SaWWHXdsfskBnCDMrAVp1SpVNW28Mey/P7zxRmlff+ZMuPhi2GijVFpZfHFYYolUmhg+vLTvNT84QZhZi7LooqnRul27VAU0aVJpXnf0aOjeHU4/PU3z0aZNep+rrkqJ4ze/yT92oylzgjCzFmfFFVMV06RJsM028P33dceK7W00cyZceGFqaxg/PrU17LJLmn22Rw848EBYf/1UmnjppUZfrklxgjCzFqlz59ROMHZs+kKfMaP43kZvvQWbbppe5ze/SW0Pe+2VphavbXNo3TpVO33ySSq1NCdOEGbWYp19Nhx2GDz9dJq/afvt01/7zz6bSgBjx6aJAWH2sQ0//JDWzq5ta7jvvjSh4JxWQN55Z/jVr1IPqmnT5s+9lYJHUptZi5Y7nfg666TZYN9/v26sxAILpP3t26eR2CecAI8/niYCXGih1AC9226Nv8+LL6YJBM89N006WGkeSW1m1ohnnkklg7594csv4cYb01/5r70Gt98OJ50EHTqk6cW//RYuuiiVGpZYAh57rLDkAKkBe/fdU0lk8uSy3lLJeKCcmbVYubO/9uiRHrnbnTvPfv4338CJJ6Yk0rdv8WMbLrww9Ww6//zUu6mpcwnCzFqsYmaKhVTF9MADKTkMGFB8t9WOHVObx4ABqRqrqXMbhJlZAeqXNupvF+qTT2CttVKvp7vvLlu4jXIbhJlZiRRb2piTFVdM1VT33JMaupuyspcgJLUDbgJ+AQRwKLAy0A9YF+gWESNzzj8dOAyYBRwfEY839PouQZhZc/PVV7DGGmnKjyeeqEwMTaUEcRUwLCJ+DmwIjAXeBnYHns09UdJ6wL7A+kAv4DpJredDjGZm882SS8JZZ8GTT6ZHU1XWBCFpSWBL4GaAiJgREVMiYmxEvJvnkt7AvRExPSI+AMYB3coZo5lZJfzhD7DaanDaafDjj5WOJr9ylyA6AJOBWyW9LukmSW0bOH8lYELO9sRs32wkHSFppKSRk5tLh2IzsxwLLQTnnZfaIQYNKvy6+blaXbkTRBugMzAgIjYCvgFOm9cXjYgbIqJLRHRpP6ex7WZmTdz++8MGG6S5nGbMKOya+blaXbkTxERgYkS8nG0PJiWMOfkYWCVne+Vsn5lZ1amdyO/99+GGGwq7Zuut4coroXfvVE01N11tC1XWBBERk4AJkjpmu7YFxjRwyVBgX0kLSeoArA28Us4Yzcwq6c03YcMN0xxNU6emfblVRt9/Dy+8AJdckqb1WGEFOOCAdO6AAWmN7XKtVjc/pto4DrhL0oLA+8AhknYD/gq0Bx6RNCoiekbEaEmDSElkJnBMRMyaDzGamVVEt26pFDFlClx2WZpN9rDDoGfPNH/Ta6/VzSi75pqwww6wzDJw222pBDFgQN00IaXmkdRmZhVWUwO9es3eDrHQQqldYbPN0qN7d1huudKN6G4q4yDMzKwBPXrAUUel59tvn1ae+/preO45+MtfUnvDcsul46Ua0V0Iz+ZqZlZhNTVpXqbaSQC//RYWXDD/uX36/HRfuaqYXIIwM6ug3Cqic89NP3O7sVaSE4SZWQXNzyqjYrmR2sysBXIjtZmZzTUnCDMzy8sJwszM8nKCMDOzvJwgzMwsr2bfi0nSZODDSscxj5YFvqh0EGVW7ffo+2v+qv0e69/fahHR4HoJzT5BVANJIxvrbtbcVfs9+v6av2q/x7m5P1cxmZlZXk4QZmaWlxNE01DgWlLNWrXfo++v+av2eyz6/twGYWZmebkEYWZmeTlBmJlZXk4QFSZpvKS3JI2S1OynpZV0i6TPJb2ds29pSU9Kei/7uVQlY5xXc7jHfpI+zj7HUZJ2qmSM80LSKpJqJI2RNFrSCdn+qvgcG7i/qvgMJS0s6RVJb2T3d062v4OklyWNkzRQ0hyWJMp5LbdBVJak8UCXiKiKATqStgSmAbdHxC+yff2BLyPiYkmnAUtFxKmVjHNezOEe+wHTIuLSSsZWCpJWAFaIiH9JWhx4DfgNcDBV8Dk2cH97UwWfoSQBbSNimqQFgOeBE4CTgPsj4l5JfwPeiIgBDb2WSxBWUhHxLPBlvd29gduy57eR/jM2W3O4x6oREZ9GxL+y51OBscBKVMnn2MD9VYVIpmWbC2SPALYBBmf7C/r8nCAqL4AnJL0m6YhKB1Mmy0fEp9nzScDylQymjI6V9GZWBdUsq1/qk7Q6sBHwMlX4Oda7P6iSz1BSa0mjgM+BJ4H/AFMiYmZ2ykQKSIpOEJW3RUR0BnYEjsmqL6pWpDrNaqzXHACsCXQCPgUuq2g0JSBpMWAI8MeI+Dr3WDV8jnnur2o+w4iYFRGdgJWBbsDP5+Z1nCAqLCI+zn5+DjxA+jCrzWdZvW9t/e/nFY6n5CLis+w/5Y/AjTTzzzGrux4C3BUR92e7q+ZzzHd/1fYZAkTEFKAG6A60k9QmO7Qy8HFj1ztBVJCktlkjGZLaAjsAbzd8VbM0FDgoe34Q8FAFYymL2i/OzG40488xa+S8GRgbEZfnHKqKz3FO91ctn6Gk9pLaZc8XAbYntbPUAHtmpxX0+bkXUwVJWoNUagBoA9wdERdUMKR5JukeYGvS1MKfAWcDDwKDgFVJU7PvHRHNtpF3Dve4NalqIoDxwJE59fXNiqQtgOeAt4Afs91nkOrpm/3n2MD97UcVfIaSNiA1QrcmFQIGRcS52ffNvcDSwOvAARExvcHXcoIwM7N8XMVkZmZ5OUGYmVleThBmZpaXE4SZmeXlBGFmZnk5QZiVgaTVc2d7NWuOnCDMzCwvJwizMpO0hqTXJXWtdCxmxWjT+ClmNrckdSSNXj04It6odDxmxXCCMCuf9qT5bnaPiDGVDsasWK5iMiufr4CPgC0qHYjZ3HAJwqx8ZpBmBX1c0rSIuLvSAZkVwwnCrIwi4htJuwBPZkliaKVjMiuUZ3M1M7O83AZhZmZ5OUGYmVleThBmZpaXE4SZmeXlBGFmZnk5QZiZWV5OEGZmltf/AdpEv42sln9PAAAAAElFTkSuQmCC\n", "text/plain": [ - " cluster\n", - "0 1\n", - "1 7\n", - "2 3\n", - "3 9\n", - "4 4\n", - ".. ...\n", - "82 3\n", - "83 4\n", - "84 2\n", - "85 7\n", - "86 4\n", - "\n", - "[87 rows x 1 columns]" + "
" ] }, - "execution_count": 130, - "metadata": {}, - "output_type": "execute_result" + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" } ], "source": [ - "clusters" + "Sum_of_squared_distances = []\n", + "K = range(2,30)\n", + "for k in K:\n", + " km = KMeans(n_clusters=k, max_iter=200, n_init=10)\n", + " km = km.fit(text_test_tf)\n", + " Sum_of_squared_distances.append(km.inertia_)\n", + "plt.plot(K, Sum_of_squared_distances, 'bx-')\n", + "plt.xlabel('k')\n", + "plt.ylabel('Sum_of_squared_distances')\n", + "plt.title('Elbow Method For Optimal k')\n", + "plt.show()" ] }, { "cell_type": "code", - "execution_count": 133, + "execution_count": 156, "metadata": {}, "outputs": [], "source": [ - "clusters.to_csv(\"dev-0\\out.tsv\", sep=\"\\t\",index=False,header=None)" + "true_k_dev = 10\n", + "model_dev = KMeans(n_clusters=true_k_dev, init='k-means++', max_iter=200, n_init=10)\n", + "model_dev.fit(text_tf)\n", + "labels_dev=model_dev.labels_\n", + "clusters_dev=pd.DataFrame(list(labels_dev),columns=['cluster'])" + ] + }, + { + "cell_type": "code", + "execution_count": 162, + "metadata": {}, + "outputs": [], + "source": [ + "true_k_test = 28\n", + "model_test = KMeans(n_clusters=true_k_test, init='k-means++', max_iter=200, n_init=10)\n", + "model_test.fit(text_test_tf)\n", + "labels_test=model_test.labels_\n", + "clusters_test=pd.DataFrame(list(labels_test),columns=['cluster'])" + ] + }, + { + "cell_type": "code", + "execution_count": 158, + "metadata": {}, + "outputs": [], + "source": [ + "clusters_dev.to_csv(\"dev-0\\out.tsv\", sep=\"\\t\",index=False,header=None)" + ] + }, + { + "cell_type": "code", + "execution_count": 163, + "metadata": {}, + "outputs": [], + "source": [ + "clusters_test.to_csv(\"test-A\\out.tsv\", sep=\"\\t\",index=False,header=None)" ] }, { diff --git a/dev-0/out.tsv b/dev-0/out.tsv index c42233b..d5e9cf3 100644 --- a/dev-0/out.tsv +++ b/dev-0/out.tsv @@ -1,87 +1,87 @@ -1 -7 +0 +6 +4 +8 +9 3 +5 +1 +3 +4 +0 +3 +0 +9 +9 +5 +9 9 4 -6 -2 0 -6 -3 -0 -6 -7 -4 +8 7 2 +6 +6 7 -7 -3 -4 -8 -4 -4 -8 -0 -4 +6 +2 +9 +9 +5 +5 +5 +0 +9 5 -4 -4 7 -2 -2 -2 -4 -7 -2 +6 +6 +8 +3 +8 +1 +8 +0 +8 +1 +9 +6 +5 7 4 5 -9 +7 +1 6 -1 -2 9 -1 -3 -2 +9 +5 7 5 -2 -0 +7 +1 +1 3 -2 +7 +5 +0 4 +5 +9 +4 +1 +5 +9 +0 +1 1 8 -7 -7 -2 -3 -2 -7 -2 -2 -6 -4 -2 -1 -3 -2 -4 -3 -1 -2 -7 -0 -0 -1 9 4 3 +4 0 -3 -4 -2 +5 7 -4 +0 diff --git a/script.py b/script.py new file mode 100644 index 0000000..34d9755 --- /dev/null +++ b/script.py @@ -0,0 +1,75 @@ +import pandas as pd +from many_stop_words import get_stop_words +from sklearn.feature_extraction.text import TfidfVectorizer +from unidecode import unidecode +from nltk.tokenize import word_tokenize +import string +import matplotlib.pyplot as plt +from sklearn.cluster import KMeans + +data=pd.read_csv('dev-0/in.tsv', sep='\t', header=None) +data_test=pd.read_csv('test-A/in.tsv', sep='\t', header=None) + +def remove_punctuations(text): + for punctuation in string.punctuation: + text = text.replace(punctuation, '') + return text + +data[0] = data[0].str.lower() +data_test[0] = data_test[0].str.lower() +stop_words = get_stop_words('pl') + +data[0] = data[0].apply(unidecode) +data_test[0] = data_test[0].apply(unidecode) +uni_stop_words = [unidecode(x) for x in stop_words] + +data[0] = data[0].apply(remove_punctuations) +data_test[0] = data_test[0].apply(remove_punctuations) + +data[0] = data[0].apply(lambda x: ' '.join([item for item in x.split() if item not in uni_stop_words])) +data_test[0] = data_test[0].apply(lambda x: ' '.join([item for item in x.split() if item not in uni_stop_words])) + +tf=TfidfVectorizer() +text_tf= tf.fit_transform(data[0]) +text_test_tf= tf.fit_transform(data_test[0]) + +Sum_of_squared_distances = [] +K = range(2,20) +for k in K: + km = KMeans(n_clusters=k, max_iter=200, n_init=10) + km = km.fit(text_tf) + Sum_of_squared_distances.append(km.inertia_) + +plt.plot(K, Sum_of_squared_distances, 'bx-') +plt.xlabel('k') +plt.ylabel('Sum_of_squared_distances') +plt.title('Elbow Method For Optimal k') +plt.show() + +Sum_of_squared_distances = [] +K = range(2,30) +for k in K: + km = KMeans(n_clusters=k, max_iter=200, n_init=10) + km = km.fit(text_test_tf) + Sum_of_squared_distances.append(km.inertia_) + +plt.plot(K, Sum_of_squared_distances, 'bx-') +plt.xlabel('k') +plt.ylabel('Sum_of_squared_distances') +plt.title('Elbow Method For Optimal k') +plt.show() + +true_k_dev = 10 +model_dev = KMeans(n_clusters=true_k_dev, init='k-means++', max_iter=200, n_init=10) +model_dev.fit(text_tf) +labels_dev=model_dev.labels_ +clusters_dev=pd.DataFrame(list(labels_dev),columns=['cluster']) + +true_k_test = 28 +model_test = KMeans(n_clusters=true_k_test, init='k-means++', max_iter=200, n_init=10) +model_test.fit(text_test_tf) +labels_test=model_test.labels_ +clusters_test=pd.DataFrame(list(labels_test),columns=['cluster']) + +clusters_dev.to_csv("dev-0\out.tsv", sep="\t",index=False,header=None) +clusters_test.to_csv("test-A\out.tsv", sep="\t",index=False,header=None) \ No newline at end of file diff --git a/test-A/out.tsv b/test-A/out.tsv new file mode 100644 index 0000000..4da1246 --- /dev/null +++ b/test-A/out.tsv @@ -0,0 +1,691 @@ +27 +17 +3 +19 +1 +7 +27 +10 +19 +1 +2 +20 +15 +22 +12 +1 +1 +11 +1 +12 +10 +15 +7 +22 +25 +17 +19 +13 +10 +1 +4 +5 +7 +6 +8 +2 +20 +19 +3 +27 +21 +23 +1 +15 +25 +21 +0 +11 +3 +12 +3 +24 +19 +22 +9 +23 +19 +3 +16 +24 +21 +1 +25 +17 +12 +6 +22 +7 +0 +12 +9 +8 +1 +1 +11 +19 +27 +12 +21 +2 +9 +26 +18 +2 +17 +20 +19 +19 +17 +21 +22 +9 +8 +17 +1 +1 +27 +25 +27 +14 +25 +15 +1 +13 +20 +0 +7 +20 +11 +17 +15 +3 +12 +3 +20 +17 +17 +12 +11 +19 +11 +10 +16 +21 +19 +3 +1 +23 +15 +23 +9 +8 +21 +23 +16 +8 +4 +19 +18 +4 +27 +10 +11 +4 +8 +19 +17 +4 +19 +23 +1 +1 +17 +12 +22 +20 +1 +14 +1 +15 +22 +17 +4 +11 +9 +20 +18 +22 +8 +8 +2 +19 +14 +20 +1 +18 +19 +16 +23 +2 +26 +11 +5 +1 +10 +10 +10 +18 +10 +9 +27 +8 +20 +19 +14 +14 +19 +3 +19 +27 +21 +24 +27 +25 +1 +1 +3 +11 +17 +27 +15 +1 +12 +7 +14 +20 +12 +7 +16 +10 +12 +0 +9 +17 +18 +8 +22 +13 +18 +20 +0 +13 +23 +9 +7 +25 +8 +22 +7 +19 +27 +12 +6 +13 +19 +16 +9 +9 +21 +11 +0 +2 +26 +15 +24 +18 +5 +1 +22 +11 +23 +15 +12 +13 +4 +13 +4 +2 +24 +11 +24 +10 +9 +19 +7 +1 +25 +15 +11 +1 +19 +9 +23 +11 +15 +27 +11 +3 +1 +7 +27 +0 +22 +2 +9 +9 +1 +27 +1 +13 +25 +11 +12 +9 +2 +16 +19 +7 +17 +2 +17 +9 +6 +1 +18 +2 +9 +4 +5 +24 +21 +18 +15 +17 +21 +21 +17 +7 +11 +25 +7 +19 +19 +23 +24 +3 +19 +6 +12 +19 +17 +21 +15 +12 +22 +11 +1 +20 +0 +0 +22 +7 +9 +15 +1 +22 +9 +1 +27 +1 +5 +8 +20 +20 +9 +4 +3 +5 +11 +22 +17 +21 +20 +13 +10 +14 +23 +1 +22 +19 +24 +2 +4 +25 +27 +15 +25 +20 +13 +7 +19 +6 +12 +3 +12 +2 +27 +17 +1 +21 +17 +19 +23 +14 +22 +12 +7 +10 +10 +15 +21 +27 +10 +20 +23 +9 +11 +9 +4 +5 +20 +0 +20 +7 +22 +24 +3 +17 +13 +12 +8 +22 +11 +24 +26 +12 +21 +15 +22 +7 +16 +3 +21 +14 +1 +2 +1 +26 +15 +13 +24 +2 +27 +13 +21 +23 +20 +11 +21 +9 +11 +0 +23 +2 +27 +1 +3 +19 +7 +21 +21 +23 +21 +10 +1 +0 +24 +23 +8 +16 +22 +18 +21 +0 +22 +25 +19 +9 +24 +17 +27 +3 +11 +22 +15 +11 +15 +4 +17 +11 +25 +3 +2 +13 +19 +6 +15 +1 +15 +25 +7 +22 +7 +2 +24 +20 +2 +1 +2 +11 +15 +10 +22 +11 +17 +13 +19 +18 +16 +5 +26 +27 +21 +3 +19 +15 +24 +12 +9 +0 +3 +4 +1 +11 +15 +7 +16 +5 +20 +15 +1 +21 +24 +13 +8 +26 +27 +27 +8 +6 +7 +3 +16 +10 +13 +1 +23 +19 +10 +8 +3 +3 +9 +2 +21 +20 +15 +11 +20 +19 +23 +13 +10 +7 +24 +9 +26 +23 +19 +9 +2 +20 +22 +7 +15 +2 +27 +20 +10 +24 +3 +12 +9 +12 +23 +2 +16 +27 +21 +1 +20 +5 +27 +13 +20 +19 +11 +11 +2 +17 +25 +15 +9 +3 +12 +18 +25 +9 +1 +25 +20 +11 +8 +1 +21 +27 +18 +22 +16 +4 +12 +27 +8 +23 +10 +22 +19 +22 +13 +2 +9 +13 +26 +20 +12 +0 +1 +24 +20 +22 +20 +7 +1 +19 +19 +15 +16 +19 +8 +19 +15 +1 +16 +22 +27 +18 +1 +16 +16 +7 +16 +8 +7 +22 +5 +3 +12 +13 +27 +10 +22