diff --git a/.ipynb_checkpoints/P0. Data preparation-checkpoint.ipynb b/.ipynb_checkpoints/P0. Data preparation-checkpoint.ipynb new file mode 100644 index 0000000..e905e56 --- /dev/null +++ b/.ipynb_checkpoints/P0. Data preparation-checkpoint.ipynb @@ -0,0 +1,698 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Building train and test sets" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# if you don't have some library installed try using pip (or pip3) to install it - you can do it from the notebook\n", + "# example: !pip install tqdm\n", + "# also on labs it's better to use python3 kernel - ipython3 notebook\n", + "\n", + "import pandas as pd\n", + "import numpy as np\n", + "import scipy.sparse as sparse\n", + "import time\n", + "import random\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "import os\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "import helpers\n", + "\n", + "os.makedirs('./Datasets/', exist_ok = True)\n", + "\n", + "helpers.download_movielens_100k_dataset()\n", + "\n", + "df=pd.read_csv('./Datasets/ml-100k/u.data',delimiter='\\t', header=None)\n", + "df.columns=['user', 'item', 'rating', 'timestamp']\n", + "\n", + "train, test = train_test_split(df, test_size=0.2, random_state=30)\n", + "\n", + "train.to_csv('./Datasets/ml-100k/train.csv', sep='\\t', header=None, index=False)\n", + "test.to_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None, index=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Interactions properties" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### How data looks like?" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
useritemratingtimestamp
01962423881250949
11863023891717742
2223771878887116
3244512880606923
41663461886397596
\n", + "
" + ], + "text/plain": [ + " user item rating timestamp\n", + "0 196 242 3 881250949\n", + "1 186 302 3 891717742\n", + "2 22 377 1 878887116\n", + "3 244 51 2 880606923\n", + "4 166 346 1 886397596" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Sample properties" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "We have 943 users, 1682 items and 100000 ratings.\n", + "\n", + "Average number of ratings per user is 106.0445. \n", + "\n", + "Average number of ratings per item is 59.453.\n", + "\n", + "Data sparsity (% of missing entries) is 93.6953%.\n" + ] + } + ], + "source": [ + "users, items, ratings=df['user'].nunique(), df['item'].nunique(), len(df)\n", + "\n", + "print(f'We have {users} users, {items} items and {ratings} ratings.\\n')\n", + "\n", + "print(f'Average number of ratings per user is {round(ratings/users,4)}. 
\\n')\n", + "print(f'Average number of ratings per item is {round(ratings/items,4)}.\\n')\n", + "print(f'Data sparsity (% of missing entries) is {round(100*(1-ratings/(users*items)),4)}%.')" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA6UAAAHvCAYAAACsfXllAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAABM+UlEQVR4nO3deZgcZbn+8fshC4JB2UJMwjKIyB4mSYNBFKIcJJIQUUSTnyAcPAkqKIhyjHIgQVGD4DHiwiERDkExqHCALMi+CAjCTAgIMQKBAbIQwk4gLAnP74+qmXRN9fRMZqm3eur7ua6+uruquvrut2qSfvqtesvcXQAAAAAAhLBJ6AAAAAAAgOKiKAUAAAAABENRCgAAAAAIhqIUAAAAABAMRSkAAAAAIBiKUgAAAABAMBSlAJARM5tmZh7fRofOU2vMbGsz+5GZPWBmr5nZu3Fbvhw6W08xs0vL9pm60HkAAOgJFKUAul3Zl+jm20c68JoJZctPyyAmaoiZDZLUKOn7kuolDZBkITN1hpmNjn+cmEaRCQBApG/oAAAK4ceSDgkdAjXtDEl18eO7Jf1e0ipJLumdQJk6Y7SkqfHj2yU1hQoCAEBeUJQCyMInzezf3P3m0EFQsw6P71+S9Cl3fyNkmKy4+/GSjg8cAwCAHsXhuwB6Unnh8JNgKdAb7BDf/6soBSkAAEVBUQqgJz0j6er4ccnMPhcyDGpa//j+raApAABAt6MoBdDT/kvSu/Hjc8ysT2dXVDYQ0u1dXdbMbm9eJn6+iZmdEE9/zsxeN7N/mNkZZrZFq9d+wMx+aGYPmdmrZvaKmf3VzL7Yic90uJlda2bLzOyt+H6OmR2wEevYNs55p5k9a2Zvm9nq+Pl/ts5f4fVNcVs0xc/fY2bfNLO7zGxVPMrt7Rv72crWv7mZfcvMbovzvRW38V1m9j0ze38br2sZrbhs8sEVBtIavZF5ji977fHxtJKZ/dbMHo+3fWK9Fvl4PPrvrWa2Iv4cr5vZk2Z2hZkdYWYVB19q/izacD6pJN1W4bPc3up1VUffjQdOSgwQZmY7mtnPzGxJnO9lM/ubmX3dzDp02o6ZfdbMFsTb/814H/m9xYOWVWrDNtbzGTP7k5k9YWZvxOtabmYPmtnvzOxLZrZVRzK1sf5Eu5nZQDP7gUV/u6/Ef5+NZjbFzDbbiPWOidv+MYtGen7DzJbG0z7Wzms3ev/qYKYOj8TckWUt+jv/upndZGYr4/15Tbyt7zOzX5jZp82sXzvvdYCZXWhmi+N97U0ze9rM/mhmY9t5baX9dzczm2Fm/4y3X9V9DEAv4e7cuHHj1q03RYPPuKQl8fNLy6Yd38ZrJpQtM62d9d6+ERkqLqtokJnmZQZIurnseevbQklbxa87QNJzVZY9v0qmaWXLjZb06yrrWS9pagc+5/GSXq2yHpf0rKQDqqyjKV6uSdLOkh6usI5227yNdY+StLydfM8rOk+0WntVu43eyEzHl++PkqZIWldtvZL+t4NZ/iLpfV34LLe3et2lZfPqKqx3dNn8aZLGKDrvtq313yhp0ypt00/Sn6q8fp2kb7duwwrr2UzS/A5+5lO74d+a2yUNl7Ssyvs8JmnndtY3UNItHcj8W0n9umv/6uBnrbovbOR+s0vcHh3ZPvVtvMd7Jc3pwOvnS9qijXW03n+/rOi
0j9brSO1j3Lhx6103BjoCkIWpkiYqOgRzmpn9wd3fDpyp3P8qGh34bkVfyJ+VtJOkk+L74ZJmmNlUSTco+hy/lXSXpLclfVzSJEWDx33bzG5w95vaec9TJB2pqCD7raSHJG2uqKg4StGRLNPM7AV3/1WlFZjZKZJmxE/fknSVpDslvSBp63hdn5E0SNLNZrafuy+ukmlTSf8naa/4s10laYWiL+qD2vk8lfLVS7pVUYEiSQ9I+oOkpyV9QNIXJB0oaRtJ883sU+5+e9kqrpC0KH58dXz/iKLe93IPb2y2Ml+Q9GlJr0iareiyM+sl7RtPa7aZoja+Q9J9kpZKel1R23xY0rHa0OaXKdq25Zo/ywRJzT3qZ1bI/nwXPku9pNMVXSrnIkn3xJlLkr6qqIg4VNFIxme1sY6Zko6OH7+pqLi5R1GblCR9RdL5kq5sJ8uPJTX3kq1UNFryI5LWKPoR6EOKfuA5qMOfrrr3K9p3hyoqvK+R9KKk3eLMO8bveYuZ1bv7q61XYGZbK/qsu8STFkv6s6RHFR3tsZeiInP7eJ191f4gVB3dvzIT9+b/WVF7SNF+eaWkJxSNZL2VpD0kfULRPlVpHZsq+iFvVDzpaUUF6iOK9rkPKSowd1O0H1xjZoe6+7sVVtfsQEX75npJFyv69/jNeB3PbvQHBVBbQlfF3Lhx6303bfh1e0nZtAvKpn+zwmtC9pS6pO9XWGagNvTyrVP05e05ScMqLHts2bqua+M9p7V6z4clbVdhuSMVfTl0RYXPjhWWGVm2zBJJu7bxnmMVFc4u6e9tLNPUKte3umEf2ETJHtcZkjapsNyZZcs8Lek9Xd32Hch2fKvP+09JQ9p5zcclbVll/nuV7GE8uAP7wOgOZL20bPm6CvNHt/osT1XaFyTtX7a/vKgKvaWKfphpXs9qSXtXWKauwv5yfKtl+kh6OZ7XVGkfb/U3tnsXtmXrHrWTKiwzQNJtZcv8so11XV22zH+1sb8OUPTDVPNyY7pj/+rgZ626L3R0WUU/LjTPmyepT5X17ClpmwrTf162jgsl9a+wTD9FhXjzcl/twP67UtKeXW0rbty41d6Nc0oBZOUcRQWWJJ1hZgNChmnlBnf/ceuJ7r5aUnMvZR9FvRsnu/tDFZb9naLD4STpkA6cu7dO0hfd/bkK67pG0s/ip5tL+lqF109V1FPzlqRx7v5YhWXk7gskTY+f7m9mH20n19Xu/vN2lumIcYp6liTpXkWFbqqXxN1/KGlB/HQHScd0w3tvDJc0wd1XVF3I/U53f7nK/NcV9Z417+PHdlvCjXNMpX3B3e+T9Mf46VaKitTWvlX2+GR3T/VAu3uT2u8dHKio51KSrq20j5etb7W7L2lnfR11hbv/usJ7rFH0o1dz7+hXzGzL8mXMbIQ29G5f4u7ntLG/Nq+ruZfztHYydWj/ytiHyh5f4u7r21rQ3Re7+wvl08xssKSvx09vcfeveYUjX9z9HUn/oagHVmq/rSTpRK9+NAeAXoqiFEAm4i+mM+Kn20k6NViYtIqHx8buLnu8StUPW7wrvu+vDYcAtuUGd3+kyvwZig5jk6TPls+IB4ZpPjTyWnd/vJ33+n3Z40+1s+wv25nfUeUjLZ/n7l5l2ellj7MeoflOd3+wO1bk7q9J+kf89CPdsc6N9IC731ll/q1lj/csn2Fm79GGfWOlosM7K/LoEOvUDzNl1rb1Pj3sZ23NcPdV2vB3sJmiw6zLlf+IcH61N3H3lyRdFz89KD6UtS3dtn91o/JLKu3V5lJt+4I2jIbdZptLLYVp848hu7YzQNNTinpuARQQ55QCyNJ5inr9tpb0HTP7jbu/GDiTJP29yrxVZY8bK/WetLFseyOK3lJtprs/a2b/lLS3pA+b2fvdvbl35kBt+FHxTTM7sp33Kh89c48qy61XdE5dd2juiXNJ7Z1f+zdtONcw62KuWhGXEBcfX1B0nu6+is6zHaDoHM7Wtu+WdBvn3nbmLy9
73Hr/3Fcb9pM72tnPpejw92GVZrj7K2Z2n6J94N/M7GpFP3bcGRcpPeEVRedrVnOrNvTw7afoPN9mH4/v35a0m5nt1s66Ni27/6CiQ3Qr6fD+laG7FP1wsJmkqfGPXLMrHQHSho+XPd6uA//+lO9reyg6pLtirnZ+vALQi1GUAshM/GX1XEnnKjq8b4qk/wybSlI0MFBbyq+LWW251su+p51l2+vdbF5mb0VFzwe04ZDBurJlvhzfOqpasfyCu7+5EeuqZnB8/2zcg9gmd3/XzJYqKoy2NrP+lQ4H7CHL219EMrN9FA38tGsH1/u+TifqvPYGSaq2fw4pe/yE2tfeMicp+uHlfYoOiz1S0utm9ndFRdHNku7uQPHbUUs7UNCU/80NaTWvLr7vrw2DanVUtb+pDu1fWXL3F83sW4rOBe2r6LDa08zsOUU/EN0p6S/u3lahXVf2+NKNfPuaaisA2eHwXQBZ+6WiEV0l6WQza/3lMHMb8cW4u75AS8lD6Nryetnj8nNw3996wY3Qv8q8tVXmbazma6O+XnWpDdZUeG0W2v3M8aisN2tDQfqMoi/0p0j6f4oOOf5sfGs+JDvE/69d2T/fW/Z4Y/fNFHdvUDRy62Xa0MbvlfRJRSP//lXSUjPrrnOIu/L3JNXG31S3cfeLFI2ue4s27DfbKfrx4GeSFpvZ3WZW6dzjQrUVgGzQUwogU+6+1sx+qOhL/WaKvqB+tTvfw8xq4Qe3zTuwTHmhsKaNx8e7++zuidStXpO0pZKfoZryIqFqz2oAJyv6wi5Fo4n+h7uvq7SgmZ2RWaruVV6wbey+WZG7PynpODM7UdHlXz4q6WOSDlb0t18n6XdmtmOlgcY2Ulf+npqfbympyd137mKWPGj330B3v0PSHWa2jaJDcg9QtG32i1//UUl3VbhUU3PbrZO0WVt/CwCwMWrhixuA3udiRdd5lKKRMD9UbeEyzYd0Vvu1XZK27VSqbHXkMzcv40pep6/8MLfODFSShZXx/QfMrGrPZ3zdxOaBoV7I8NDdjvq3+H6dpFPb+RK+UwZ5ekL56LAf7MDyHVlGkuTub7r7be7+I3f/tKIC/7uK9mtJOisujLpil3g/qqb8b671aLjNf1M7mFmIQ687ovzw6277N9DdX3D3a9z9u+4+StE1Xf8Qz+6n9MBPzW3VV9E1egGgyyhKAWQuHuzkrPhpX0k/6OBLX47v2zvkN8TIpxvrk9VmmtkHtGFQokfLBjmSokMfm7/QfyanPcP3xfem6PqX1XxUG3pK76u2YCCD4vsXql0WxsyGK7ocSjXlh9i2V0Rl6UFF1zGVohFl29unRnf2jdx9jbv/VNE5ulI0WNB+nV1f7P2SRrSzzCfKHt/fat4d8X0fSUd0MUtPebnscZv/BppZH0XXIu0Ud18u6Tht+CFspJltVrbIHWWPEyODA0Bn5fGLDIBimKMNl5WYoGiQm/Y0X79uJzOr1lPzza4Ey8gYM6s2Eu43FX1BlqT/K58RX17n+vjphxVdHzNvrip7/J12erG+28br8qL5fMXt2un1PavKvGblh4129NDmHhcPcHVj/HSIpKPbWtbMRquNkXc3UlPZ4+44najN62Ca2UBtuAbuWm34+2l2Wdnjs8wsN9umTPn1O6v9qDVB7f84UlV8NMCysknl2+cKbThq5VvxD2gA0CUUpQCCiEfKbD7/ziR9owMvK/8ieW6lQsfMfqANh1vmWV9Jf4y/LCeY2RGSvhM/fUPR+bet/Zc29Gz9sr0BY8xsRzM7z8y2q7ZcN1qgDYP+HCjpvEq9b2b2fW3omXpG0uXZxNsozb1qJumc1jMt8gNFg8S058myx+317GVtRtnjX5nZ3q0XiK8zeWm1lZjZcDM708wGVVlmW20ofF3Vr3vaUf/PzFLnp8cF5hxtGKDn4tY93u7+d234QeTDkua1k7+vmR1pZl9va5kecJM2XLv4JDNLHSpuZiW1c61hM/uSmf17q97P1suMkjQ
8fvpE+Qja7v5M2XtsI+mGaqdgxH8fh9Tw+dYAMsBARwCCcff5ZvY3RYdvdqRn4hJFl5DZWtLnJd1pZpcruhTGjop6CEqKfsmf0COhu881ioqYR8xslqR/KBqs5TBFX9abC+7vxl8CE9x9oZl9TdIsRYc//s7Mvi3pWkWXvnhL0cAtuysqCveP1/mLHvtEyXzvmtmxku5WNKjNtyV9It5eyxQdEvsFRQPfSFGB/eVuvCRNd/qNpBMU9Vx/08zqFfVePytpB0Uj8A5X1JO1VtLIKuu6U9Fn7SfpdDNrLsiazxd80d2DHMLs7jeb2aWSjld0TuL98fO/KTrsuKSoHd4n6UpFf4NSetTf9ys6JH+qmd0dv/5RRQNYbS1pH0VttnW8/OXu/nQX4y9StL9fGF8382pFh7s2H0nQXMA9qQ0/hrV2Qrz8PooO9X3CzK5UdO3e5xVdRmewoh8TPhXnv7iLuTvM3VeY2R8kHRu/9/1m9htF+90ARYdUT5T0kqJrsrbVm7qrpKmKfsy6SdGPLs8o2ge3UzTo0ZHacKRGpUGovqdodOVDFPWaLzazaxWdWvCsov17kKIjYA5V1Pt+i6QfdeazA+j9KEoBhPY9Jc9RapO7r457BP9P0RfEA+NbufmKvoTmvSj9haIBQ06S9P0K813SD9z9V22twN0vjq8tOEvRF8D6+NaWFyRlVvS5+wNmdoiiHqjmL/OVegdflPT/Wo3wmRvuvsjMviHpV4qOMDoovpX7p6TPSPptO+t63szOV7TfD1D6fOo71IXzNbvBZEW5Pq/ob+yrSo6O/a6iXvxXtKEobT1acnOR2keV26rcn+L37KpXJP27or//w+Jba0slfcrdX620And/1cwOVPT39EVFPxK1dx3g1gMm9bRTFRXN9YoO0Z3aav5KRed5fq3KOpq3z3u14Rqylbwj6Ux3TxXe7v6OmR2u6PIxX1NUhH5eG/aJSrgOKYA2cfgugKDc/a9Kn99Vbfm/KPpC9r+SnlZ0btNqSbcp6kEY7+4duWZhcO5+sqSxkuYp+nL7dnz/R0kHuvu0DqxjnqSdFRUOcxX1eKzVhna5R9GhdkdIGuLuz3f7B6me7x5FPTOnKSq4Viv6svtCnO0MSbu4+w1Z5tpY7n6hoh9A/qyoJ+gdSc8p6gU8TVLJ3R/v4Lq+r6hH6/p4XbkZbdjd33H3oyUdpSjfakU9aE8rOrT6QHf/maLDNpu92Godf5W0m6Ji5U+Slig6l/bd+H6xoqMeDnb3L7p7t1yf0t0XKeqxPkfSw4qK5TWSHlD0w88wd3+inXW85u4TFP14MiN+7QuKRl5eI+kxRUc5nKZov+3IecTdxt1fVHRkyZQ42xpFl/NZrKgXct/4UORqfiRplKI2uV7Rub1rFX3GlxQNNnaupD3d/dwqWd52928oOhpjuqS/K9pf1ik67eBJSddpQ9sft/GfGEBRWHRaFwAAQMeY2VWSPhc/3SYulkLkaP4Sc4e7jw6RAQDQdfSUAgCADosHOxoXP30wVEEKAOg9KEoBAIAkycx2MbPtq8wfqmgQof7xpIsyCQYA6NUY6AgAADQ7QNL/mtlfFY0UvFTR+YbbKDoP8QuKBgCSpHslzQwREgDQu1CUAgCAcn0VXU6krUuKSNLtko5y9/VVlgEAoEMoSgEAQLO5kr4kaYyikWy3VXRNzLclrVI0wuoV8ajPAAB0i1yMvrvtttt6XV1d6Bg9ZvXq1Ro4cGDoGC3ylqcoaHcAAAAUVWNj4/PuXvHLcC56Suvq6tTQ0BA6BgAAAACgB5jZU23NY/TdDEybNi10hIS85SkK2h0AAABIy8Xhu6VSyXtzT6mZKQ/t3CxveYqCdgcAAEBRmVmju5cqzaOnFAAAAAAQDEUpAAAAACAYitIM5O3Q5LzlKQraHQAAAEijKAUAAAAABMNARxnI2wA3ectTFLQ7AAAAioqBjgAAAAAAudRuUWpml5jZc2b2cNm0P5rZovjWZGaL4ul1Zra
2bN7/9GB2AAAAAECN69uBZS6V9CtJlzVPcPcvNj82s59JeqVs+aXuXt9N+XqFqVOnho6QkLc8RUG7AwAAAGkdOqfUzOokzXf3vVtNN0lPS/qkuz/W1nLt6e3nlAIAAABAkfXkOaUfl7TK3R8rm7azmT1gZneY2cerhJpsZg1m1rB69eouxsi3IUOGhI6QkLc8RUG7AwAAAGldLUonSppT9nylpB3dfbik0yT9wczeV+mF7j7T3UvuXho4cGAXY+TbypUrWx4/88wz+sQnPqE99thDe+21l37xi1+0zJs2bZqGDh2q+vp61dfX67rrrpMk3X333Ro2bJj2228/Pf7445Kkl19+WYcddlinRnMtz9MV11xzjRYvXtzy/KyzztLNN98sSRo9enSnr8t57bXXatiwYaqvr1epVNJdd90lSXrzzTe1//77a99999Vee+1Vc4fDttfu//rXv1q2fX19vd73vvdpxowZkrLbN7pLT+0bS5Ys0QEHHKBNN91U559/fmr++vXrNXz4cI0bN65l2qJFizRq1KiW/em+++7r1HsDAACgh7h7uzdJdZIebjWtr6RVkrav8rrbJZXaW//IkSO9N4uaObJixQpvbGx0d/dXX33Vd911V3/kkUfc3X3q1Kl+3nnnpV7/2c9+1h999FG/8cYb/bTTTnN399NOO81vv/32LufpiuOOO87//Oc/V5x38MEH+/3339+p9b722mv+7rvvurv7gw8+6Lvttpu7u7/77rv+2muvubv722+/7fvvv7/fc889nXqPEDam3detW+eDBg3ypqYmd89u3+guPbVvrFq1yu+77z7//ve/X7E9fvazn/nEiRN97NixLdMOPfRQv+6669zdfcGCBX7wwQd36r0BAADQeZIavI16sCs9pf8maYm7L2ueYGYDzaxP/PiDknaV9EQX3qNXGDFiRMvjwYMHtzzfYosttMcee2j58uVVX9+vXz+tXbtWb7zxhvr166elS5dq+fLlOvjgg9t8zfXXX6/dd99dH/vYx/TNb36zpeeoucet2d57762mpiZJ0pFHHqmRI0dqr7320syZM1uWGTBggM444wztu+++GjVqlFatWqW//e1vmjt3rk4//XTV19dr6dKlOv7443XllVemstx444064IADNGLECB199NFas2ZN1c87YMAARacrS6+//nrLYzPTgAEDJEnvvPOO3nnnnZZ5taB8P2jPLbfcol122UU77bRT1eW6e98o733M476x3Xbbab/99lO/fv1S85YtW6YFCxboP/7jPxLTzUyvvvqqJOmVV17hMGoAAICc6cglYeZIukfSbma2zMy+Es+aoOShu5J0kKSHzOxBSVdK+qq7v9idgWtRY2NjxelNTU164IEH9JGPfKRl2q9+9SsNGzZMJ5xwgl566SVJ0ve+9z1NnjxZM2bM0Mknn6wzzjhDP/zhD9t8vzfffFOTJk3SvHnzdOedd+rZZ59NzD/11FMrvu6SSy5RY2OjGhoadMEFF+iFF16QFBWGo0aN0oMPPqiDDjpIs2bN0kc/+lGNHz9e5513nhYtWqRddtml4jqff/55nXPOObr55pu1cOFClUol/fd//7ek6JDOuXPnVnzd1Vdfrd13311jx47VJZdc0jJ9/fr1qq+v13bbbadDDz000XZ519Z+UMkVV1yhiRMnJqZlsW+0JU/7RltOPfVU/fSnP9UmmyT/WZsxY4ZOP/107bDDDvrOd76jn/zkJxu1XgAAAPSsdotSd5/o7oPdvZ+7b+/uF8fTj3f3/2m17FXuvpe77+vuI9x9Xk8FryWTJ09OTVuzZo2OOuoozZgxQ+97X3Ta7de+9jUtXbpUixYt0uDBg/Xtb39bklRfX697771Xt912m5544gkNGTJE7q4vfvGLOuaYY7Rq1arEupcsWaKdd95Zu+66q8xMxxxzTGJ+pR4rSbrgggtaeryeeeYZPfZYNH5V//79W3rTRo4c2dJ71hH33nuvFi9erAM
PPFD19fWaPXu2nnrqKUnSD37wA40fP77i6z772c9qyZIluuaaa3TmmWe2TO/Tp48WLVqkZcuW6b777tPDDz9c8fV5VGk/qOTtt9/W3LlzdfTRR7dMy2rfaEue9o1K5s+fr+22204jR45Mzbvwwgv185//XM8884x+/vOf6ytf+UqFNQAAACCUrg50hA6YNWtW4vk777yjo446Sl/60pf0uc99rmX6oEGD1KdPH22yySaaNGlSakAWd9c555yjM888U2effbbOPvtsHXPMMbrgggtS79nWYa19+/bV3//+95bnb775piTp9ttv180336x77rlHDz74oIYPH94yr1+/fi3r69Onj9atW9fhz+7uOvTQQ7Vo0SItWrRIixcv1sUXX9zh1x900EFaunSpnn/++cT0LbfcUqNHj9b111/f4XWF1no/aMtf/vIXjRgxQoMGDWqZltW+8e6777Y8z/u+Ue7uu+/W3LlzVVdXpwkTJujWW29tKbhnz57d8nd29NFHM9ARAABAzlCUZszd9ZWvfEV77LGHTjvttMS88tFZr776au29d/Jyr7Nnz9bYsWO11VZb6Y033tAmm2yiTTbZRG+88UZiud13311PPvmkli5dKkmaM2fDUdZ1dXUtjxcuXKgnn3xSUnSu3VZbbaXNN99cS5Ys0b333tvuZ9liiy302muvVV1m1KhRuvvuu1tGhn3jjTf06KOPVn3N448/3jJy7MKFC/X2229rm2220erVq/Xyyy9LktauXaubb75Zu+++e7s5a82cOXNSh+5mtW8sXLhQUn73jbb85Cc/0bJly9TU1KQrrrhCn/zkJ/X73/9eUnQpnjvuuEOSdOutt2rXXXft1HsAAACgZ/QNHaBW1E1ZUHV+0/SxHVrP3Xffrd/97nfaZ599VF9fL0n68Y9/rMMPP1z/+Z//qUWLFsnMVFdXp4suuqjldW+88YZmz56tG2+8UZJ02mmn6aijjlL//v0ThYUkvec979HMmTM1duxYbbvttvrYxz7WcpjrUUcdpWOPPVb19fXab7/99OEPf1iSNGbMGP3P//yPhg0bpt12202jRo1q97NMmDBBkyZN0gUXXNDmIcEDBw7UpZdeqokTJ+qtt96SJJ1zzjn68Ic/rLPOOkulUil1mOZVV12lyy67TP369dNmm22mP/7xjzIzrVy5Uscdd5zWr1+vd999V1/4whcSl/7oDd544w3ddNNNiW0vKbN947LLLsv1vvHss8+qVCrp1Vdf1SabbKIZM2Zo8eLFLYfAVzJr1iydcsopWrduXcvnBwAAQH5Yc49USKVSyTt73cKsdKUoXbFiRdARP2+//Xadf/75mj9/fi7yFFUe2731vgEAAAD0BDNrdPdSpXkcvpuBjRl1NQt5y1MUtDsAAACQRk9pB3Wlp9TMlId2bpa3PEVBuwMAAKCo6CkFAAAAAOQSRSkAAAAAIBiK0gy0Hkk1tLzlKQraHQAAAEjjnNIO6q5LwgAAAABA0XBOaWBmFjpCQt7yFAXtDgAAAKRRlAIAAAAAgqEoBQAAAAAEQ1GagXHjxoWOkJC3PEVBuwMAAABpFKUZmDdvXugICXnLUxS0OwAAAJBGUZqBI444InSEhLzlKQraHQAAAEijKM3A/PnzQ0dIyFueoqDdAQAAgDSKUqSYmY499tiW5+vWrdPAgQM3+pzI0aNHq/n6s4cffrhefvnl7owpSXr66af1qU99SnvssYf23HNPNTU1SZKOP/547bzzzqqvr1d9fb0WLVrU7e8NAAAAoOv6hg6A/Hnve9+rhx9+WGvXrtVmm22mm266SUOHDu3SOq+77rpuSpf05S9/WWeccYYOPfRQrVmzRptssuF3lvPOO0+f//zne+R9AQAAAHQPekoz4O6hIyR0JM+nP/1pLViwQJI0Z84cTZw4sWXe66+/rhNOOEH77befhg8frmuvvVaStHbtWk2YMEHDhg3TF7/4Ra1du7blNXV1dXr++eclSUceeaR
GjhypvfbaSzNnzmxZZsCAATrjjDO07777atSoUVq1alXVjIsXL9a6det06KGHtrx+880372ArZC9v+wEAAACQBxSlGSgvvPKgI3kmTJigK664Qm+++aYeeughfeQjH2mZ96Mf/Uif/OQndf/99+u2227T6aefrtdff10XXnihNt98cz300EM644wz1NjYWHHdl1xyiRobG9XQ0KALLrhAL7zwgqSo2B01apQefPBBHXTQQZo1a5Ykae7cuTrrrLNS63n00Ue15ZZb6nOf+5yGDx+u008/XevXr2+Zf8YZZ2jYsGH61re+pbfeemuj2qgn5G0/AAAAAPKAojQDJ554YugICR3JM2zYMDU1NWnOnDk6/PDDE/NuvPFGTZ8+XfX19Ro9erTefPNNPf300/rrX/+qY445puX1w4YNq7juCy64oKU39JlnntFjjz0mSerfv3/LeasjR45sOT90/Pjx+sEPfpBaz7p163TnnXfq/PPP1/33368nnnhCl156qSTpJz/5iZYsWaL7779fL774os4999wOtU1Pytt+AAAAAOQBRSnaNH78eH3nO99JHLorRYehXnXVVVq0aJEWLVqkp59+WnvssYekaJCkam6//XbdfPPNuueee/Tggw9q+PDhevPNNyVJ/fr1a3l9nz59tG7duqrr2n777TV8+HB98IMfVN++fXXkkUdq4cKFkqTBgwfLzLTpppvq3//933Xfffd1qg0AAAAA9CyKUrTphBNO0FlnnaV99tknMf2www7TL3/5y5ZzJB944AFJ0kEHHaTLL79ckvTwww/roYceSq3zlVde0VZbbaXNN99cS5Ys0b333tvpfPvtt59eeuklrV69WpJ06623as8995QkrVy5UlJUQF9zzTXae++9O/0+AAAAAHoORWkG5s6dGzpCQkfzbL/99jrllFNS088880y98847GjZsmPbee2+deeaZkqSvfe1rWrNmjYYNG6af/vSn2n///VOvHTNmjNatW6dhw4bpzDPP1KhRozqUt9I5pX369NH555+vQw45RPvss4/cXZMmTZIkfelLX9I+++yjffbZR88//7z+67/+q0OfuSflbT8AAAAA8sDyMCJoqVTy5utZ5lXdlAVV5zdNH9vmvBUrVmjIkCHdHanT8panKGh3AAAAFJWZNbp7qdI8ekoz0NVrfHa3vOUpCtodAAAASKMoBQAAAAAEQ1EKAAAAAAiGojQDzYPv5EXe8hQF7Q4AAACkMdBRB3VloCMAAAAAKDIGOgps5MiRoSMk5C1PUdDuAAAAQBpFaQYWLlwYOkJC3vIUBe0OAAAApFGUAgAAAACCoSjNwODBg0NHSMhbnqKg3QEAAIA0itIMrFixInSEhLzlKQraHQAAAEijKM3AtGnTQkdIyFueoqDdAQAAgDQuCdNBXbkkjJkpD+3cLG95ioJ2BwAAQFFxSRgAAAAAQC5RlAIAAAAAgqEozUDeDk3OW56ioN0BAACANIpSAAAAAEAwFKUZKJUqns8bTN7yFAXtDgAAAKRRlAIAAAAAgqEoBQAAAAAEQ1GagalTp4aOkJC3PEVBuwMAAABp5u6hM6hUKnneRyatm7Kg6vym6WMzSgIAAAAAtcXMGt294iAr9JRmYMiQIaEjJOQtT1HQ7gAAAEAaRWkGVq5cGTpCQt7yFAXtDgAAAKRRlAIAAAAAgqEozcCIESNCR0jIW56ioN0BAACAtHaLUjO7xMyeM7OHy6ZNM7PlZrYovh1eNu97Zva4mf3LzA7rqeC1pLGxMXSEhLzlKQraHQAAAEjrSE/ppZLGVJj+c3evj2/XSZKZ7SlpgqS94tf8xsz6dFfYWjV58uTQERLylqcoaHcAAAAgrd2i1N3/KunFDq7vM5KucPe33P1JSY9L2r8L+XqFWbNmhY6QkLc8RUG7AwAAAGldOaf0ZDN7KD68d6t42lBJz5QtsyyeBgAAAABASmeL0gsl7SKpXtJKST+Lp1uFZb3SCsxsspk1mFn
D6tWrOxkDAAAAAFDLOlWUuvsqd1/v7u9KmqUNh+guk7RD2aLbS1rRxjpmunvJ3UsDBw7sTIyasXz58tAREvKWpyhodwAAACCtU0WpmQ0ue/pZSc0j886VNMHMNjWznSXtKum+rkWsfXkbdTVveYqCdgcAAADS+ra3gJnNkTRa0rZmtkzSVEmjzaxe0aG5TZJOlCR3f8TM/iRpsaR1kk5y9/U9kryGjB8/Xu4Vj2IOIm95ioJ2BwAAANLaLUrdfWKFyRdXWf5Hkn7UlVAAAAAAgGLoyui7AAAAAAB0CUVpBi666KLQERLylqcoaHcAAAAgzfJwjlupVPKGhobQMaqqm7Kg6vym6WMzSgIAAAAAtcXMGt29VGkePaUZMKt0+dZw8panKGh3AAAAII2iFAAAAAAQDEUpAAAAACAYitIMjBs3LnSEhLzlKQraHQAAAEijKM3AvHnzQkdIyFueoqDdAQAAgDSK0gwcccQRoSMk5C1PUdDuAAAAQBpFaQbmz58fOkJC3vIUBe0OAAAApFGUAgAAAACCoSgFAAAAAARDUZoBdw8dISFveYqCdgcAAADSKEozMHPmzNAREvKWpyhodwAAACDN8tB7UyqVvKGhIXSMquqmLKg6v2n62DbnmVmuesnylqcoaHcAAAAUlZk1unup0jx6SgEAAAAAwVCUAgAAAACCoSjNwNy5c0NHSMhbnqKg3QEAAIA0itIMjBw5MnSEhLzlKQraHQAAAEijKM3A0KFDQ0dIyFueoqDdAQAAgDSKUgAAAABAMBSlAAAAAIBgKEozMGnSpNAREvKWpyhodwAAACDN3D10BpVKJW9oaAgdo6q6KQuqzm+aPjajJAAAAABQW8ys0d1LlebRU5qBvI26mrc8RUG7AwAAAGkUpRlYuHBh6AgJectTFLQ7AAAAkEZRCgAAAAAIhqI0A4MHDw4dISFveYqCdgcAAADSKEozsGLFitAREvKWpyhodwAAACCNojQD06ZNCx0hIW95ioJ2BwAAANK4JEwHdeWSMGamPLRzs7zlKQraHQAAAEXFJWEAAAAAALlEUQoAAAAACIaiNAN5OzQ5b3mKgnYHAAAA0ihKAQAAAADBUJRmoFSqeD5vMHnLUxS0OwAAAJBGUQoAAAAACIaiFAAAAAAQDEVpBqZOnRo6QkLe8hQF7Q4AAACkmbuHzqBSqeR5H5m0bsqCqvObpo/NKAkAAAAA1BYza3T3ioOs0FOagSFDhoSOkJC3PEVBuwMAAABpFKUZWLlyZegICXnLUxS0OwAAAJBGUQoAAAAACIaiNAMjRowIHSEhb3mKgnYHAAAA0ihKM9DY2Bg6QkLe8hQF7Q4AAACkUZRmYPLkyaEjJOQtT1HQ7gAAAEAal4TpoK5cEsbMlId2bpa3PEVBuwMAAKCouCQMAAAAACCXKEoBAAAAAMFQlGZg+fLloSMk5C1PUdDuAAAAQBpFaQbyNupq3vIUBe0OAAAApLVblJrZJWb2nJk9XDbtPDNbYmYPmdnVZrZlPL3OzNaa2aL49j89mL1mjB8/PnSEhLzlKQraHQAAAEjrSE/ppZLGtJp2k6S93X2YpEclfa9s3lJ3r49vX+2emAAAAACA3qjdotTd/yrpxVbTbnT3dfHTeyVt3wPZAAAAAAC9XHecU3qCpL+UPd/ZzB4wszvM7OPdsP6ad9FFF4WOkJC3PEVBuwMAAABp5u7tL2RWJ2m+u+/davoZkkqSPufubmabShrg7i+Y2UhJ10jay91frbDOyZImS9KOO+448qmnnurqZ+lRdVMWVJ3fNH1sRkkAAAAAoLaYWaO7lyrN63RPqZkdJ2mcpC95XNm6+1vu/kL8uFHSUkkfrvR6d5/p7iV3Lw0cOLCzMWqCmYWOkJC3PEVBuwMAAABpnSpKzWyMpO9KGu/ub5RNH2hmfeLHH5S0q6QnuiMoAAAAAKD36dveAmY2R9JoSdua2TJJUxWNtruppJvi3p9745F2D5L0AzNbJ2m9pK+
6+4sVVwwAAAAAKLx2i1J3n1hh8sVtLHuVpKu6Gqq3GTduXOgICXnLUxS0OwAAAJDWHaPvoh3z5s0LHSEhb3mKgnYHAAAA0ihKM3DEEUeEjpCQtzxFQbsDAAAAaRSlGZg/f37oCAl5y1MUtDsAAACQRlEKAAAAAAiGohQAAAAAEAxFaQbcPXSEhLzlKQraHQAAAEijKM3AzJkzQ0dIyFueoqDdAQAAgDTLQ+9NqVTyhoaG0DGqqpuyoOr8pulj25xnZrnqJctbnqKg3QEAAFBUZtbo7qVK8+gpBQAAAAAEQ1EKAAAAAAiGojQDc+fODR0hIW95ioJ2BwAAANIoSjMwcuTI0BES8panKGh3AAAAII2iNANDhw4NHSEhb3mKgnYHAAAA0ihKAQAAAADBUJQCAAAAAIKhKM3ApEmTQkdIyFueoqDdAQAAgDRz99AZVCqVvKGhIXSMquqmLKg6v2n62IySAAAAAEBtMbNGdy9VmkdPaQbyNupq3vIUBe0OAAAApFGUZmDhwoWhIyTkLU9R0O4AAABAGkUpAAAAACAYitIMDB48OHSEhLzlKQraHQAAAEijKM3AihUrQkdIyFueoqDdAQAAgDSK0gxMmzYtdISEvOUpCtodAAAASOOSMB3UlUvCmJny0M7N8panKGh3AAAAFBWXhAEAAAAA5BJFKQAAAAAgGIrSDOTt0OS85SkK2h0AAABIoygFAAAAAARDUZqBUqni+bzB5C1PUdDuAAAAQBpFKQAAAAAgGIpSAAAAAEAwFKUZmDp1augICXnLUxS0OwAAAJBm7h46g0qlkud9ZNK6KQuqzm+aPjajJAAAAABQW8ys0d0rDrJCT2kGhgwZEjpCQt7yFAXtDgAAAKRRlGZg5cqVoSMk5C1PUdDuAAAAQBpFKQAAAAAgGIrSDIwYMSJ0hIS85SkK2h0AAABIoyjNQGNjY+gICXnLUxS0OwAAAJBGUZqByZMnh46QkLc8RUG7AwAAAGlcEqaDunJJGDNTHtq5Wd7yFAXtDgAAgKLikjAAAAAAgFyiKAUAAAAABENRmoHly5eHjpCQtzxFQbsDAAAAaRSlGcjbqKt5y1MUtDsAAACQRlGagfHjx4eOkJC3PEVBuwMAAABpFKUAAAAAgGAoSgEAAAAAwVCUZuCiiy4KHSEhb3mKgnYHAAAA0szdQ2dQqVTyhoaG0DGqqpuyoOr8puljM0oCAAAAALXFzBrdvVRpHj2lGTCz0BES8panKGh3AAAAII2iFAAAAAAQTLtFqZldYmbPmdnDZdO2NrObzOyx+H6rsnnfM7PHzexfZnZYTwUHAAAAANS+jvSUXippTKtpUyTd4u67Srolfi4z21PSBEl7xa/5jZn16ba0NWrcuHGhIyTkLU9R0O4AAABAWrtFqbv/VdKLrSZ/RtLs+PFsSUeWTb/C3d9y9yclPS5p/+6JWrvmzZsXOkJC3vIUBe0OAAAApHX2nNJB7r5SkuL77eLpQyU9U7bcsnhaoR1xxBGhIyTkLU9R0O4AAABAWncPdFRpeNGK15wxs8lm1mBmDatXr+7mGPkyf/780BES8panKGh3AAAAIK2zRekqMxssSfH9c/H0ZZJ2KFtue0krKq3A3We6e8ndSwMHDuxkDAAAAABALetsUTpX0nHx4+MkXVs2fYKZbWpmO0vaVdJ9XYsIAAAAAOit+ra3gJnNkTRa0rZmtkzSVEnTJf3JzL4i6WlJR0uSuz9iZn+StFjSOkknufv6HspeM9wrHsEcTN7yFAXtDgAAAKR1ZPTdie4+2N37ufv27n6xu7/g7oe4+67x/Ytly//I3Xdx993c/S89G782zJw5M3SEhLzlKQraHQAAAEizPPTelEolb2hoCB2jqropC6rOb5o+ts15ZparXrK85SkK2h0AAABFZWaN7l6qNK+7R98FAAAAAKDDKEoBAAAAAMFQlGZg7ty5oSMk5C1PUdDuAAAAQBpFaQZGjhwZOkJC3vIUBe0OAAAApFG
UZmDo0KGhIyTkLU9R0O4AAABAGkUpAAAAACAYilIAAAAAQDAUpRmYNGlS6AgJectTFLQ7AAAAkGbuHjqDSqWSNzQ0hI5RVd2UBVXnN00fm1ESAAAAAKgtZtbo7qVK8+gpzUDeRl3NW56ioN0BAACANIrSDCxcuDB0hIS85SkK2h0AAABIoygFAAAAAARDUZqBwYMHh46QkLc8RUG7AwAAAGkUpRlYsWJF6AgJectTFLQ7AAAAkEZRmoFp06aFjpCQtzxFQbsDAAAAaVwSpoO6ckkYM1Me2rlZ3vIUBe0OAACAouKSMAAAAACAXKIoBQAAAAAEQ1Gagbwdmpy3PEVBuwMAAABpFKUAAAAAgGAoSjNQKlU8nzeYvOUpCtodAAAASKMoBQAAAAAEQ1EKAAAAAAiGojQDU6dODR0hIW95ioJ2BwAAANLM3UNnUKlU8ryPTFo3ZUHV+U3Tx2aUBAAAAABqi5k1unvFQVboKc3AkCFDQkdIyFueoqDdAQAAgDSK0gysXLkydISEvOUpCtodAAAASKMoBQAAAAAEQ1GagREjRoSOkJC3PEVBuwMAAABpFKUZaGxsDB0hIW95ioJ2BwAAANIoSjMwefLk0BES8panKGh3AAAAII1LwnRQVy4JY2bKQzs3y1ueoqDdAQAAUFRcEgYAAAAAkEsUpQAAAACAYChKM7B8+fLQERLylqcoaHcAAAAgjaI0A3kbdTVveYqCdgcAAADSKEozMH78+NAREvKWpyhodwAAACCtb+gAvUV7o/MCAAAAANLoKQUAAAAABENRmoGtDzs5dISEiy66KHSEQqLdAQAAgDSK0gxsUT8mdISEyZMnh45QSLQ7AAAAkEZRmoGnzh0XOkKCmYWOUEi0OwAAAJBGUQoAAAAACIaiFAAAAAAQDEVpBjbbZb/QERLGjcvX4cRFQbsDAAAAaRSlGdju81NDR0iYN29e6AiFRLsDAAAAaRSlGXjuyrNDR0g44ogjQkcoJNodAAAASKMozcDapfeHjpAwf/780BEKiXYHAAAA0ihKAQAAAADBUJQCAAAAAILp29kXmtlukv5YNumDks6StKWkSZJWx9O/7+7XdfZ9eoOdvpuvwzbdPXSEQqLdAQAAgLRO95S6+7/cvd7d6yWNlPSGpKvj2T9vnlf0glSSXlt0fegICTNnzgwdoZBodwAAACCtuw7fPUTSUnd/qpvW16u8eMOvQkdIOPHEE0NHKCTaHQAAAEjrrqJ0gqQ5Zc9PNrOHzOwSM9uqm94DAAAAANDLdLkoNbP+ksZL+nM86UJJu0iql7RS0s/aeN1kM2sws4bVq1dXWgQAAAAA0Mt1R0/ppyUtdPdVkuTuq9x9vbu/K2mWpP0rvcjdZ7p7yd1LAwcO7IYY+TXwqDNDR0iYO3du6AiFRLsDAAAAad1RlE5U2aG7Zja4bN5nJT3cDe9R0/oP+lDoCAkjR44MHaGQaHcAAAAgrUtFqZltLulQSf9XNvmnZvYPM3tI0ickfasr79EbLP/NcaEjJAwdOjR0hEKi3QEAAIC0Tl+nVJLc/Q1J27SadmyXEgEAAAAACqO7Rt8FAAAAAGCjUZRmYMC+h4WOkDBp0qTQEQqJdgcAAADSKEozsM2Yb4SOkDBz5szQEQqJdgcAAADSKEozsPLSU0JHSGAU2DBodwAAACCNojQDb69aGjpCwsKFC0NHKCTaHQAAAEijKAUAAAAABENRmoE+A7YOHSFh8ODBoSMUEu0OAAAApFGUZmD7ky4LHSFhxYoVoSMUEu0OAAAApFGUZuDluy4PHSFh2rRpoSMUEu0OAAAApJm7h86gUqnkDQ0NoWNUVTdlQadf+9S545SHdm5mZrnKUxS0OwAAAIrKzBrdvVRpHj2lAAAAAIBgKEoBAAAAAMFQlGbgA8fNCB0hIe+HSvdWtDsAAACQRlEKAAAAAAiGojQDz84+NXSEhFKp4vnF6GG0OwAAAJBGUQoAAAAACIaiFAAAAAAQDEVpBt5
/4MTQERKmTp0aOkIh0e4AAABAmrl76AwqlUqe95FJ66Ys6NLrm6aP7aYkAAAAAFBbzKzR3SsOskJPaQaW/frLoSMkDBkyJHSEQqLdAQAAgDSK0gysX/Ni6AgJK1euDB2hkGh3AAAAII2iFAAAAAAQDEVpBvoP2iV0hIQRI0aEjlBItDsAAACQRlGagcHH/yJ0hITGxsbQEQqJdgcAAADSKEoz8ML1vwwdIWHy5MmhIxQS7Q4AAACkUZRmYM2DN4SOkDBr1qzQEQqJdgcAAADSKEoBAAAAAMFQlAIAAAAAgqEozcDQr88OHSFh+fLloSMUEu0OAAAApFGUZuDtVY+HjpDAKLBh0O4AAABAGkVpBlZf9cPQERLGjx8fOkIh0e4AAABAGkUpAAAAACAYilIAAAAAQDAUpRnY+rCTQ0dIuOiii0JHKCTaHQAAAEgzdw+dQaVSyRsaGkLHqKpuyoIeXX/T9LE9un4AAAAACMXMGt29VGkePaUZeOrccaEjJJhZ6AiFRLsDAAAAaRSlAAAAAIBgKEoBAAAAAMFQlGZgs132Cx0hYdy4fB1OXBS0OwAAAJBGUZqB7T4/NXSEhHnz5oWOUEi0OwAAAJBGUZqB5648O3SEhCOOOCJ0hEKi3QEAAIC0vqEDFMHapfe3u0x7l5zpzkvGzJ8/v9vWhY6j3QEAAIA0ekoBAAAAAMFQlAIAAAAAgqEozcBO383XYZvuHjpCIdHuAAAAQBpFaQZeW3R96AgJM2fODB2hkGh3AAAAII2iNAMv3vCr0BESTjzxxNARCol2BwAAANIoSgEAAAAAwVCUAgAAAACCoSjNwMCjzgwdIWHu3LmhIxQS7Q4AAACkUZRmoP+gD4WOkDBy5MjQEQqJdgcAAADSKEozsPw3x4WOkDB06NDQEQqJdgcAAADS+nblxWbWJOk1SeslrXP3kpltLemPkuokNUn6gru/1LWYAAAAAIDeqDt6Sj/h7vXuXoqfT5F0i7vvKumW+DkAAAAAACk9cfjuZyTNjh/PlnRkD7xHTRmw72GhIyRMmjQpdIRCot0BAACANHP3zr/Y7ElJL0lySRe5+0wze9ndtyxb5iV336rCaydLmixJO+6448innnqq0zmyUDdlQdD3b5o+Nuj7AwAAAEBnmVlj2dG1CV3tKT3Q3UdI+rSkk8zsoI6+0N1nunvJ3UsDBw7sYox8W3npKaEjJDAKbBi0OwAAAJDWpaLU3VfE989JulrS/pJWmdlgSYrvn+tqyFr39qqloSMkLFy4MHSEQqLdAQAAgLROF6Vm9l4z26L5saRPSXpY0lxJzddAOU7StV0NCQAAAADonbpySZhBkq42s+b1/MHdrzez+yX9ycy+IulpSUd3PWZt6zNg69AREgYPHhw6QiHR7gAAAEBap4tSd39C0r4Vpr8g6ZCuhOpttj/psh5/j44MxNQ8WNKKFSt6Og4qoN0BAACAtJ64JAxaefmuy0NHSJg2bVroCIVEuwMAAABpFKUZeOXuOaEjJJx99tmhIxQS7Q4AAACkdeWcUmQo9HVSAQAAAKAn0FMKAAAAAAiGojQDHzhuRugICQ0NDaEjFBLtDgAAAKRRlAIAAAAAgqEozcCzs08NHSGhVCqFjlBItDsAAACQRlEKAAAAAAiGohQAAAAAEAxFaQbef+DE0BESpk6dGjpCIdHuAAAAQJq5e+gMKpVKnveRSXvDdUKbpo8NHQEAAABAAZlZo7tXHGSFntIMLPv1l0NHSBgyZEjoCIVEuwMAAABpFKUZWL/mxdARElauXBk6QiHR7gAAAEAaRSkAAAAAIBiK0gz0H7RL6AgJI0aMCB2hkGh3AAAAII2iNAODj/9F6AgJjY2NoSMUEu0OAAAApFGUZuCF638ZOkLC5MmTQ0coJNodAAAASKMozcCaB28IHSFh1qxZoSMUEu0OAAAApFGUAgAAAACCoSgFAAAAAARDUZqBoV+fHTpCwvLly0NHKCTaHQAAAEijKM3A26s
eDx0hgVFgw6DdAQAAgDSK0gysvuqHoSMkjB8/PnSEQqLdAQAAgDSKUgAAAABAMBSlAAAAAIBgKEozsPVhJ4eOkHDRRReFjlBItDsAAACQRlGagS3qx4SOkDB58uTQEQqJdgcAAADSKEoz8NS540JHSDCz0BEKiXYHAAAA0ihKAQAAAADBUJQCAAAAAIKhKM3AZrvsFzpCwrhx+TqcuChodwAAACCNojQD231+augICfPmzQsdoZBodwAAACCNojQDz115dugICUcccUToCIVEuwMAAABpFKUZWLv0/tAREubPnx86QiHR7gAAAEAaRSkAAAAAIJi+oQMgO3VTFlR83Kxp+tgs4wAAAAAAPaVZ2Om7+TpsM295isLdQ0cAAAAAcoeiNAOvLbo+dISEvOUpipkzZ4aOAAAAAOQORWkGXrzhV6EjJOQtT1GceOKJoSMAAAAAuUNRCgAAAAAIhoGO0KLS4EflGAgJAAAAQHejpzQDA486M3SEhLzlKYq5c+eGjgAAAADkDkVpBvoP+lDoCAl5y1MUI0eODB0BAAAAyB2K0gws/81xoSMk5C1PUQwdOjR0BAAAACB3KEoBAAAAAMFQlAIAAAAAgqEozcCAfQ8LHSEhb3mKYtKkSaEjAAAAALlDUZqBbcZ8I3SEhLzlKYqZM2eGjgAAAADkDkVpBlZeekroCAl5y1MUjL4LAAAApFGUZuDtVUtDR0jIW56iWLhwYegIAAAAQO5QlAIAAAAAgul0UWpmO5jZbWb2TzN7xMxOiadPM7PlZrYovh3efXFrU58BW4eOkJC3PEUxePDg0BEAAACA3Onbhdeuk/Rtd19oZltIajSzm+J5P3f387ser3fY/qTLQkdIyFueolixYkXoCAAAAEDudLqn1N1XuvvC+PFrkv4paWh3BetNXr7r8tAREvKWpyimTZsWOgIAAACQO91yTqmZ1UkaLunv8aSTzewhM7vEzLbqjveoZa/cPSd0hIS85SmKs88+O3QEAAAAIHe6cviuJMnMBki6StKp7v6qmV0o6YeSPL7/maQTKrxusqTJkrTjjjt2NQZ6gbopC6rOb5o+NqMkAAAAALLSpZ5SM+unqCC93N3/T5LcfZW7r3f3dyXNkrR/pde6+0x3L7l7aeDAgV2JAQAAAACoUV0ZfdckXSzpn+7+32XTy4cY/aykhzsfr3f4wHEzQkdIyFueomhoaAgdAQAAAMidrhy+e6CkYyX9w8wWxdO+L2mimdUrOny3SdKJXXgPAAAAAEAv1umi1N3vkmQVZl3X+Ti907OzT9VO350fOkaLvOXpqFo/57RUKsndQ8cAAAAAcqVbRt8FAAAAAKAzKEoBAAAAAMFQlGbg/QdODB0hIW95imLq1KmhIwAAAAC5Q1GagS0/9qXQERLylqcopk2bFjoCAAAAkDtdGX0XHbTs11/W9iddFjpGi7zl6U2qDca07Ndf1rrXXsgwDQAAAJB/9JRmYP2aF0NHSMhbnqKg3QEAAIA0ilIAAAAAQDAcvpuB/oN2CR0hobN5av06oaHlbT8AAAAA8oCe0gwMPv4XoSMk5C1PUdDuAAAAQBo9pRl44fpfapsx3wgdo0WoPO31tOYhQ0/29r5w/S8lepMBAACABHpKM7DmwRtCR0jIW56ioN0BAACANHpKgY2Qh95eAAAAoDehpxQAAAAAEAxFaQaGfn126AgJectTFLQ7AAAAkEZRmoG3Vz0eOkJC3vIUBe0OAAAApFGUZmD1VT8MHSEhb3mKgnYHAAAA0hjoCMhQyEvSAAAAAHlETykAAAAAIBiK0gxsfdjJoSMk5C1PUdDuAAAAQBpFaQa2qB8TOkJC3vIUBe0OAAAApFGUZuCpc8eFjpCQtzxFQbsDAAAAaQx0hF6jvUGEslpHT74/AyEBAACgt6GnFAAAAAAQDD2lGdhsl/1CR0jIW56iyEO7d6QnmN5YAAAAZIme0gxs9/mpoSMk5C1PUdDuAAAAQBpFaQaeu/Ls0BE
S8panKGh3AAAAII3DdzOwdun9oSMk9FSe0IME5V3e9oO2MNgSAAAAskRPKQAAAAAgGHpKASBn6K0GAABFQk9pBnb67vzQERLylqcoaHcAAAAgjaI0A68tuj50hIS85SkK2h0AAABI4/DdDLx4w6+0Rf2Y0DFa5C1PURSl3Xv60NOuDqjFoa8AAAD5Qk8pAAAAACAYekqBGsJld7quI21IbyoAAEB26CnNwMCjzgwdISFveYqCdgcAAADSKEoz0H/Qh0JHSMhbnqKg3QEAAIA0Dt/NwPLfHJery4HkLU9R9JZ27+lDiDlEGQAAoFjoKQUAAAAABENPKYBM0RMKAACAcvSUZmDAvoeFjpCQtzxFQbsDAAAAafSUZmCbMd8IHSEhb3mKgnYvjvZ6g7t6yZmeXj8AAECW6CnNwMpLTwkdISFveYqCdgcAAADSKEoz8PaqpaEjJOQtT1HQ7gAAAEAah+8CQMF0x2BTvf0Q5LznAwCgN6GnNAN9BmwdOkJC3vIUBe0OAAAApNFTmoHtT7osdISEvOUpCtq9doQeqKg3KMJn7Gn01gIAioKe0gy8fNfloSMk5C1PUdDuAAAAQBpFaQZeuXtO6AgJectTFLQ7AAAAkMbhuwCwkTg0tefbgENX20cbAQB6C3pKAQAAAADB0FOagQ8cNyN0hIS85SkK2h1ZoSe353WkjdvrqSxCb3MeMvSkPFxeCQB6gx7rKTWzMWb2LzN73Mym9NT7AAAAAABqV4/0lJpZH0m/lnSopGWS7jezue6+uCfeL++enX2qdvru/NAxWuQtT1HQ7kD36WoPXBa9yXnvsa6FXsxayJh3oS9xxTbKhyJsp97+GbvjCJ0866me0v0lPe7uT7j725KukPSZHnovAAAAAECN6qmidKikZ8qeL4unAQAAAADQwty9+1dqdrSkw9z9P+Lnx0ra392/UbbMZEmT46e7SfpXN7z1tpKe74b1IBy2Ye/Adqx9bMPaxzasfWzD2sc27B3Yjt1jJ3cfWGlGT42+u0zSDmXPt5e0onwBd58paWZ3vqmZNbh7qTvXiWyxDXsHtmPtYxvWPrZh7WMb1j62Ye/Adux5PXX47v2SdjWznc2sv6QJkub20HsBAAAAAGpUj/SUuvs6MztZ0g2S+ki6xN0f6Yn3AgAAAADUrp46fFfufp2k63pq/W3o1sOBEQTbsHdgO9Y+tmHtYxvWPrZh7WMb9g5sxx7WIwMdAQAAAADQET11TikAAAAAAO3qNUWpmY0xs3+Z2eNmNiV0HlRmZpeY2XNm9nDZtK3N7CYzeyy+36ps3vfibfovMzssTGqUM7MdzOw2M/unmT1iZqfE09mONcLM3mNm95nZg/E2PDuezjasMWbWx8weMLP58XO2YY0xsyYz+4eZLTKzhnga27GGmNmWZnalmS2J/288gG1YO8xst/jvr/n2qpmdyjbMVq8oSs2sj6RfS/q0pD0lTTSzPcOmQhsulTSm1bQpkm5x910l3RI/V7wNJ0jaK37Nb+JtjbDWSfq2u+8haZSkk+JtxXasHW9J+qS77yupXtIYMxsltmEtOkXSP8uesw1r0yfcvb7skhNsx9ryC0nXu/vukvZV9DfJNqwR7v6v+O+vXtJISW9Iulpsw0z1iqJU0v6SHnf3J9z9bUlXSPpM4EyowN3/KunFVpM/I2l2/Hi2pCPLpl/h7m+5+5OSHle0rRGQu69094Xx49cU/ec7VGzHmuGRNfHTfvHNxTasKWa2vaSxkn5bNplt2DuwHWuEmb1P0kGSLpYkd3/b3V8W27BWHSJpqbs/JbZhpnpLUTpU0jNlz5fF01AbBrn7SikqeCRtF09nu+acmdVJGi7p72I71pT4sM9Fkp6TdJO7sw1rzwxJ/ynp3bJpbMPa45JuNLNGM5scT2M71o4PSlot6X/jQ+l/a2bvFduwVk2QNCd+zDbMUG8pSq3CNIYVrn1s1xwzswGSrpJ0qru/Wm3RCtPYjoG5+/r4UKXtJe1
vZntXWZxtmDNmNk7Sc+7e2NGXVJjGNsyHA919hKJTkE4ys4OqLMt2zJ++kkZIutDdh0t6XfFhnm1gG+aUmfWXNF7Sn9tbtMI0tmEX9ZaidJmkHcqeby9pRaAs2HirzGywJMX3z8XT2a45ZWb9FBWkl7v7/8WT2Y41KD7M7HZF58WwDWvHgZLGm1mTolNWPmlmvxfbsOa4+4r4/jlF57HtL7ZjLVkmaVl8tIkkXamoSGUb1p5PS1ro7qvi52zDDPWWovR+Sbua2c7xrxwTJM0NnAkdN1fScfHj4yRdWzZ9gpltamY7S9pV0n0B8qGMmZmic2f+6e7/XTaL7VgjzGygmW0ZP95M0r9JWiK2Yc1w9++5+/buXqfo/7xb3f0YsQ1ripm918y2aH4s6VOSHhbbsWa4+7OSnjGz3eJJh0haLLZhLZqoDYfuSmzDTPUNHaA7uPs6MztZ0g2S+ki6xN0fCRwLFZjZHEmjJW1rZsskTZU0XdKfzOwrkp6WdLQkufsjZvYnRf+4r5N0kruvDxIc5Q6UdKykf8TnJErS98V2rCWDJc2ORwvcRNKf3H2+md0jtmGt4++wtgySdHX0W5/6SvqDu19vZveL7VhLviHp8rhj5AlJ/67431a2YW0ws80lHSrpxLLJ/HuaIXPnEGgAAAAAQBi95fBdAAAAAEANoigFAAAAAARDUQoAAAAACIaiFAAAAAAQDEUpAAAAACAYilIAAAAAQDAUpQAAAACAYChKAQAAAADB/H860d8Vc9RlcAAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "items_per_user=df.groupby(['user']).count()['rating']\n", + "\n", + "plt.figure(figsize=(16,8))\n", + "plt.hist(items_per_user, bins=100)\n", + "\n", + "# Let's add median\n", + "t=items_per_user.median()\n", + "plt.axvline(t, color='k', linestyle='dashed', linewidth=1)\n", + "plt.text(t*1.1, plt.ylim()[1]*0.9, 'Median: {:.0f}'.format(t))\n", + "\n", + "# Let's add also some percentiles\n", + "t=items_per_user.quantile(0.25)\n", + "plt.axvline(t, color='k', linestyle='dashed', linewidth=1)\n", + "plt.text(t*1.1, plt.ylim()[1]*0.95, '25% quantile: {:.0f}'.format(t))\n", + "\n", + "t=items_per_user.quantile(0.75)\n", + "plt.axvline(t, color='k', linestyle='dashed', linewidth=1)\n", + "plt.text(t*1.05, plt.ylim()[1]*0.95, '75% quantile: {:.0f}'.format(t))\n", + "\n", + "plt.title('Number of ratings per user', fontsize=30)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA6UAAAHvCAYAAACsfXllAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAABOH0lEQVR4nO3de5wU5ZX/8e8RJF7whiJyUTGIRlEcYTBkTQwxm0AU0ISY4EbFNcuwURLNVYw/BaLu6ibu4n0ZEhWzrsbVuHJJwEtkvUTFGQTjXYgYLiMgBgVFCXh+f1QNNENPTw9T81TVzOf9evWru6uq6zx9+mHo00/VU+buAgAAAAAgDbuk3QAAAAAAQPtFUQoAAAAASA1FKQAAAAAgNRSlAAAAAIDUUJQCAAAAAFJDUQoAAAAASA1FKQAEYGaTzMzj25C025M3ZtbFzK4ys+fMbL2ZfRzncl3abWstZnZ7QZ/pnXZ7kB4zG1LQFyal3R4ASFrHtBsAoG0xs4YXPx7s7s808ZrRku6Kn05290mt0Tbkk5l1k/S0pN4pN6VF4h8jhsRPb3f3pWm1BW2PmV0kaV9J69x9SqqNAYBmoigF0Nr+RdIX024Ecu1SbStIn5T0X5JWSXJJf0upTTtjiKSJ8eN5kpam1RC0SRdJOlTSm5KmpNoSAGgmilIAre1kM/t7d3847YYgt06J7/8q6cvu/kGajQnF3c+VdG7KzUAGuPs8SZZ2OwCgtXBOKYDWUlg4/GtqrUBbcHB8/2p7KUgBAGhPKEoBtJZlku6PH1ea2dfSbAxyrVN8/1GqrQAAAK2CohRAa/p/kj6OH19pZh12dkcFM0/Oa+m2Zjavfpv4+S5mdl68fLWZvW9mfzKzS81srwavPcjMrjCz583sPTN718weM7Nv7sR7OsXMHjCz5Wb2UXx/l5l9phn7OCBu5+Nm9paZbTKzNfHznzRsf5HXL41zsTR+vpuZfc/MnjCzVfEst/Oa+94K9r+HmX3fzB6N2/dRnOMnzOwSM9unkddtna24YPHnCz7bnZrJ2MzOLXjtufGySjP7pZktjj/77fZrkc/Fs//+wcxWxu/jfTN7w8zuNrMRZlb08Mr696Jt55NK0qNF3su8Bq8rOftusRlZzewQM7vWzF6J27fOzP5oZuebWVmn7JjZV81sdvz5fxj3kf8ys083lsNG9nOamd1jZn82sw/ifa0ws0Vm9msz+5aZ7VdOmxrZ/3Z5M7OuZvYzi/7tvhv/+6w1swlmtnsz9jsszv3rFs30/IGZLYmXfbaJ1za7f5XZpkZn340/H1d0PqkkHVqkbzX6WZlZJzP7tpnNMLNl8ee0zqK/cdcW63sNXr9DPzWzUWb2+/jfysa4P/7czLo2eO0+Zvbj+HP6q5ltMLNnzeyfzYzvqEB74u7cuHHjlthN0eQzLumV+PntBcvObeQ1owu2mdTEfuc1ow1Ft1U0yUz9Np0lPVzwvOFtgaT94td9RtLqEtv+okSbJhVsN0TSTSX2s0XSxDLe57mS3iuxH5f0lqTPlNjH0ni7pZIOk/RCkX00mfNG9j1Y0oom2ve2ovNES+Wr1G1IM9t0bmF/lDRB0uZS+5V0W5lt+b2kvVvwXuY1eN3tBet6F9nvkIL1kyQNU3TebWP7f1DSJ0rkZldJ95R4/WZJP2yYwyL72V3SrDLf80UJ/K2ZJ+l4SctLxHld0mFN7K+rpEfKaPMvJe2aVP8q871u91k38m+4qVuxz6pS0p+beN1HksaVaFthP+2jaCKyxva1VNKh8euOlLS4xLb3SLKd7R/cuHHL142JjgC0tomSzlR0COYkM/tvd9+UcpsK3aZoduAnFX0JekvRiMMF8f3xkqaY2URJcxW9j19KekLSJkmfkzRW0cRxPzSzue7+UBMxL5R0uqKC7JeSnpe0h6KiYpSio1gmmdlad7+x2A7M7EJtm2HzI0n3SXpc0lpJXeJ9nSapm6SHzWyQu79Uok2fkPRbSf3i93afpJW
Kvqh3a+L9FGtfhaQ/KCpQJOk5Sf8t6S+SDpL0DUknStpf0iwz+7JHk7nUu1vSwvjx/fH9i4pG3wu90Ny2FfiGpK9IelfSdEm1in4QOC5eVm93RTn+P0nzJS2R9L6i3Bwh6Wxty/kdij7bQvXvZbSk+hH1y4q0/e0WvJcKST9WNBnOVElPxW2ulPTPkvaU9CVFMxlf3sg+qiWdET/+UFGx8ZSinFRK+rakX0i6t4m2/IukU+PHdYqKlBclbVD0I9Dhin7gOansd1faPor6bk9Fhff/SnpHUdHzbUmHxDEfMbMKd3+v4Q7MrIui99onXvSSpP+R9Jqioz36KSoye8X77KimJ6Eqt3+1VJWivx/VivrkmnhZQwsKn1h0RMbD8WulqCD/vaJTL3ZT9BmdE6//TzP7yN1vb6ItV0v6uqK/af+laCbggxT9jTxG0d/UO8zsNEkPKfrMfhO3Y72kAYr+9u6pqC8+qOhvJIC2Lu2qmBs3bm3rpm2/cr9SsOz6guXfK/KaNEdKXdJPi2zTVdtG+TYrKipWS+pfZNuzC/b1u0ZiTmoQ8wVJBxbZ7nRFlzlxRYXPIUW2GViwzSuS+jYS81RFhbNLeqaRbZY2aNf3E+gDu2j7EdcpknYpst1lBdv8RdJuLf3sy2jbuQ3e78uSejTxms9J2rfE+j21/Qjj58voA0PKaOvtBdv3LrJ+SIP38maxviDphIL+8o6KjJYq+mGmfj9rJB1TZJveRfrLuQ226SBpnbaNiu3Qxxv8G/tUCz7LhiNrFxTZprOkRwu2uaGRfd1fsM3/a6S/dlb0w1T9dsOS6F9lvtfCz3pSI9vUfzZLy9jfXvG/OVf0Y8FXGtnu8Lhf1W93QBP91CX9Z8P8KfphZ1HBNjWKJsM7ucj+TlL0Q4BLeqmluePGjVs+bhyvDyCEKxUVWJJ0qZl1TrMxDcx1939puNDd10iqH6XsoGh0Y7y7P19k218rOjxQkr5Yxrl7myV9091XF9nX/0q6Nn66h6TvFHn9REUjNR9JGu7urxfZRu4+W9HIhSSdYGZ/10S77nf3/2him3IMVzSyJElPKyp0P264kbtfIWl2/PRgSWclELs5XNJod19ZciP3x919XYn17ysaPavv42cn1sLmOatYX3D3+YpGoyRpP0VFakPfL3g83t13GIF296VqenSwq6KRS0l6oFgfL9jfGnd/pYn9letud7+pSIwNin70qh8d/baZ7Vu4jZkN0LbR7Vvd/cpG+mv9vupHOX/QRJvK6l8pGatts1p/x91/X2wjd18s6R/jp3uq+AhsoRcU9Z/t8ufuG7Xtb5EU/bA2yd3/UCTmY4pGTiXpKDM7uOE2ANoeilIArS7+Yjolfnqgoou8Z0XRw2NjTxY8XqXShy0+Ed930rZDABsz191fLLF+iqLD/CTpq4Ur4olh6g+NfCD+0ljKfxU8/nIT297QxPpyFc60/HN39xLbFn5RDT1D8+PuviiJHbn7ekl/ip9+Ool9NtNz7v54ifWFX/6PLlxhZrtpW9+oU3TYalEeHWK9ww8zBTY2FqeVXdvYCndfpW3/DnZXdJh1ocIfEX5RKoi7/1XS7+KnJ5nZJ0psnlj/agX177lO0p2lNowLx/rCuqm/IVPdfXMj6wr/nm5RdJh5Y54oeByyHwFICeeUAgjl54pG/bpI+pGZ3ezu76TcJkl6psS6VQWPa4uNnjSybVMzij5SaqW7v2VmLys6B+sIM9vH3etHZ07Uth8UPzSz05uItWvB46NKbLdF0Tl1SagfiXNF542V8kdtO9cwdDFXqojbTlx8fEPRebrHKTrPtrOiczgb6pVI65rn6SbWryh43LB/Hqdt/eT/mujnUnT4e/9iK9z9XTObr6gP/L2Z3a/ox47H3f1vTex3Z72r6HzNUv4g6fz48SBF5/nW+1x8v0nSkWZ2ZBP7+kTB/ScVHaJbTNn9KySLZryu//zqJI204hNHF9oQ35f6GyKV//f01YK/aU1tu9M
zNAPID4pSAEHEX1avkXSNosP7Jkj6SbqtkhRNDNSYwutiltqu4ba7NbFtU6Ob9dsco6joOUjbDhnsXbDNOfGtXKW+3K119w+bsa9Susf3b8UjiI1y94/NbImiwqiLmXXycBNhrWh6E8nMjlU08VPfMve79063aOc1NUlSqf7Zo+Dxn8uI1dQ2Fyj64WVvRYfFni7pfTN7RtEI2MOSniyj+C3XkiZG46Xt/831aLCud3zfSdsm1SpXqX9TZfWvFBysbT9sDVDz3nNTBWKjfyfd/aOC4jfJv6cA2gAO3wUQ0g3adhjYeDNr+OUwuGZ8MU7qC7QUTfDRlPcLHheeg7tPww2boVOJdRtLrGuu+mujvl9yq202FDwueV3VhDX5nuNZWR/WtoJ0maRbFM2g/A+KDjn+anyrPyQ7jf9bW9I/9yx43Ny+uQN3r1E0G/Ad2pbjPSWdrGjm38ckLTGzpM4hbsm/Jykf/6aS1JL3u2sT69P4ewqgDWCkFEAw7r7RzK5Q9KV+d0VfUP85yRg5ueD6Hk1vsl2hsKGRx+e6+/RkmpSo9ZL21fbvoZTCIqHkyGoKxis6D1qKLuvxT42dM2dmlwZrVbIKC7bm9s2i3P0NSWPMbJyiS4v8naTPSvq8on/7vSX92swOKTbRWDO15N9T/fN9Fc1ae1gL25IHhe//dnf/x0a3BIBA8vDlDUDb8itF13mUopkwDy/zdfWHdJYamZCkA3aqVWGV857rt3FF106tV3hIYD9lU118f5CZlRz5tOh4vvqJodYGPHS3XH8f32+WdFGJSVyk6BqMeVQ4O+wny9i+nG0kSe7+obs/6u5XuftXFBX4Fyvq15J0uZntX35Ti+pjTZ8UWfhvruFsuPX/pg42szQOvQ4tD39DALQzFKUAgoonO7k8ftpR0s/KfOm6+L6pQ37TmPm0uU4utdLMDtK2CUVeazAhyGPa9oX+tIyODM+P703R9S9L+TttGymdX2rDlHSL79eWuiyMmR2v6HIopRQestjkzDIBLVJ0HVMpmlG2qT41ZGcDufsGd/83RefoStFkQYN2dn+xfRSdG1nKFwoeP9tg3f/F9x0kjWhhW9JU379K9i13f1vSS/HTgVxyBUAWZPHLDIC27y5tu6zEaEWT3DSl/kvUoWZWaqTmey1pWCDDzKzULJbfU/QFWZJ+W7givrzOnPjpEYquj5k19xU8/lETo1gXN/K6rKg/X/HAJkZ9Ly+xrl7hYZPlHtrc6uIJrh6Mn/aQdEZj25rZEDUy824zLS14nMSpRI1eM9TMumrbNXA3atu/n3p3FDy+3Mwy89k0U33/Kqf99Yf97yLpX1unOQBQPopSAMHFM2XWn39nkr5bxssKv0heU6zQMbOfadvhllnWUdJv4i/L2zGzEZJ+FD/9QNH5tw39P20b2bqhqQljzOwQM/u5mR1YarsEzda2SX9OlPTzYqNvZvZTbRuZWqYmrpeYkvpRNZN0ZcOVFvmZohlmm/JGweOmRvZCm1Lw+EYzO6bhBmbWW9LtpXZiZseb2WVm1q3ENgdoW+HrKn3d03L9g5ntcH56XGDepW2T+/yq4Yi3uz+jbT+IHCFpZhPt72hmp5vZ+Y1tk5L6/rW/mR3SxLY3SXozfvwtM/sPM2v01Agz29vMvmdmefj7CiCHmOgIQCrcfZaZ/VHR4Zvl/LJ/q6JLyHSR9HVJj5vZnYouhXGIohHXSkXXHxzdKo1Ozv8qKmJeNLNpkv6kaLKWoYq+rNcX3Be7+7KGL3b3BWb2HUnTFB3++Gsz+6GkBxRd+uIjRRO3fEpRUXhCvM/rWu0dbd++j83sbElPKprU5oeSvhB/XssVHRL7DUUT30hRgX1OgpekSdLNks5TNHL9PTOrUDR6/ZaiS2v8g6TjFY3kb5Q0sMS+Hlf0XneV9GMzqy/I6i9/8Y67p3IIs7s/bGa3SzpX0XnZz8bP/6josNBKRXnYW9K9iv4NSjvOorqPokPyJ5rZk/HrX1M0gVU
XSccqylmXePs73f0vLWz+QkX9/Zb4ur33Kzrcv/5Igvpzfd/Qth/DGjov3v5YRYf6/tnM7lV07d63FV2WpLuiHxO+HLf/Vy1sd9IekTQyfvxbM7tF0fnd9Z/Rn9x9hSS5+/txrv5P0Wd6kaRvmNk9ivrke4pmwj5M0d+PLyj6W3N2kHcCoN2hKAWQpku07Xyuktx9TTwi+FtFXxBPjG+FZin6Epr1ovQ6RZONXCDpp0XWu6SfufuNje3A3X9lZqsVFabdFF2Co6JEzLWSghV97v6cmX1R0QhU/Zf5YqOD70j6B3efF6ptzeHuC83su5JuVHR00UnxrdDLkk6T9Msm9vW2mf1CUb/vrB3Pp/4/teB8zQRUKWrX1xX9G/tnbT879seKRvHf1baitOFsyfUFUAcVz1Whe+KYLfWupH9U9O9/aHxraImkL7v7e8V24O7vmdmJiv49fVPRj0RNXQe44YRJabtV0d+UIxT9ONKwP/6jCka64759gqKR5OMVHbp9UYn9f6Smr4cLADuFw3cBpMbdH9OO53eV2v73igqv2yT9RdGMvGskParoF/yR7l7ONQtT5+7jJZ0qaaaiL7eb4vvfSDrR3SeVsY+ZikYy/lnSDEWHwG7Utrw8pejasCMk9YgnOAnG3Z9SdH3PHygquNYoGilcG7ftUkl93H1uyHY1l7vfougHkP9RNEL6N0mrFY0C/kBSpbsvLnNfP5V0pqJ+/5a2zSqdOnf/m7ufIWmUovatUVSI/EXRodUnuvu1kgpny32nwT4ek3SkpO8oKjpfUXSu48fx/UuKiqfPu/s33T2Ra3m6+0JFhdWVkl5QVCxvkPScoh9++rv7n5vYx3p3H63ox5Mp8WvXKpp5eYOk1xUd5fADRf22nPOIg3H3DZIGS7pK0gJFxXrJ64G6+6uKCtjTFJ1n+pqiUdItikabFyk65/ZcSd3dvey/1wDQHBad2gUAANA0M7tP0tfip/u7+zultm/FdtR/gfk/dx+SRhsAAMlgpBQAAJQlnuxoePx0UVoFKQCgbaEoBQAAMrM+ZtarxPqeiiYRqp+ldWqQhgEA2jwmOgIAAJL0GUm3mdljimYKXqLoHOX9FZ2r+A1FEwBJ0tOSqtNoJACg7aEoBQAA9TpKOjm+NWaepFHuviVIiwAAbR5FKQAAkKIZnL8laZiimWwPUHQ9zk2SVkl6RtLd8azPAAAkJhOz7x5wwAHeu3fvtJux09asWaOuXbu2+Zh5RJ4AAACA9NXW1r7t7kW/mGdipLR3796qqalJuxkAAAAAgFZgZm82to7ZdxMwadKkdhEzj8gTAAAAkG2ZOHy3srLS8zxSamYKncc0YuYReQIAAADSZ2a17l5ZbB0jpQAAAACA1FCUAgAAAABSQ1GagDQOPc7z4c4hkScAAAAg2yhKAQAAAACpYaKjBDDRUXaRJwAAACB9THQEAAAAAMgkilIAAAAAQGooShMwceLEdhEzj8gTAAAAkG2cUwoAAAAAaFWcU9rKevTo0S5i5hF5AgAAALKNojQBdXV1WrZsmb7whS/oqKOOUr9+/XTddddtXT9p0iT17NlTFRUVqqio0O9+9ztJ0pNPPqn+/ftr0KBBWrx4sSRp3bp1Gjp0aJMzxtbV1bXa+/nf//1fvfTSS1ufX3755Xr44YclSUOGDGnRtT/nzZuniooK9evXT5///Odb3NamNJanV199devnUVFRob333ltTpkyR1DqfV2tqrc/r3Xff1YgRI3TcccepX79+uu2227aumzNnjo488kgdfvjhuvrqq1v2BgAAANCudUy7AW1Fx44dde2112rAgAFav369Bg4cqC996Us6+uijJUnf//739aMf/Wi711x77bW67777tHTpUt1yyy269tprdcUVV+inP/2pzCyNtyEpKnKGDx++te0/+9nPEtnvunXrdP7552vOnDk65JBDtHr16kT2uzOOPPJILVy4UJK0ZcsW9ezZU1/96le3rufzkm666SY
dffTRmjlzptasWaMjjzxS3/rWt9ShQwddcMEFeuihh9SrVy8NGjRII0eO3BofAAAAaA5GShMwYMAAde/eXQMGDJAk7bXXXjrqqKO0YsWKkq/bddddtXHjRn3wwQfaddddtWTJEq1YsaLkCOKcOXP0qU99Snvuuae+973vafjw4ZKi0b1f/OIXW7c75phjtHTpUknS6aefroEDB6pfv36qrq7euk3nzp116aWX6rjjjtPgwYO1atUq/fGPf9SMGTP04x//WBUVFVqyZInOPfdc3XvvvTu05cEHH9RnPvMZDRgwQGeccYY2bNhQ8v3+93//t772ta/pkEMOkSQdeOCBJbdPQv1nUsojjzyiPn366NBDDy25XUs+r89+9rO5+7zMTOvXr5e7a8OGDerSpYs6duyo+fPn6/DDD9cnP/lJderUSaNHj9YDDzxQcl8AAABAYyhKE1BbW7vd86VLl+q5557Tpz/96a3LbrzxRvXv31/nnXee/vrXv0qSLrnkElVVVWnKlCkaP368Lr30Ul1xxRWNxvnwww81duxYzZw5U+vXr9dbb71VVvtuvfVW1dbWqqamRtdff73Wrl0rSXr//fc1ePBgLVq0SCeddJKmTZumv/u7v9PIkSP185//XAsXLlSfPn2K7vPtt9/WlVdeqYcfflgLFixQZWWl/v3f/11SdPjojBkzdnjNa6+9pr/+9a8aMmSIBg4cqDvuuKOs9rdEw8+mmLvvvltnnnnmdsuS/rwef/zx3H1e48eP18svv6wePXro2GOP1XXXXadddtlFK1as0MEHH7x1u169ejX5AwwAAADQGIrSBFRVVW19vGHDBo0aNUpTpkzR3nvvLUn6zne+oyVLlmjhwoXq3r27fvjDH0qSKioq9PTTT+vRRx/Vn//8Z/Xo0UPurm9+85s666yztGrVqu3ivPLKKzrssMPUt29fjRs3TmeddVZZ7bv++uu3jq4tW7ZMr7/+uiSpU6dOW0fuBg4cuHWkrhxPP/20XnrpJZ144omqqKjQ9OnT9eabb0qKDh8dOXLkDq/ZvHmzamtrNXv2bM2dO1dXXHGFXnvttbJj7ozCz6aYTZs2acaMGTrjjDO2LmuNz8vMcvd5zZ07VxUVFVq5cqUWLlyo8ePH67333it6/myahy8DAAAg3yhKEzBt2jRJ0t/+9jeNGjVK3/rWt/S1r31t6/pu3bqpQ4cO2mWXXTR27FjNnz9/u9e7u6688kpddtllmjx5siZPnqyzzjpL119//Q6x6r/818es17FjR3388cdbn3/44YeSoomFHn74YT311FNatGiRjj/++K3rdt11163769ChgzZv3lz2e3Z3felLX9LChQu1cOFCvfTSS/rVr35V8jW9evXSsGHDtOeee+qAAw7QSSedpEWLFpUdc2c0zFNDv//97zVgwAB169Zt67LW+LwaysPnddttt+lrX/uazEyHH364DjvsML3yyivq1auXli1btnW75cuXM8sxAAAAdhpFaULcXd/+9rd11FFH6Qc/+MF26wpngL3//vt1zDHHbLd++vTpOvXUU7Xffvvpgw8+0C677KJddtlFH3zwwXbbfepTn9Ibb7yhJUuWSJLuuuuuret69+6tBQsWSJIWLFigN954Q1I0g+p+++2nPfbYQ6+88oqefvrpJt/LXnvtpfXr15fcZvDgwXryySe3zkL7wQcfNDnqedppp+nxxx/X5s2b9cEHH+iZZ57RUUcd1WR7WtNdd921w6G7fF6RQw45RI888ogkadWqVXr11Vf1yU9+UoMGDdLrr7+uN954Q5s2bdLdd99ddKQVAAAAKAez75ap94TZJdc/+eST+vWvf61jjz1WFRUVkqR/+Zd/0SmnnKKf/OQnWrhwocxMvXv31tSpU7e+7oMPPtD06dP14IMPSpJ+8IMfaNSoUerUqdN2RYwk7bbbbqqurtapp54qSTr00EP1wgsvSJJGjRqlO+64QxUVFRo0aJCOOOIISdKwYcP
0n//5n+rfv7+OPPJIDR48uMn3Onr0aI0dO1bXX3990QlzJKlr1666/fbbdeaZZ+qjjz6SJF155ZU64ogjdPnll6uysnKHQuWoo47SsGHD1L9/f+2yyy76p3/6px0KvpA++OADPfTQQ9t9HpJa5fM64IAD9NnPfjZXn9dll12mc889V8cee6zcXddcc40OOOAASdE5t0OHDtWWLVt03nnnqV+/fk22EwAAACjG0ry+Yr3KykpvybUvQyhVlG5ev1bLbzonYGuklStX6rXXXtMvfvELzZo1K2jsPFm5cmVmDi2dN28enxcAAADaJTOrdffKYus4fDcBm1YtDh6znFllQZ4AAACArGOktEylRkrfvGZ40RlJW5OZBY+ZR+QJAAAASB8jpQAAAACATKIoBQAAAACkhqI0AV2Gjg8es+GMsSiOPAEAAADZRlGagL0qhgWPWVVVFTxmHpEnAAAAINsoShPw5jXDg8c0s+Ax84g8AQAAANlGUQoAAAAASA1FKQAAAAAgNRSlCdi9z6DgMYcPD3/IcB6RJwAAACDbKEoTcODXJwaPOXPmzOAx84g8AQAAANlGUZqA1fdODh5zxIgRwWPmEXkCAAAAso2iNAEblzwbPOasWbOCx8wj8gQAAABkG0VpG2JmOvvss7c+37x5s7p27drs8yqHDBmimpoaSdIpp5yidevWJdlMLVy4UJ/5zGfUr18/9e/fX7/5zW+2rvvc5z6niooKVVRUqEePHjr99NMTjQ0AAAAgWzqm3QAkZ88999QLL7ygjRs3avfdd9dDDz2knj17tmifv/vd7xJq3TZ77LGH7rjjDvXt21crV67UwIEDNXToUO277756/PHHt243atQonXbaaYnHBwAAAJAdjJQm4NCLwx8i6u5Fl3/lK1/R7NmzJUl33XWXzjzzzK3r3n//fZ133nkaNGiQjj/+eD3wwAOSpI0bN2r06NHq37+/vvnNb2rjxo1bX9O7d2+9/fbbkqTTTz9dAwcOVL9+/VRdXb11m86dO+vSSy/Vcccdp8GDB2vVqlUl237EEUeob9++kqQePXrowAMP1Jo1a7bbZv369frDH/7Q4pHSxvIEAAAAIBvKLkrNrIOZPWdms+LnXczsITN7Pb7fr2DbS8xssZm9amZDW6PhWbJ+4ZzgMQuLwkKjR4/W3XffrQ8//FDPP/+8Pv3pT29dd9VVV+nkk0/Ws88+q0cffVQ//vGP9f777+uWW27RHnvsoeeff16XXnqpamtri+771ltvVW1trWpqanT99ddr7dq1kqJid/DgwVq0aJFOOukkTZs2TZI0Y8YMXX755SXfx/z587Vp0yb16dNnu+X333+/vvjFL2rvvfcuOyfFNJYnAAAAANnQnJHSCyW9XPB8gqRH3L2vpEfi5zKzoyWNltRP0jBJN5tZh2Sam03vzL0xeMxx48YVXd6/f38tXbpUd911l0455ZTt1j344IO6+uqrVVFRoSFDhujDDz/UX/7yFz322GM666yztr6+f//+Rfd9/fXXbx0NXbZsmV5//XVJUqdOnbaetzpw4EAtXbpUkjRy5Ej97Gc/a/Q91NXV6eyzz9Ztt92mXXbZvis2HOXdWY3lCQAAAEA2lHVOqZn1knSqpKsk/SBefJqkIfHj6ZLmSbo4Xn63u38k6Q0zWyzpBElPJdZqlDRy5Ej96Ec/0rx587aOZkrRoaz33XefjjzyyB1eY2Yl9zlv3jw9/PDDeuqpp7THHntsLWoladddd936+g4dOmjz5s1NtvG9997TqaeeqiuvvFKDBw/ebt3atWs1f/583X///U3uBwAAAEC+lTtSOkXSTyR9XLCsm7vXSVJ8f2C8vKekZQXbLY+XIZDzzjtPl19+uY499tjtlg8dOlQ33HDD1vMsn3vuOUnSSSedpDvvvFOS9MILL+j555/fYZ/vvvuu9ttvP+2xxx565ZVX9PTTT+90+zZt2qSvfvWrOuecc3TGGWfssP5//ud/NHz4cO2
22247HQMAAABAPjRZlJrZcEmr3b34iYZFXlJk2Q6zzZhZlZnVmFlNw0lu8qbrqMuCx5wxY0aj63r16qULL7xwh+WXXXaZ/va3v6l///465phjdNllUbu/853vaMOGDerfv7/+7d/+TSeccMIOrx02bJg2b96s/v3767LLLtthdLOxNhY7p/See+7RY489pttvv33r5V8WLly4df3dd9+dyKG79W0AAAAAkF3W1OykZvavks6WtFnSbpL2lvRbSYMkDXH3OjPrLmmeux9pZpdIkrv/a/z6uZImuXujh+9WVlZ6/XUxs6r3hNmNrtu8fq2W33ROwNZIK1euVI8ePYLGzCPyBAAAAKTPzGrdvbLYuiZHSt39Enfv5e69FU1g9Ad3P0vSDElj4s3GSHogfjxD0mgz+4SZHSapr6T5LXwPmbbi5jFNb5Swll5/tL0gTwAAAEC2lTXRUSOulnSPmX1b0l8knSFJ7v6imd0j6SVFo6sXuPuWFrcUAAAAANDmNKsodfd5imbZlbuvlfTFRra7StFMvQAAAAAANKo51ylFIzofNzR4zLFjxwaPmUfkCQAAAMg2itIE7D/su8FjVldXB4+ZR+QJAAAAyDaK0gTU3b7j5Vda28CBA4PHzCPyBAAAAGQbRWkCNq1aEjzmggULgsfMI/IEAAAAZBtFKQAAAAAgNRSlCejQuUvwmN27dw8eM4/IEwAAAJBtFKUJ6HXBHcFjrly5MnjMPCJPAAAAQLZRlCZg3RN3Bo85adKk4DHziDwBAAAA2WbunnYbVFlZ6TU1NWk3o6TeE2Y3uu7Na4YrdB7NLHjMPCJPAAAAQPrMrNbdK4utY6QUAAAAAJAailIAAAAAQGooShNw0JgpwWNm/XDnrCBPAAAAQLZRlAIAAAAAUkNRmoC3pl8UPGZlZdFzhNEAeQIAAACyjaIUAAAAAJAailIAAAAAQGooShOwz4lnBo85ceLE4DHziDwBAAAA2WbunnYbVFlZ6VmfJbX3hNkl1y+9+tRALQEAAACAfDGzWncvOuELI6UJWH7TOcFj9ujRI3jMPCJPAAAAQLZRlCZgy4Z3gsesq6sLHjOPyBMAAACQbRSlAAAAAIDUUJQmoFO3PsFjDhgwIHjMPCJPAAAAQLZRlCag+7nXBY9ZW1sbPGYekScAAAAg2yhKE7B2zg3BY1ZVVQWPmUfkCQAAAMg2itIEbFg0N3jMadOmBY+ZR+QJAAAAyDaKUgAAAABAaihKAQAAAACpoShNQM/zpwePuWLFiuAx84g8AQAAANlGUZqATasWB4/JrLLlIU8AAABAtlGUJmDNfVcEjzly5MjgMfOIPAEAAADZRlEKAAAAAEgNRSkAAAAAIDUUpQnoMnR88JhTp04NHjOPyBMAAACQbRSlCdirYljwmFVVVcFj5hF5AgAAALKNojQBb14zPHhMMwseM4/IEwAAAJBtFKUAAAAAgNRQlAIAAAAAUkNRmoDd+wwKHnP48PCHDOcReQIAAACyjaI0AQd+fWLwmDNnzgweM4/IEwAAAJBtFKUJWH3v5OAxR4wYETxmHpEnAAAAINuaLErNbDczm29mi8zsRTObHC+fZGYrzGxhfDul4DWXmNliM3vVzIa25hvIgo1Lng0ec9asWcFj5hF5AgAAALKtYxnbfCTpZHffYGa7SnrCzH4fr/sPd/9F4cZmdrSk0ZL6Seoh6WEzO8LdtyTZcAAAAABA/jU5UuqRDfHTXeObl3jJaZLudveP3P0NSYslndDilgIAAAAA2pyyzik1sw5mtlDSakkPufsz8arxZva8md1qZvvFy3pKWlbw8uXxsjbr0IvDHyLqXup3AdQjTwAAAEC2lVWUuvsWd6+Q1EvSCWZ2jKRbJPWRVCGpTtK18eZWbBcNF5hZlZnVmFnNmjVrdqLp2bF+4ZzgMaurq4PHzCPyBAAAAGRbs2bfdfd1kuZJGubuq+Ji9WNJ07TtEN3lkg4ueFkvSSuL7Kva3SvdvbJr164
70/bMeGfujcFjjhs3LnjMPCJPAAAAQLaVM/tuVzPbN368u6S/l/SKmXUv2Oyrkl6IH8+QNNrMPmFmh0nqK2l+oq0GAAAAALQJ5cy+213SdDProKiIvcfdZ5nZr82sQtGhuUsljZMkd3/RzO6R9JKkzZIuYOZdAAAAAEAxTRal7v68pOOLLD+7xGuuknRVy5qWH11HXRY85owZM4LHzCPyBAAAAGRbs84pRXGduh0ePObAgQODx8wj8gQAAABkG0VpAlbcPCZ4zJ492/RVdhJDngAAAIBsoygFAAAAAKSGohQAAAAAkBqK0gR0Pm5o8Jhjx44NHjOPyBMAAACQbRSlCdh/2HeDx6yurg4eM4/IEwAAAJBtFKUJqLv9wuAxmVW2POQJAAAAyDaK0gRsWrUkeMwFCxYEj5lH5AkAAADINopSAAAAAEBqKEoT0KFzl+Axu3fvHjxmHpEnAAAAINsoShPQ64I7gsdcuXJl8Jh5RJ4AAACAbKMoTcC6J+4MHnPSpEnBY+YReQIAAACyzdw97TaosrLSa2pq0m5GSb0nzG503ZvXDFfoPJpZ8Jh5RJ4AAACA9JlZrbtXFlvHSCkAAAAAIDUUpQAAAACA1FCUJuCgMVOCx8z64c5ZQZ4AAACAbKMoBQAAAACkhqI0AW9Nvyh4zMrKoucIowHyBAAAAGQbRSkAAAAAIDUUpQAAAACA1FCUJmCfE88MHnPixInBY+YReQIAAACyzdw97TaosrLSsz5Lau8Js0uuX3r1qYFaAgAAAAD5Yma17l50whdGShOw/KZzgsfs0aNH8Jh5RJ4AAACAbKMoTcCWDe8Ej1lXVxc8Zh6RJwAAACDbKEoBAAAAAKmhKE1Ap259gsccMGBA8Jh5RJ4AAACAbKMoTUD3c68LHrO2tjZ4zDwiTwAAAEC2UZQmYO2cG4LHrKqqCh4zj8gTAAAAkG0UpQnYsGhu8JjTpk0LHjOPyBMAAACQbRSlAAAAAIDUUJQCAAAAAFJDUZqAnudPDx5zxYoVwWPmEXkCAAAAso2iNAGbVi0OHpNZZctDngAAAIBsoyhNwJr7rggec+TIkcFj5hF5AgAAALKNohQAAAAAkBqKUgAAAABAaihKE9Bl6PjgMadOnRo8Zh6RJwAAACDbKEoTsFfFsOAxq6qqgsfMI/IEAAAAZBtFaQLevGZ48JhmFjxmHpEnAAAAINsoSgEAAAAAqWmyKDWz3cxsvpktMrMXzWxyvLyLmT1kZq/H9/sVvOYSM1tsZq+a2dDWfAMAAAAAgPwqZ6T0I0knu/txkiokDTOzwZImSHrE3ftKeiR+LjM7WtJoSf0kDZN0s5l1aIW2Z8bufQYFjzl8ePhDhvOIPAEAAADZ1mRR6pEN8dNd45tLOk3S9Hj5dEmnx49Pk3S3u3/k7m9IWizphCQbnTUHfn1i8JgzZ84MHjOPyBMAAACQbWWdU2pmHcxsoaTVkh5y92ckdXP3OkmK7w+MN+8paVnBy5fHy9qs1fdODh5zxIgRwWPmEXkCAAAAsq2sotTdt7h7haRekk4ws2NKbF5sulPfYSOzKjOrMbOaNWvWlNXYrNq45NngMWfNmhU8Zh6RJwAAACDbmjX7rruvkzRP0bmiq8ysuyTF96vjzZZLOrjgZb0krSyyr2p3r3T3yq5duza/5QAAAACA3Ctn9t2uZrZv/Hh3SX8v6RVJMySNiTcbI+mB+PEMSaPN7BNmdpikvpLmJ9xuAAAAAEAb0LGMbbpLmh7PoLuLpHvcfZaZPSXpHjP7tqS/SDpDktz9RTO7R9JLkjZLusDdt7RO87Ph0IvDHyLqvsMR0SiCPAEAAADZVs7su8+7+/Hu3t/dj3H3n8XL17r7F929b3z/TsFrrnL3Pu5+pLv/vjXfQBasXzgneMzq6urgMfOIPAEAAADZ1qxzSlHcO3NvDB5z3LhxwWPmEXkCAAAAso2iFAAAAACQGopSAAAAAEBqKEoT0HXUZcFjzpgxI3jMPCJPAAA
AQLZRlCagU7fDg8ccOHBg8Jh5RJ4AAACAbKMoTcCKm8c0vVHCevbsGTxmHpEnAAAAINsoSgEAAAAAqaEoBQAAAACkhqI0AZ2PGxo85tixY4PHzCPyBAAAAGQbRWkC9h/23eAxq6urg8fMI/IEAAAAZBtFaQLqbr8weExmlS0PeQIAAACyjaI0AZtWLQkec8GCBcFj5hF5AgAAALKNohQAAAAAkBqK0gR06NwleMzu3bsHj5lH5AkAAADINorSBPS64I7gMVeuXBk8Zh6RJwAAACDbKEoTsO6JO4PHnDRpUvCYeUSeAAAAgGwzd0+7DaqsrPSampq0m1FS7wmzG1335jXDFTqPZhY8Zh6RJwAAACB9Zlbr7pXF1jFSCgAAAABIDUUpAAAAACA1FKUJOGjMlOAxs364c1aQJwAAACDbKEoBAAAAAKmhKE3AW9MvCh6zsrLoOcJogDwBAAAA2UZRCgAAAABIDUUpAAAAACA1FKUJ2OfEM4PHnDhxYvCYeUSeAAAAgGwzd0+7DaqsrPSsz5Lae8LskuuXXn1qoJYAAAAAQL6YWa27F53whZHSBCy/6ZzgMXv06BE8Zh6RJwAAACDbKEoTsGXDO8Fj1tXVBY+ZR+QJAAAAyDaKUgAAAABAaihKE9CpW5/gMQcMGBA8Zh6RJwAAACDbKEoT0P3c64LHrK2tDR4zj8gTAAAAkG0UpQlYO+eG4DGrqqqCx8wj8gQAAABkG0VpAjYsmhs85rRp04LHzCPyBAAAAGQbRSkAAAAAIDUUpQAAAACA1FCUJqDn+dODx1yxYkXwmHlEngAAAIBsoyhNwKZVi4PHZFbZ8pAnAAAAINsoShOw5r4rgsccOXJk8Jh5RJ4AAACAbKMoBQAAAACkhqIUAAAAAJCaJotSMzvYzB41s5fN7EUzuzBePsnMVpjZwvh2SsFrLjGzxWb2qpkNbc03kAVdho4PHnPq1KnBY+YReQIAAACyrWMZ22yW9EN3X2Bme0mqNbOH4nX/4e6/KNzYzI6WNFpSP0k9JD1sZke4+5YkG54le1UMCx6zqqoqeMw8Ik8AAABAtjU5Uurude6+IH68XtLLknqWeMlpku5294/c/Q1JiyWdkERjs+rNa4YHj2lmwWPmEXkCAAAAsq1Z55SaWW9Jx0t6Jl403syeN7NbzWy/eFlPScsKXrZcpYtYAAAAAEA7VXZRamadJd0n6SJ3f0/SLZL6SKqQVCfp2vpNi7zci+yvysxqzKxmzZo1zW03AAAAAKANKKsoNbNdFRWkd7r7byXJ3Ve5+xZ3/1jSNG07RHe5pIMLXt5L0sqG+3T3anevdPfKrl27tuQ9pG73PoOCxxw+PPwhw3lEngAAAIBsK2f2XZP0K0kvu/u/FyzvXrDZVyW9ED+eIWm0mX3CzA6T1FfS/OSanD0Hfn1i8JgzZ84MHjOPyBMAAACQbeWMlJ4o6WxJJze4/Mu/mdmfzOx5SV+Q9H1JcvcXJd0j6SVJcyRd0JZn3pWk1fdODh5zxIgRwWPmEXkCAAAAss3cdzjdM7jKykqvqalJuxkl9Z4wu9F1b14zXKHzaGbBY+YReQIAAADSZ2a17l5ZbF2zZt8FAAAAACBJFKUAAAAAgNRQlCbg0ItnBY/JIanlIU8AAABAtlGUJmD9wjnBY1ZXVwePmUfkCQAAAMg2itIEvDP3xuAxx40bFzxmHpEnAAAAINsoSgEAAAAAqaEoBQAAAACkhqI0AV1HXRY85owZM4LHzCPyBAAAAGQbRWkCOnU7PHjMgQMHBo+ZR+QJAAAAyDaK0gSsuHlM8Jg9e/YMHjOPyBMAAACQbRSlAAAAAIDUUJQCAAAAAFJDUZqAzscNDR5z7NixwWPmEXkCAAAAso2iNAH7D/tu8JjV1dXBY+YReQIAAACyjaI0AXW3Xxg8JrPKloc8AQAAANlGUZqATauWBI+5YMGC4DHziDwBAAAA2UZRCgAAAABIDUVpAjp07hI8Zvfu3YPHzCPyBAA
AAGQbRWkCel1wR/CYK1euDB4zj8gTAAAAkG0UpQlY98SdwWNOmjQpeMw8Ik8AAABAtpm7p90GVVZWek1NTdrNKKn3hNmNrnvzmuEKnUczCx4zj8gTAAAAkD4zq3X3ymLrGCkFAAAAAKSGohQAAAAAkBqK0gQcNGZK8JhZP9w5K8gTAAAAkG0UpQAAAACA1FCUJuCt6RcFj1lZWfQcYTRAngAAAIBsoygFAAAAAKSGohQAAAAAkBqK0gTsc+KZwWNOnDgxeMw8Ik8AAABAtpm7p90GVVZWetZnSe09YXbJ9UuvPjVQSwAAAAAgX8ys1t2LTvjCSGkClt90TvCYPXr0CB4zj8gTAAAAkG0UpQnYsuGd4DHr6uqCx8wj8gQAAABkG0UpAAAAACA1FKUJ6NStT/CYAwYMCB4zj8gTAAAAkG0UpQnofu51wWPW1tYGj5lH5AkAAADINorSBKydc0PwmFVVVcFj5hF5AgAAALKNojQBGxbNDR5z2rRpwWPmEXkCAAAAso2iFAAAAACQGopSAAAAAEBqKEoT0PP86cFjrlixInjMPCJPAAAAQLY1WZSa2cFm9qiZvWxmL5rZhfHyLmb2kJm9Ht/vV/CaS8xssZm9amZDW/MNZMGmVYuDx2RW2fKQJwAAACDbyhkp3Szph+5+lKTBki4ws6MlTZD0iLv3lfRI/FzxutGS+kkaJulmM+vQGo3PijX3XRE85siRI4PHzCPyBAAAAGRbk0Wpu9e5+4L48XpJL0vqKek0SfXHrU6XdHr8+DRJd7v7R+7+hqTFkk5IuN0AAAAAgDagWeeUmllvScdLekZSN3evk6LCVdKB8WY9JS0reNnyeBkAAAAAANspuyg1s86S7pN0kbu/V2rTIsu8yP6qzKzGzGrWrFlTbjMyqcvQ8cFjTp06NXjMPCJPAAAAQLaVVZSa2a6KCtI73f238eJVZtY9Xt9d0up4+XJJBxe8vJeklQ336e7V7l7p7pVdu3bd2fZnwl4Vw4LHrKqqCh4zj8gTAAAAkG3lzL5rkn4l6WV3//eCVTMkjYkfj5H0QMHy0Wb2CTM7TFJfSfOTa3L2vHnN8OAxo48FTSFPAAAAQLZ1LGObEyWdLelPZrYwXvZTSVdLusfMvi3pL5LOkCR3f9HM7pH0kqKZey9w9y1JNxwAAAAAkH9NFqXu/oSKnycqSV9s5DVXSbqqBe0CAAAAALQDzZp9F8Xt3mdQ8JjDh4c/ZDiPyBMAAACQbRSlCTjw6xODx5w5c2bwmHlEngAAAIBsoyhNwOp7JwePOWLEiOAx84g8AQAAANlGUZqAjUueDR5z1qxZwWPmEXkCAAAAso2iFAAAAACQGopSAAAAAEBqKEoTcOjF4Q8RdffgMfOIPAEAAADZ1uR1StG09QvnqPeE0tssvfrURGNWV1erqqoq0X22ReQJAAAAyDZGShPwztwbg8ccN25c8Jh5RJ4AAACAbKMoBQAAAACkhqIUAAAAAJAaitIEdB11WfCYM2bMCB4zj8gTAAAAkG0UpQno1O3w4DEHDhwYPGYekScAAAAg2yhKE7Di5jHBY/bs2TN4zDwiTwAAAEC2UZQCAAAAAFJDUQoAAAAASA1FaQI6Hzc0eMyxY8cGj5lH5AkAAADINorSBOw/7LvBY1ZXVwePmUfkCQAAAMg2itIE1N1+YfCYzCpbHvIEAAAAZBtFaQI2rVoSPOaCBQuCx8wj8gQAAABkG0UpAAAAACA1FKUJ6NC5S/CY3bt3Dx4zj8gTAAAAkG0UpQnodcEdwWOuXLkyeMw8Ik8AAABAtlGUJmDdE3cGjzlp0qTgMfOIPAEAAADZRlGagHefvCt4zMmTJwePmUfkCQAAAMg2ilIAAAAAQGooSgEAAAAAqaEoTcBBY6YEj1lTUxM8Zh6RJwAAACDbKEoBAAAAAKmhKE3AW9MvCh6zsrIyeMw8Ik8AAABAtlGUAgAAAABSQ1EKAAAAAEgNRWkC9jnxzOAxJ06cGDxmHpE
nAAAAINvM3dNugyorKz3rs6T2njC7Ra9fevWpCbUEAAAAAPLFzGrdveiEL4yUJmD5TecEj9mjR4/gMfOIPAEAAADZRlGagC0b3gkes66uLnjMPCJPAAAAQLZRlAIAAAAAUkNRmoBO3foEjzlgwIDgMfOIPAEAAADZRlGagO7nXhc8Zm1tbfCYeUSeAAAAgGyjKE3A2jk3BI9ZVVUVPGYekScAAAAg25osSs3sVjNbbWYvFCybZGYrzGxhfDulYN0lZrbYzF41s6Gt1fAs2bBobvCY06ZNCx4zj8gTAAAAkG3ljJTeLmlYkeX/4e4V8e13kmRmR0saLalf/JqbzaxDUo0FAAAAALQtTRal7v6YpHKveXKapLvd/SN3f0PSYkkntKB9AAAAAIA2rCXnlI43s+fjw3v3i5f1lLSsYJvl8bI2ref504PHXLFiRfCYeUSeAAAAgGzb2aL0Fkl9JFVIqpN0bbzcimzrxXZgZlVmVmNmNWvWrNnJZmTDplWLg8dkVtnykCcAAAAg23aqKHX3Ve6+xd0/ljRN2w7RXS7p4IJNe0la2cg+qt290t0ru3btujPNyIw1910RPObIkSODx8wj8gQAAABk204VpWbWveDpVyXVz8w7Q9JoM/uEmR0mqa+k+S1rIgAAAACgrerY1AZmdpekIZIOMLPlkiZKGmJmFYoOzV0qaZwkufuLZnaPpJckbZZ0gbtvaZWWAwAAAAByr8mi1N3PLLL4VyW2v0rSVS1pVN50GTo+eMypU6cGj5lH5AkAAADItpbMvovYXhXFLuPauqqqqoLHzCPyBAAAAGQbRWkC3rxmePCYZsUmOkZD5AkAAADINopSAAAAAEBqKEoBAAAAAKmhKE3A7n0GBY85fHj4Q4bziDwBAAAA2UZRmoADvz4xeMyZM2cGj5lH5AkAAADINorSBKy+d3LwmCNGjAgeM4/IEwAAAJBtFKUJ2Ljk2eAxZ82aFTxmHpEnAAAAINsoSgEAAAAAqaEoBQAAAACkhqI0AYdeHP4QUXcPHjOPyBMAAACQbRSlCVi/cE7wmNXV1cFj5hF5AgAAALKNojQB78y9MXjMcePGBY+ZR+QJAAAAyDaKUgAAAABAaihKAQAAAACpoShNQNdRlwWPOWPGjOAx84g8AQAAANlGUZqATt0ODx5z4MCBwWPmEXkCAAAAso2iNAErbh4TPGbPnj2Dx8wj8gQAAABkG0UpAAAAACA1FKUAAAAAgNRQlCag83FDg8ccO3Zs8Jh5RJ4AAACAbKMoTcD+w74bPGZ1dXXwmHlEngAAAIBsoyhNQN3tFwaPyayy5SFPAAAAQLZRlCZg06olwWMuWLAgeMw8Ik8AAABAtlGUAgAAAABSQ1GagA6duwSP2b179+Ax84g8AQAAANlGUZqAXhfcETzmypUrg8fMI/IEAAAAZBtFaQLWPXFn8JiTJk0KHjOPyBMAAACQbRSlCXj3ybuCx5w8eXLwmHlEngAAAIBsoygFAAAAAKSmY9oNaC96T5hdcv3Sq08N1BIAAAAAyA5GShNw0JgpwWPW1NQEj5lH5AkAAADINopSAAAAAEBqKEoT8Nb0i4LHrKysDB4zj8gTAAAAkG0UpQAAAACA1FCUAgAAAABSQ1GagH1OPDN4zIkTJwaPmUfkCQAAAMg2c/e026DKykrP+iypTV3SpaW4JAwAAACAtsrMat296IQvjJQmYPlN5wSP2aNHj+Ax84g8AQAAANlGUZqALRveCR6zrq4ueMw8Ik8AAABAtjVZlJrZrWa22sxeKFjWxcweMrPX4/v9CtZdYmaLzexVMxvaWg0HAAAAAORfOSOlt0sa1mDZBEmPuHtfSY/Ez2VmR0saLalf/JqbzaxDYq3NqE7d+gSPOWDAgOAx84g8AQAAANnWZFHq7o9Janh86mmSpsePp0s6vWD53e7+kbu/IWmxpBOSaWp2dT/3uuAxa2trg8fMI/IEAAAAZNvOnlPazd3rJCm+PzBe3lPSsoL
tlsfL2rS1c24IHrOqqip4zDwiTwAAAEC2JT3RkRVZVvSaM2ZWZWY1ZlazZs2ahJsR1oZFc4PHnDZtWvCYeUSeAAAAgGzb2aJ0lZl1l6T4fnW8fLmkgwu26yVpZbEduHu1u1e6e2XXrl13shkAAAAAgDzb2aJ0hqQx8eMxkh4oWD7azD5hZodJ6itpfsuaCAAAAABoqzo2tYGZ3SVpiKQDzGy5pImSrpZ0j5l9W9JfJJ0hSe7+opndI+klSZslXeDuW1qp7ZnR8/zpTW+UsBUrVgSPmUfkCQAAAMi2JotSdz+zkVVfbGT7qyRd1ZJG5c2mVYvVca/9g8asra1Vjx49gsbMI/IEAAAAZFvSEx21S2vuuyJ4zJEjRwaPmUfkCQAAAMg2ilIAAAAAQGooSgEAAAAAqaEoTUCXoeODx5w6dWrwmHlEngAAAIBsoyhNwF4Vw4LHrKqqCh4zj8gTAAAAkG0UpQl485rhwWOaWfCYeUSeAAAAgGyjKAUAAAAApIaiFAAAAACQGorSBOzeZ1DwmMOHhz9kOI/IEwAAAJBtFKUJOPDrE4PHnDlzZvCYeUSeAAAAgGzrmHYD2oLV905ucWHae8LskuuXXn3qds9HjBhBwVUG8gQAAABkGyOlCdi45NngMWfNmhU8Zh6RJwAAACDbKEoBAAAAAKmhKAUAAAAApIaiNAGHXhz+EFF3Dx4zj8gTAAAAkG0UpQlYv3BO8JjV1dXBY+YReQIAAACyjaI0Ae/MvTF4zHHjxgWPmUfkCQAAAMg2ilIAAAAAQGooSgEAAAAAqaEoTUDXUZcFjzljxozgMfOIPAEAAADZ1jHtBrQFnbod3uoxek+Yvd3zzev/qu8+uW3Z0qtPbfU25NHAgQPTbgIAAACAEhgpTcCKm8e0i5h51LNnz7SbAAAAAKAEilIAAAAAQGooSgEAAAAAqaEoTUDn44a2i5h5NHbs2LSbAAAAAKAEitIE7D/su+0iZh5VV1en3QQAAAAAJVCUJqDu9gvbRcw8YvZdAAAAINsoShOwadWSdhEzjxYsWJB2EwAAAACUQFEKAAAAAEhNx7Qb0BZ06NylTcTsPWF2yfVLrz418ZitrXv37mk3AQAAAEAJjJQmoNcFd7SLmHm0cuXKtJsAAAAAoASK0gSse+LOdhEzjyZNmpR2EwAAAACUQFGagHefvKtdxMyjyZMnp90EAAAAACVQlAIAAAAAUkNRCgAAAABIDUVpAg4aM6VdxMyjmpqatJsAAAAAoAQuCdOONHXJFwAAAAAIjZHSBLw1/aJ2ETOPKisr024CAAAAgBIoSgEAAAAAqeHw3TaCQ3MBAAAA5FGLilIzWyppvaQtkja7e6WZdZH0G0m9JS2V9A13/2vLmplt+5x4ZruImUcTJ05MuwkAAAAASkji8N0vuHuFu9efvDdB0iPu3lfSI/HzNm3fz36rXcTMo0mTJqXdBAAAAAAltMY5padJmh4/ni7p9FaIkSnLbzqnXcTMox49eqTdBAAAAAAltLQodUkPmlmtmVXFy7q5e50kxfcHtjBG5m3Z8E67iJlHdXV1aTcBAAAAQAktnejoRHdfaWYHSnrIzF4p94VxEVslSYccckgLmwEAAAAAyKMWjZS6+8r4frWk+yWdIGmVmXWXpPh+dSOvrXb3Snev7Nq1a0uakbpO3fq0i5h5NGDAgLSbAAAAAKCEnS5KzWxPM9ur/rGkL0t6QdIMSWPizcZIeqCljcy67ude1y5i5lFtbW3aTQAAAABQQktGSrtJesLMFkmaL2m2u8+RdLWkL5nZ65K+FD9v09bOuaFdxMyjqqqqpjcCAAAAkJqdLkrd/c/uflx86+fuV8XL17r7F929b3zf5mfk2bBobruImUfTpk1LuwkAAAAASmiNS8IAAAAAAFCWls6+i3ak94TZJdcvvfrUQC0BAAAA0FYwUpqAnudPbxcx82jFihVpNwEAAABACRSlCdi0anG7iJlHzL4LAAAAZBtFaQLW3Hd
Fu4iZRyNHjky7CQAAAABK4JxSJIZzTgEAAAA0FyOlAAAAAIDUMFKagC5Dx7eLmK2tNUZap06durPNAQAAABAARWkC9qoY1i5itlRTRWdrqKqqCh4TAAAAQPk4fDcBb14zvF3EzCMzS7sJAAAAAEqgKAUAAAAApIaiFAAAAACQGorSBOzeZ1C7iJlHw4dzmDMAAACQZUx0lIADvz6xXcTMuqITKfX75+2Wc61UAAAAIFsoShOw+t7JwYvENGLmUXvLU2tcVgcAAABoTRy+m4CNS55tFzHziDwBAAAA2cZIKXIjxHVOGWkEAAAAwmKkFAAAAACQGorSBBx68ax2ETOPyBMAAACQbRSlCVi/cE67iJlH5AkAAADINorSBLwz98Z2ETOPyBMAAACQbRSlAAAAAIDUMPsukCBm7wUAAACah6I0AV1HXdYuYuZRwzyFuKwMAAAAgPJx+G4COnU7vF3EzCPyBAAAAGQbI6UJWHHzmOCXHkkjZh7lLU8c/tty5BAAACBfGCkFAAAAAKSGohQAAAAAkBqK0gR0Pm5ou4iZR+QJAAAAyDbOKU3A/sO+2y5i5lHSeUp79t6045eDczoBAADQHIyUJqDu9gvbRcw8Ik8AAABAtjFSmoBNq5a0i5h5RJ6aj5FOAAAAhERRCgBtDD8sAACAPKEoTUCHzl3aRcw8Ik9oqJzzcinaAAAAwqEoTUCvC+5oFzHzKGt5SnuiorTjZ6UNAAAAyA4mOkrAuifubBcx84g8AQAAANnGSGkC3n3yLu372W+1+Zh5RJ6QR5wTCgAA2hOKUgDNwuG3KEdL+0l7KLz58QEAgAhFKQA00NqFN8UGykHRCgBoLyhKE3DQmCntImYekSdkUUuLXkarWx8FIQAA4bRaUWpmwyRdJ6mDpF+6+9WtFQsAUL4kitrWLspaWhRSVAIAkB+tUpSaWQdJN0n6kqTlkp41sxnu/lJrxEvbW9Mv0qEXz2rzMfOIPAGtI+3R2rTjtwUU7gCArGitkdITJC129z9LkpndLek0SW2yKAUAtC/toSjOQtHa2m3IwntsSh7aWEo5/1ay/h6Q/36I7GutorSnpGUFz5dL+nQrxQIAoFkoKrO//zxoCzloDzNlp11Qhegnrf0DDdLX1n/gMXdPfqdmZ0ga6u7/FD8/W9IJ7v7dgm2qJFXFT4+U9GriDUnOAZLeTrsRyAX6CspFX0Fz0F9QLvoKmoP+gnIl0VcOdfeuxVa01kjpckkHFzzvJWll4QbuXi2pupXiJ8rMaty9Mu12IPvoKygXfQXNQX9BuegraA76C8rV2n1ll1ba77OS+prZYWbWSdJoSTNaKRYAAAAAIKdaZaTU3Teb2XhJcxVdEuZWd3+xNWIBAAAAAPKr1a5T6u6/k/S71tp/YLk4zBiZQF9BuegraA76C8pFX0Fz0F9QrlbtK60y0REAAAAAAOVorXNKAQAAAABoEkVpCWY2zMxeNbPFZjYh7fYgfWZ2q5mtNrMXCpZ1MbOHzOz1+H6/gnWXxP3nVTMbmk6rkQYzO9jMHjWzl83sRTO7MF5Of8F2zGw3M5tvZovivjI5Xk5fQVFm1sHMnjOzWfFz+gqKMrOlZvYnM1toZjXxMvoLdmBm+5rZvWb2Svzd5TMh+wpFaSPMrIOkmyR9RdLRks40s6PTbRUy4HZJwxosmyDpEXfvK+mR+Lni/jJaUr/4NTfH/Qrtw2ZJP3T3oyQNlnRB3CfoL2joI0knu/txkiokDTOzwaKvoHEXSnq54Dl9BaV8wd0rCi7nQX9BMddJmuPun5J0nKK/McH6CkVp406QtNjd/+zumyTdLem0lNuElLn7Y5LeabD4NEnT48fTJZ1esPxud//I3d+QtFhRv0I74O517r4gfrxe0R/3nqK/oAGPbIif7hrfXPQVFGFmvSSdKumXBYvpK2gO+gu2Y2Z7SzpJ0q8kyd03ufs6BewrFKWN6ylpWcHz5fEyoKFu7l4nRYWIpAPj5fQhSJLMrLe
k4yU9I/oLiogPx1woabWkh9ydvoLGTJH0E0kfFyyjr6AxLulBM6s1s6p4Gf0FDX1S0hpJt8WnBvzSzPZUwL5CUdo4K7KMqYrRHPQhyMw6S7pP0kXu/l6pTYsso7+0E+6+xd0rJPWSdIKZHVNic/pKO2VmwyWtdvfacl9SZBl9pX050d0HKDod7QIzO6nEtvSX9qujpAGSbnH34yW9r/hQ3UYk3lcoShu3XNLBBc97SVqZUluQbavMrLskxfer4+X0oXbOzHZVVJDe6e6/jRfTX9Co+HCpeYrO0aGvoKETJY00s6WKTis62cz+S/QVNMLdV8b3qyXdr+gQS/oLGlouaXl8lI4k3auoSA3WVyhKG/espL5mdpiZdVJ0Mu+MlNuEbJohaUz8eIykBwqWjzazT5jZYZL6SpqfQvuQAjMzRedmvOzu/16wiv6C7ZhZVzPbN368u6S/l/SK6CtowN0vcfde7t5b0feSP7j7WaKvoAgz29PM9qp/LOnLkl4Q/QUNuPtbkpaZ2ZHxoi9KekkB+0rHlry4LXP3zWY2XtJcSR0k3eruL6bcLKTMzO6SNETSAWa2XNJESVdLusfMvi3pL5LOkCR3f9HM7lH0j3qzpAvcfUsqDUcaTpR0tqQ/xecKStJPRX/BjrpLmh7PXLiLpHvcfZaZPSX6CsrD3xUU003S/dFvpOoo6b/dfY6ZPSv6C3b0XUl3xoNxf5b0j4r/TwrRV8ydQ8UBAAAAAOng8F0AAAAAQGooSgEAAAAAqaEoBQAAAACkhqIUAAAAAJAailIAAAAAQGooSgEAAAAAqaEoBQAAAACkhqIUAAAAAJCa/w8lnEVBHEn47AAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "users_per_item=df.groupby(['item']).count()['rating']\n", + "\n", + "plt.figure(figsize=(16,8))\n", + "plt.hist(users_per_item, bins=100)\n", + "\n", + "# Let's add median\n", + "t=users_per_item.median()\n", + "plt.axvline(t, color='k', linestyle='dashed', linewidth=1)\n", + "plt.text(t*1.1, plt.ylim()[1]*0.9, 'Median: {:.0f}'.format(t))\n", + "\n", + "# Let's add also some percentiles\n", + "t=users_per_item.quantile(0.25)\n", + "plt.axvline(t, color='k', linestyle='dashed', linewidth=1)\n", + "plt.text(t*1.1, plt.ylim()[1]*0.95, '25% quantile: {:.0f}'.format(t))\n", + "\n", + "t=users_per_item.quantile(0.75)\n", + "plt.axvline(t, color='k', linestyle='dashed', linewidth=1)\n", + "plt.text(t*1.05, plt.ylim()[1]*0.95, '75% quantile: {:.0f}'.format(t))\n", + "\n", + "plt.title('Number of ratings per item', fontsize=30)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "rating\n", + "1 0.06110\n", + "2 0.11370\n", + "3 0.27145\n", + "4 0.34174\n", + "5 0.21201\n", + "Name: user, dtype: float64" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.groupby(['rating']).count()['user']/len(df)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Item attributes" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "genres = pd.read_csv('./Datasets/ml-100k/u.genre', sep='|', header=None,\n", + " encoding='latin-1')\n", + "genres=dict(zip(genres[1], genres[0]))" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{0: 'unknown',\n", + " 1: 'Action',\n", + " 2: 'Adventure',\n", + " 3: 'Animation',\n", + " 4: \"Children's\",\n", + " 5: 
'Comedy',\n", + " 6: 'Crime',\n", + " 7: 'Documentary',\n", + " 8: 'Drama',\n", + " 9: 'Fantasy',\n", + " 10: 'Film-Noir',\n", + " 11: 'Horror',\n", + " 12: 'Musical',\n", + " 13: 'Mystery',\n", + " 14: 'Romance',\n", + " 15: 'Sci-Fi',\n", + " 16: 'Thriller',\n", + " 17: 'War',\n", + " 18: 'Western'}" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "genres" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "movies = pd.read_csv('./Datasets/ml-100k/u.item', sep='|', encoding='latin-1', header=None)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789...14151617181920212223
01Toy Story (1995)01-Jan-1995NaNhttp://us.imdb.com/M/title-exact?Toy%20Story%2...00011...0000000000
12GoldenEye (1995)01-Jan-1995NaNhttp://us.imdb.com/M/title-exact?GoldenEye%20(...01100...0000000100
23Four Rooms (1995)01-Jan-1995NaNhttp://us.imdb.com/M/title-exact?Four%20Rooms%...00000...0000000100
\n", + "

3 rows × 24 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 \\\n", + "0 1 Toy Story (1995) 01-Jan-1995 NaN \n", + "1 2 GoldenEye (1995) 01-Jan-1995 NaN \n", + "2 3 Four Rooms (1995) 01-Jan-1995 NaN \n", + "\n", + " 4 5 6 7 8 9 ... \\\n", + "0 http://us.imdb.com/M/title-exact?Toy%20Story%2... 0 0 0 1 1 ... \n", + "1 http://us.imdb.com/M/title-exact?GoldenEye%20(... 0 1 1 0 0 ... \n", + "2 http://us.imdb.com/M/title-exact?Four%20Rooms%... 0 0 0 0 0 ... \n", + "\n", + " 14 15 16 17 18 19 20 21 22 23 \n", + "0 0 0 0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 1 0 0 \n", + "2 0 0 0 0 0 0 0 1 0 0 \n", + "\n", + "[3 rows x 24 columns]" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "movies[:3]" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "for i in range(19):\n", + " movies[i+5]=movies[i+5].apply(lambda x: genres[i] if x==1 else '')" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "movies['genre']=movies.iloc[:, 5:].apply(lambda x: ', '.join(x[x!='']), axis = 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "movies=movies[[0,1,'genre']]\n", + "movies.columns=['id', 'title', 'genres']" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idtitlegenres
01Toy Story (1995)Animation, Children's, Comedy
12GoldenEye (1995)Action, Adventure, Thriller
23Four Rooms (1995)Thriller
34Get Shorty (1995)Action, Comedy, Drama
45Copycat (1995)Crime, Drama, Thriller
\n", + "
" + ], + "text/plain": [ + " id title genres\n", + "0 1 Toy Story (1995) Animation, Children's, Comedy\n", + "1 2 GoldenEye (1995) Action, Adventure, Thriller\n", + "2 3 Four Rooms (1995) Thriller\n", + "3 4 Get Shorty (1995) Action, Comedy, Drama\n", + "4 5 Copycat (1995) Crime, Drama, Thriller" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "movies.to_csv('./Datasets/ml-100k/movies.csv', index=False)\n", + "movies[:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Toy example" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "os.makedirs('./Datasets/toy-example/', exist_ok = True)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "toy_train=pd.DataFrame([[0,0,3,0], [0,10,4,0], [0,40,5,0], [0,70,4,0],\n", + " [10,10,1,0], [10,20,2,0], [10,30,3,0],\n", + " [20,30,5,0], [20,50,3,0], [20,60,4,0]])\n", + "toy_test=pd.DataFrame([[0,60,3,0],\n", + " [10,40,5,0],\n", + " [20,0,5,0], [20,20,4,0], [20,70,2,0]])\n", + "\n", + "toy_train.to_csv('./Datasets/toy-example/train.csv', sep='\\t', header=None, index=False)\n", + "toy_test.to_csv('./Datasets/toy-example/test.csv', sep='\\t', header=None, index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git 
a/.ipynb_checkpoints/P1. Baseline-checkpoint.ipynb b/.ipynb_checkpoints/P1. Baseline-checkpoint.ipynb new file mode 100644 index 0000000..85b9494 --- /dev/null +++ b/.ipynb_checkpoints/P1. Baseline-checkpoint.ipynb @@ -0,0 +1,1527 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Preparing dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import scipy.sparse as sparse\n", + "from collections import defaultdict\n", + "from itertools import chain\n", + "import random\n", + "\n", + "train_read=pd.read_csv('./Datasets/ml-100k/train.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n", + "test_read=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Let's prepare dataset\n", + "train_and_test=pd.concat([train_read, test_read], axis=0, ignore_index=True)\n", + "train_and_test['user_code'] = train_and_test['user'].astype(\"category\").cat.codes\n", + "train_and_test['item_code'] = train_and_test['item'].astype(\"category\").cat.codes\n", + "\n", + "user_code_id = dict(enumerate(train_and_test['user'].astype(\"category\").cat.categories))\n", + "user_id_code = dict((v, k) for k, v in user_code_id.items())\n", + "item_code_id = dict(enumerate(train_and_test['item'].astype(\"category\").cat.categories))\n", + "item_id_code = dict((v, k) for k, v in item_code_id.items())" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
useritemratingtimestampuser_codeitem_code
06645254876526580663524
14912888068651480
23522732884290328351272
361896389130774961795
456024287997677255923
\n", + "
" + ], + "text/plain": [ + " user item rating timestamp user_code item_code\n", + "0 664 525 4 876526580 663 524\n", + "1 49 1 2 888068651 48 0\n", + "2 352 273 2 884290328 351 272\n", + "3 618 96 3 891307749 617 95\n", + "4 560 24 2 879976772 559 23" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_and_test[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "train_df=pd.merge(train_read, train_and_test, on=list(train_read.columns))\n", + "test_df=pd.merge(test_read, train_and_test, on=list(train_read.columns))" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# Take number of users and items\n", + "(U,I)=(train_and_test['user_code'].max()+1, train_and_test['item_code'].max()+1)\n", + "\n", + "# Create sparse csr matrices\n", + "train_ui = sparse.csr_matrix((train_df['rating'], (train_df['user_code'], train_df['item_code'])), shape=(U, I))\n", + "test_ui = sparse.csr_matrix((test_df['rating'], (test_df['user_code'], test_df['item_code'])), shape=(U, I))" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# Above steps are the same for many algorithms, so I put the code in separate file:\n", + "import helpers\n", + "train_read=pd.read_csv('./Datasets/ml-100k/train.csv', sep='\\t', header=None)\n", + "test_read=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n", + "train_ui, test_ui, user_code_id, user_id_code, item_code_id, item_id_code = helpers.data_to_csr(train_read, test_read)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### CSR matrices - what is it?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<3x4 sparse matrix of type ''\n", + "\twith 8 stored elements in Compressed Sparse Row format>" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "row = np.array([0, 0, 0, 1, 1, 2, 2, 2])\n", + "col = np.array([0, 1, 2, 1, 3, 2, 0, 3])\n", + "data = np.array([4, 1, 3, 2,1, 5, 2, 4])\n", + "sample_csr=sparse.csr_matrix((data, (row, col)))\n", + "sample_csr" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Ratings matrix with missing entries replaced by zeros:\n" + ] + }, + { + "data": { + "text/plain": [ + "matrix([[4, 1, 3, 0],\n", + " [0, 2, 0, 1],\n", + " [2, 0, 5, 4]], dtype=int32)" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of ratings: 8\n", + "Number of users: 3\n", + "Number of items: 4\n" + ] + } + ], + "source": [ + "print('Ratings matrix with missing entries replaced by zeros:')\n", + "display(sample_csr.todense())\n", + "\n", + "print(f'Number of ratings: {sample_csr.nnz}')\n", + "print(f'Number of users: {sample_csr.shape[0]}')\n", + "print(f'Number of items: {sample_csr.shape[1]}')" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Ratings data: [4 1 3 2 1 2 5 4]\n", + "Regarding items: [0 1 2 1 3 0 2 3]\n", + "Where ratings from 0 to 2 belongs to user 0.\n", + "Where ratings from 3 to 4 belongs to user 1.\n", + "Where ratings from 5 to 7 belongs to user 2.\n" + ] + } + ], + "source": [ + "print('Ratings data:', sample_csr.data)\n", + "\n", + "print('Regarding items:', sample_csr.indices)\n", + "\n", + "for i in range(sample_csr.shape[0]):\n", + " 
print(f'Where ratings from {sample_csr.indptr[i]} to {sample_csr.indptr[i+1]-1} belongs to user {i}.')" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Efficient way to access items rated by user:\n" + ] + }, + { + "data": { + "text/plain": [ + "array([ 0, 6, 10, 27, 49, 78, 95, 97, 116, 143, 153, 156, 167,\n", + " 171, 172, 173, 194, 208, 225, 473, 495, 549, 615], dtype=int32)" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "471 ns ± 15.3 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n", + "Inefficient way to access items rated by user:\n" + ] + }, + { + "data": { + "text/plain": [ + "array([ 0, 6, 10, 27, 49, 78, 95, 97, 116, 143, 153, 156, 167,\n", + " 171, 172, 173, 194, 208, 225, 473, 495, 549, 615])" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "48.3 µs ± 1.51 µs per loop (mean ± std. dev. 
of 7 runs, 10000 loops each)\n" + ] + } + ], + "source": [ + "user=123\n", + "\n", + "print('Efficient way to access items rated by user:')\n", + "display(train_ui.indices[train_ui.indptr[user]:train_ui.indptr[user+1]])\n", + "%timeit train_ui.indices[train_ui.indptr[user]:train_ui.indptr[user+1]]\n", + "\n", + "print('Inefficient way to access items rated by user:')\n", + "display(train_ui[user].indices)\n", + "%timeit train_ui[user].indices" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "###### Example: subtracting row means" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Our matrix:\n" + ] + }, + { + "data": { + "text/plain": [ + "matrix([[4, 1, 3, 0],\n", + " [0, 2, 0, 1],\n", + " [2, 0, 5, 4]], dtype=int32)" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "List of row sums:\n" + ] + }, + { + "data": { + "text/plain": [ + "matrix([[ 8, 3, 11]])" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print('Our matrix:')\n", + "display(sample_csr.todense())\n", + "print('List of row sums:')\n", + "sample_csr.sum(axis=1).ravel()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Array with row means:\n" + ] + }, + { + "data": { + "text/plain": [ + "array([2.66666667, 1.5 , 3.66666667])" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Diagonal csr matrix with inverse of row sums on diagonal:\n" + ] + }, + { + "data": { + "text/plain": [ + "matrix([[2.66666667, 0. , 0. ],\n", + " [0. , 1.5 , 0. ],\n", + " [0. , 0. 
, 3.66666667]])" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Let's apply them in nonzero entries:\n" + ] + }, + { + "data": { + "text/plain": [ + "matrix([[2.66666667, 2.66666667, 2.66666667, 0. ],\n", + " [0. , 1.5 , 0. , 1.5 ],\n", + " [3.66666667, 0. , 3.66666667, 3.66666667]])" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Finally after subtraction:\n" + ] + }, + { + "data": { + "text/plain": [ + "matrix([[ 1.33333333, -1.66666667, 0.33333333, 0. ],\n", + " [ 0. , 0.5 , 0. , -0.5 ],\n", + " [-1.66666667, 0. , 1.33333333, 0.33333333]])" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print('Array with row means:')\n", + "row_means=np.asarray(sample_csr.sum(axis=1).ravel())[0]/np.diff(sample_csr.indptr)\n", + "display(row_means)\n", + "\n", + "print('Diagonal csr matrix with inverse of row sums on diagonal:')\n", + "display(sparse.diags(row_means).todense())\n", + "\n", + "print(\"\"\"Let's apply them in nonzero entries:\"\"\")\n", + "to_subtract=sparse.diags(row_means)*(sample_csr>0)\n", + "display(to_subtract.todense())\n", + "\n", + "print(\"Finally after subtraction:\")\n", + "sample_csr-to_subtract.todense()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "###### Transposing" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sample matrix: \n", + " [[4 1 3 0]\n", + " [0 2 0 1]\n", + " [2 0 5 4]]\n", + "\n", + "Indices: \n", + " [0 1 2 1 3 0 2 3]\n", + "\n", + "Transposed matrix: \n", + " [[4 0 2]\n", + " [1 2 0]\n", + " [3 0 5]\n", + " [0 1 4]]\n", + "\n", + "Indices of transposed matrix: \n", + " [0 1 2 1 3 0 2 3]\n", + "\n", + "Reason: \n", + "\n", + "After converting to csr: \n", + " [0 2 0 
1 0 2 1 2]\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "from scipy import sparse\n", + "row = np.array([0, 0, 0, 1, 1, 2, 2, 2])\n", + "col = np.array([0, 1, 2, 1, 3, 2, 0, 3])\n", + "data = np.array([4, 1, 3, 2,1, 5, 2, 4])\n", + "sample=sparse.csr_matrix((data, (row, col)))\n", + "print('Sample matrix: \\n', sample.A)\n", + "print('\\nIndices: \\n', sample.indices)\n", + "transposed=sample.transpose()\n", + "print('\\nTransposed matrix: \\n', transposed.A)\n", + "print('\\nIndices of transposed matrix: \\n', transposed.indices)\n", + "\n", + "print('\\nReason: ', type(transposed))\n", + "\n", + "print('\\nAfter converting to csr: \\n', transposed.tocsr().indices)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Self made top popular" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "if not os.path.exists('./Recommendations generated/'):\n", + " os.mkdir('./Recommendations generated/')\n", + " os.mkdir('./Recommendations generated/ml-100k/')\n", + " os.mkdir('./Recommendations generated/toy-example/')" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "top_pop = []\n", + "train_iu = train_ui.transpose().tocsr()\n", + "scaling_factor = train_ui.max()/max(np.diff(train_iu.indptr))\n", + "\n", + "for i in range(train_iu.shape[0]):\n", + " top_pop.append((i, (train_iu.indptr[i+1]-train_iu.indptr[i])*scaling_factor))\n", + " \n", + "top_pop.sort(key=lambda x: x[1], reverse=True)\n", + "#top_pop is an array of pairs (item, rescaled_popularity) sorted descending from the most popular\n", + "\n", + "k = 10\n", + "result = []\n", + "\n", + "for u in range(train_ui.shape[0]):\n", + " user_rated = train_ui.indices[train_ui.indptr[u]:train_ui.indptr[u+1]]\n", + " rec_user = []\n", + " item_pos = 0\n", + " while len(rec_user)<10:\n", + " if top_pop[item_pos][0] not in user_rated:\n", + " 
rec_user.append((item_code_id[top_pop[item_pos][0]], top_pop[item_pos][1]))\n", + " item_pos+=1\n", + " result.append([user_code_id[u]]+list(chain(*rec_user)))\n", + "\n", + "(pd.DataFrame(result)).to_csv('Recommendations generated/ml-100k/Self_TopPop_reco.csv', index=False, header=False)\n", + "\n", + "\n", + "# estimations - score is a bit artificial since that method is not designed for scoring, but for ranking\n", + "\n", + "estimations=[]\n", + "\n", + "for user, item in zip(*test_ui.nonzero()):\n", + " estimations.append([user_code_id[user], item_code_id[item],\n", + " (train_iu.indptr[item+1]-train_iu.indptr[item])*scaling_factor])\n", + "(pd.DataFrame(estimations)).to_csv('Recommendations generated/ml-100k/Self_TopPop_estimations.csv', index=False, header=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Self made top rated" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "top_rated = []\n", + "global_avg = sum(train_iu.data)/train_ui.nnz\n", + "\n", + "for i in range(train_iu.shape[0]):\n", + " ratings = train_iu.data[train_iu.indptr[i]: train_iu.indptr[i+1]]\n", + " avg = np.mean(ratings) if len(ratings)>0 else global_avg\n", + " top_rated.append((i, avg))\n", + " \n", + "top_rated.sort(key=lambda x: x[1], reverse=True)\n", + " \n", + "k=10\n", + "result=[]\n", + "\n", + "for u in range(train_ui.shape[0]):\n", + " user_rated=train_ui.indices[train_ui.indptr[u]:train_ui.indptr[u+1]]\n", + " rec_user=[]\n", + " item_pos=0\n", + " while len(rec_user)<10:\n", + " if top_rated[item_pos][0] not in user_rated:\n", + " rec_user.append((item_code_id[top_rated[item_pos][0]], top_rated[item_pos][1]))\n", + " item_pos+=1\n", + " result.append([user_code_id[u]]+list(chain(*rec_user)))\n", + "\n", + "(pd.DataFrame(result)).to_csv('Recommendations generated/ml-100k/Self_TopRated_reco.csv', index=False, header=False)\n", + "\n", + "\n", + "\n", + "estimations=[]\n", + "d = 
dict(top_rated)\n", + "\n", + "for user, item in zip(*test_ui.nonzero()):\n", + " estimations.append([user_code_id[user], item_code_id[item], d[item]])\n", + "(pd.DataFrame(estimations)).to_csv('Recommendations generated/ml-100k/Self_TopRated_estimations.csv', index=False, header=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789...11121314151617181920
018145.011225.011895.012015.01293...13065.014675.014915.015005.015365.0
121195.08145.011225.011895.01201...12935.013065.014675.014915.015005.0
\n", + "

2 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 7 8 9 ... 11 12 13 \\\n", + "0 1 814 5.0 1122 5.0 1189 5.0 1201 5.0 1293 ... 1306 5.0 1467 \n", + "1 2 119 5.0 814 5.0 1122 5.0 1189 5.0 1201 ... 1293 5.0 1306 \n", + "\n", + " 14 15 16 17 18 19 20 \n", + "0 5.0 1491 5.0 1500 5.0 1536 5.0 \n", + "1 5.0 1467 5.0 1491 5.0 1500 5.0 \n", + "\n", + "[2 rows x 21 columns]" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DataFrame(result)[:2]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Self-made baseline" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "class selfBaselineUI():\n", + " \n", + " def fit(self, train_ui):\n", + " self.train_ui=train_ui.copy()\n", + " self.train_iu=train_ui.transpose().tocsr()\n", + " \n", + " result=self.train_ui.copy()\n", + " \n", + " self.row_means=np.asarray(result.sum(axis=1).ravel())[0]/np.diff(result.indptr)\n", + " \n", + " # in csr format after addition or multiplication 0 entries \"disappear\" - so some workaraunds are needed \n", + " # (other option is to define addition/multiplication in a desired way)\n", + " row_means=self.row_means.copy()\n", + " \n", + " max_row_mean=np.max(row_means)\n", + " row_means[row_means==0]=max_row_mean+1\n", + " to_subtract_rows=sparse.diags(row_means)*(result.power(0))\n", + " to_subtract_rows.sort_indices() # needed to have valid .data\n", + " \n", + " subtract=to_subtract_rows.data\n", + " subtract[subtract==max_row_mean+1]=0\n", + " \n", + " result.data=result.data-subtract\n", + "# we can't do result=train_ui-to_subtract_rows since then 0 entries will \"disappear\" in csr format\n", + " self.col_means=np.divide(np.asarray(result.sum(axis=0).ravel())[0], np.diff(self.train_iu.indptr),\\\n", + " out=np.zeros(self.train_iu.shape[0]), where=np.diff(self.train_iu.indptr)!=0) # handling items without ratings\n", + " \n", + " # again - it is 
possible that some mean will be zero, so let's use the same workaround\n", + " col_means=self.col_means.copy()\n", + " \n", + " max_col_mean=np.max(col_means)\n", + " col_means[col_means==0]=max_col_mean+1\n", + " to_subtract_cols=result.power(0)*sparse.diags(col_means)\n", + " to_subtract_cols.sort_indices() # needed to have valid .data\n", + " \n", + " subtract=to_subtract_cols.data\n", + " subtract[subtract==max_col_mean+1]=0\n", + " \n", + " result.data=result.data-subtract\n", + "\n", + " return result\n", + " \n", + " \n", + " def recommend(self, user_code_id, item_code_id, topK=10):\n", + " estimations=np.tile(self.row_means[:,None], [1, self.train_ui.shape[1]]) +np.tile(self.col_means, [self.train_ui.shape[0], 1])\n", + " \n", + " top_k = defaultdict(list)\n", + " for nb_user, user in enumerate(estimations):\n", + " \n", + " user_rated=self.train_ui.indices[self.train_ui.indptr[nb_user]:self.train_ui.indptr[nb_user+1]]\n", + " for item, score in enumerate(user):\n", + " if item not in user_rated:\n", + " top_k[user_code_id[nb_user]].append((item_code_id[item], score))\n", + " result=[]\n", + " # Let's choose k best items in the format: (user, item1, score1, item2, score2, ...)\n", + " for uid, item_scores in top_k.items():\n", + " item_scores.sort(key=lambda x: x[1], reverse=True)\n", + " result.append([uid]+list(chain(*item_scores[:topK])))\n", + " return result\n", + " \n", + " def estimate(self, user_code_id, item_code_id, test_ui):\n", + " result=[]\n", + " for user, item in zip(*test_ui.nonzero()):\n", + " result.append([user_code_id[user], item_code_id[item], self.row_means[user]+self.col_means[item]])\n", + " return result" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training data:\n" + ] + }, + { + "data": { + "text/plain": [ + "matrix([[3, 4, 0, 0, 5, 0, 0, 4],\n", + " [0, 1, 2, 3, 0, 0, 0, 0],\n", + " [0, 0, 0, 5, 0, 3, 4, 0]], 
dtype=int64)" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "After subtracting rows and columns:\n" + ] + }, + { + "data": { + "text/plain": [ + "matrix([[ 0. , 0.5, 0. , 0. , 0. , 0. , 0. , 0. ],\n", + " [ 0. , -0.5, 0. , 0. , 0. , 0. , 0. , 0. ],\n", + " [ 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ]])" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recommend best unseen item:\n" + ] + }, + { + "data": { + "text/plain": [ + "[[0, 30, 5.0], [10, 40, 3.0], [20, 40, 5.0]]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Print estimations on unseen items:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
useritemest_score
00604.0
110403.0
22003.0
320204.0
420704.0
\n", + "
" + ], + "text/plain": [ + " user item est_score\n", + "0 0 60 4.0\n", + "1 10 40 3.0\n", + "2 20 0 3.0\n", + "3 20 20 4.0\n", + "4 20 70 4.0" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "toy_train_read=pd.read_csv('./Datasets/toy-example/train.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n", + "toy_test_read=pd.read_csv('./Datasets/toy-example/test.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n", + "\n", + "toy_train_ui, toy_test_ui, toy_user_code_id, toy_user_id_code, \\\n", + "toy_item_code_id, toy_item_id_code = helpers.data_to_csr(toy_train_read, toy_test_read)\n", + "\n", + "print('Training data:')\n", + "display(toy_train_ui.todense())\n", + "\n", + "model=selfBaselineUI()\n", + "print('After subtracting rows and columns:')\n", + "display(model.fit(toy_train_ui).todense())\n", + "\n", + "print('Recommend best unseen item:')\n", + "display(model.recommend(toy_user_code_id, toy_item_code_id, topK=1))\n", + "\n", + "print('Print estimations on unseen items:')\n", + "estimations=pd.DataFrame(model.estimate(toy_user_code_id, toy_item_code_id, toy_test_ui))\n", + "estimations.columns=['user', 'item', 'est_score']\n", + "display(estimations)\n", + "\n", + "top_n=pd.DataFrame(model.recommend(toy_user_code_id, toy_item_code_id, topK=3))\n", + "\n", + "top_n.to_csv('Recommendations generated/toy-example/Self_BaselineUI_reco.csv', index=False, header=False)\n", + "\n", + "estimations=pd.DataFrame(model.estimate(toy_user_code_id, toy_item_code_id, toy_test_ui))\n", + "estimations.to_csv('Recommendations generated/toy-example/Self_BaselineUI_estimations.csv', index=False, header=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "model=selfBaselineUI()\n", + "model.fit(train_ui)\n", + "\n", + "top_n=pd.DataFrame(model.recommend(user_code_id, item_code_id, topK=10))\n", + "\n", + 
"top_n.to_csv('Recommendations generated/ml-100k/Self_BaselineUI_reco.csv', index=False, header=False)\n", + "\n", + "estimations=pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))\n", + "estimations.to_csv('Recommendations generated/ml-100k/Self_BaselineUI_estimations.csv', index=False, header=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# project task 1: implement self-made BaselineIU" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Implement recommender system which will recommend movies (which user hasn't seen) which is similar to BaselineUI but first subtract column means then row means.\n", + "\n", + "The output should be saved in 'Recommendations generated/ml-100k/Self_BaselineIU_reco.csv' and 'Recommendations generated/ml-100k/Self_BaselineIU_estimations.csv'.\n", + "\n", + "

\n", + "Additional clarification: \n", + "\n", + "In summary, the predicted rating of user u for item i should be equal to b_u + b_i.\n", + "The procedure to get b_u and b_i is the following:\n", + "- We have the original user-item ratings matrix M.\n", + "- For each column representing the item i, we compute the mean of its ratings and denote it by b_i. From each rating in matrix M we subtract the corresponding column mean (b_i) to obtain a new matrix M'.\n", + "- For each row of matrix M' representing the user u, we compute the mean of its ratings and denote it by b_u." + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "class selfBaselineIU():\n", + " \n", + " def fit(self, train_ui):\n", + " self.train_ui=train_ui.copy()\n", + " self.train_iu=train_ui.transpose().tocsr()\n", + " \n", + " result=self.train_ui.copy()\n", + " \n", + " #we can't do result=train_ui-to_subtract_rows since then 0 entries will \"disappear\" in csr format\n", + " self.col_means=np.divide(np.asarray(result.sum(axis=0).ravel())[0], np.diff(self.train_iu.indptr),\\\n", + " out=np.zeros(self.train_iu.shape[0]), where=np.diff(self.train_iu.indptr)!=0) # handling items without ratings\n", + " \n", + " # again - it is possible that some mean will be zero, so let's use the same workaround\n", + " col_means=self.col_means.copy()\n", + " \n", + " max_col_mean=np.max(col_means)\n", + " col_means[col_means==0]=max_col_mean+1\n", + " to_subtract_cols=result.power(0)*sparse.diags(col_means)\n", + " to_subtract_cols.sort_indices() # needed to have valid .data\n", + " \n", + " subtract=to_subtract_cols.data\n", + " subtract[subtract==max_col_mean+1]=0\n", + " \n", + " result.data=result.data-subtract\n", + "\n", + "\n", + " self.row_means=np.asarray(result.sum(axis=1).ravel())[0]/np.diff(result.indptr)\n", + " \n", + " # in csr format after addition or multiplication 0 entries \"disappear\" - so some workarounds are needed \n", + " # 
(other option is to define addition/multiplication in a desired way)\n", + " row_means=self.row_means.copy()\n", + " \n", + " max_row_mean=np.max(row_means)\n", + " row_means[row_means==0]=max_row_mean+1\n", + " to_subtract_rows=sparse.diags(row_means)*(result.power(0))\n", + " to_subtract_rows.sort_indices() # needed to have valid .data\n", + " \n", + " subtract=to_subtract_rows.data\n", + " subtract[subtract==max_row_mean+1]=0\n", + " \n", + " result.data=result.data-subtract\n", + "\n", + " return result\n", + " \n", + " \n", + " def recommend(self, user_code_id, item_code_id, topK=10):\n", + " estimations=np.tile(self.row_means[:,None], [1, self.train_ui.shape[1]]) +np.tile(self.col_means, [self.train_ui.shape[0], 1])\n", + " \n", + " top_k = defaultdict(list)\n", + " for nb_user, user in enumerate(estimations):\n", + " \n", + " user_rated=self.train_ui.indices[self.train_ui.indptr[nb_user]:self.train_ui.indptr[nb_user+1]]\n", + " for item, score in enumerate(user):\n", + " if item not in user_rated:\n", + " top_k[user_code_id[nb_user]].append((item_code_id[item], score))\n", + " result=[]\n", + " # Let's choose k best items in the format: (user, item1, score1, item2, score2, ...)\n", + " for uid, item_scores in top_k.items():\n", + " item_scores.sort(key=lambda x: x[1], reverse=True)\n", + " result.append([uid]+list(chain(*item_scores[:topK])))\n", + " return result\n", + " \n", + " def estimate(self, user_code_id, item_code_id, test_ui):\n", + " result=[]\n", + " for user, item in zip(*test_ui.nonzero()):\n", + " result.append([user_code_id[user], item_code_id[item], self.row_means[user]+self.col_means[item]])\n", + " return result" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training data:\n" + ] + }, + { + "data": { + "text/plain": [ + "matrix([[3, 4, 0, 0, 5, 0, 0, 4],\n", + " [0, 1, 2, 3, 0, 0, 0, 0],\n", + " [0, 0, 0, 5, 0, 3, 4, 0]], 
dtype=int64)" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "After subtracting columns and rows:\n" + ] + }, + { + "data": { + "text/plain": [ + "matrix([[-0.375 , 1.125 , 0. , 0. , -0.375 ,\n", + " 0. , 0. , -0.375 ],\n", + " [ 0. , -0.66666667, 0.83333333, -0.16666667, 0. ,\n", + " 0. , 0. , 0. ],\n", + " [ 0. , 0. , 0. , 0.66666667, 0. ,\n", + " -0.33333333, -0.33333333, 0. ]])" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recommend best unseen item:\n" + ] + }, + { + "data": { + "text/plain": [ + "[[0, 30, 4.375], [10, 40, 4.166666666666667], [20, 40, 5.333333333333333]]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Print estimations on unseen items:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
useritemest_score
00604.375000
110404.166667
22003.333333
320202.333333
420704.333333
\n", + "
" + ], + "text/plain": [ + " user item est_score\n", + "0 0 60 4.375000\n", + "1 10 40 4.166667\n", + "2 20 0 3.333333\n", + "3 20 20 2.333333\n", + "4 20 70 4.333333" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "toy_train_read=pd.read_csv('./Datasets/toy-example/train.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n", + "toy_test_read=pd.read_csv('./Datasets/toy-example/test.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n", + "\n", + "toy_train_iu, toy_test_iu, toy_user_code_id, toy_user_id_code, \\\n", + "toy_item_code_id, toy_item_id_code = helpers.data_to_csr(toy_train_read, toy_test_read)\n", + "\n", + "print('Training data:')\n", + "display(toy_train_iu.todense())\n", + "\n", + "model=selfBaselineIU()\n", + "print('After subtracting columns and rows:')\n", + "display(model.fit(toy_train_iu).todense())\n", + "\n", + "print('Recommend best unseen item:')\n", + "display(model.recommend(toy_user_code_id, toy_item_code_id, topK=1))\n", + "\n", + "print('Print estimations on unseen items:')\n", + "estimations=pd.DataFrame(model.estimate(toy_user_code_id, toy_item_code_id, toy_test_iu))\n", + "estimations.columns=['user', 'item', 'est_score']\n", + "display(estimations)\n", + "\n", + "top_n=pd.DataFrame(model.recommend(toy_user_code_id, toy_item_code_id, topK=3))\n", + "\n", + "top_n.to_csv('Recommendations generated/toy-example/Self_BaselineIU_reco.csv', index=False, header=False)\n", + "\n", + "estimations=pd.DataFrame(model.estimate(toy_user_code_id, toy_item_code_id, toy_test_iu))\n", + "estimations.to_csv('Recommendations generated/toy-example/Self_BaselineIU_estimations.csv', index=False, header=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "model=selfBaselineIU()\n", + "model.fit(train_ui)\n", + "\n", + "top_n=pd.DataFrame(model.recommend(user_code_id, item_code_id, topK=10))\n", + "\n", 
+ "top_n.to_csv('Recommendations generated/Projects/Project1_Self_BaselineIU_reco.csv', index=False, header=False)\n", + "\n", + "estimations=pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))\n", + "estimations.to_csv('Recommendations generated/Projects/Project1_Self_BaselineIU_estimations.csv', index=False, header=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Ready-made baseline - Surprise implementation" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Estimating biases using als...\n" + ] + } + ], + "source": [ + "import surprise as sp\n", + "import time\n", + "\n", + "# Based on surprise.readthedocs.io\n", + "def get_top_n(predictions, n=10):\n", + " \n", + " # Here we create a dictionary which items are lists of pairs (item, score)\n", + " top_n = defaultdict(list)\n", + " for uid, iid, true_r, est, _ in predictions:\n", + " top_n[uid].append((iid, est))\n", + " \n", + " result=[]\n", + " # Let's choose k best items in the format: (user, item1, score1, item2, score2, ...)\n", + " for uid, user_ratings in top_n.items():\n", + " user_ratings.sort(key=lambda x: x[1], reverse=True)\n", + " result.append([uid]+list(chain(*user_ratings[:n]))) \n", + " return result\n", + "\n", + "\n", + "reader = sp.Reader(line_format='user item rating timestamp', sep='\\t')\n", + "trainset = sp.Dataset.load_from_file('./Datasets/ml-100k/train.csv', reader=reader)\n", + "trainset = trainset.build_full_trainset() # -> it is needed for using Surprise package\n", + "\n", + "testset = sp.Dataset.load_from_file('./Datasets/ml-100k/test.csv', reader=reader)\n", + "testset = sp.Trainset.build_testset(testset.build_full_trainset())\n", + "\n", + "algo = sp.BaselineOnly()\n", + "# algo = sp.BaselineOnly(bsl_options={'method':'sgd', 'reg':0, 'n_epochs':2000})\n", + "# observe how bad results gives above algorithm\n", + "# more 
details http://courses.ischool.berkeley.edu/i290-dm/s11/SECURE/a1-koren.pdf - chapter 2.1\n", + "\n", + "algo.fit(trainset)\n", + "\n", + "antitrainset = trainset.build_anti_testset() # We want to predict ratings of pairs (user, item) which are not in train set\n", + "predictions = algo.test(antitrainset)\n", + "\n", + "top_n = get_top_n(predictions, n=10)\n", + "\n", + "top_n=pd.DataFrame(top_n)\n", + "\n", + "top_n.to_csv('Recommendations generated/ml-100k/Ready_Baseline_reco.csv', index=False, header=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "RMSE: 0.9495\n", + "MAE: 0.7525\n" + ] + }, + { + "data": { + "text/plain": [ + "0.7524871012820799" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Compute RMSE on testset using buildin functions\n", + "predictions = algo.test(testset)\n", + "sp.accuracy.rmse(predictions, verbose=True)\n", + "\n", + "# Let's also save the results in file\n", + "predictions_df=[]\n", + "for uid, iid, true_r, est, _ in predictions:\n", + " predictions_df.append([uid, iid, est])\n", + " \n", + "predictions_df=pd.DataFrame(predictions_df)\n", + "predictions_df.to_csv('Recommendations generated/ml-100k/Ready_Baseline_estimations.csv', index=False, header=False)\n", + "\n", + "sp.accuracy.mae(predictions, verbose=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### Let's compare with random" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "RMSE: 1.5165\n", + "MAE: 1.2172\n" + ] + }, + { + "data": { + "text/plain": [ + "1.2172144988785374" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# in surprise random is an algorithm predicting random value 
regarding to normal distribution estimated from train set\n", + "algo = sp.NormalPredictor()\n", + "algo.fit(trainset)\n", + "\n", + "antitrainset = trainset.build_anti_testset() # We want to predict ratings of pairs (user, item) which are not in train set\n", + "predictions = algo.test(antitrainset)\n", + "\n", + "top_n = get_top_n(predictions, n=10)\n", + "\n", + "top_n=pd.DataFrame(top_n)\n", + "\n", + "top_n.to_csv('Recommendations generated/ml-100k/Ready_Random_reco.csv', index=False, header=False)\n", + "\n", + "# Compute RMSE on testset using buildin functions\n", + "predictions = algo.test(testset)\n", + "sp.accuracy.rmse(predictions, verbose=True)\n", + "\n", + "# Let's also save the results in file\n", + "predictions_df=[]\n", + "for uid, iid, true_r, est, _ in predictions:\n", + " predictions_df.append([uid, iid, est])\n", + " \n", + "predictions_df=pd.DataFrame(predictions_df)\n", + "predictions_df.to_csv('Recommendations generated/ml-100k/Ready_Random_estimations.csv', index=False, header=False)\n", + "\n", + "sp.accuracy.mae(predictions, verbose=True)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + }, + "metadata": { + "interpreter": { + "hash": "2a3a95f8b675c5b7dd6a35e1675edaf697539b1f0a71c4603e9520a8bbd07d82" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/.ipynb_checkpoints/P2. Evaluation-checkpoint.ipynb b/.ipynb_checkpoints/P2. Evaluation-checkpoint.ipynb new file mode 100644 index 0000000..d4cadb5 --- /dev/null +++ b/.ipynb_checkpoints/P2. 
Evaluation-checkpoint.ipynb @@ -0,0 +1,1678 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Prepare test set" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import scipy.sparse as sparse\n", + "from collections import defaultdict\n", + "from itertools import chain\n", + "import random\n", + "from tqdm import tqdm\n", + "\n", + "# In evaluation we do not load train set - it is not needed\n", + "test = pd.read_csv(\"./Datasets/ml-100k/test.csv\", sep=\"\\t\", header=None)\n", + "test.columns = [\"user\", \"item\", \"rating\", \"timestamp\"]\n", + "\n", + "test[\"user_code\"] = test[\"user\"].astype(\"category\").cat.codes\n", + "test[\"item_code\"] = test[\"item\"].astype(\"category\").cat.codes\n", + "\n", + "user_code_id = dict(enumerate(test[\"user\"].astype(\"category\").cat.categories))\n", + "user_id_code = dict((v, k) for k, v in user_code_id.items())\n", + "item_code_id = dict(enumerate(test[\"item\"].astype(\"category\").cat.categories))\n", + "item_id_code = dict((v, k) for k, v in item_code_id.items())\n", + "\n", + "test_ui = sparse.csr_matrix((test[\"rating\"], (test[\"user_code\"], test[\"item_code\"])))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Estimations metrics" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: 'Recommendations generated/ml-100k/Ready_Baseline_estimations.csv'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 
1\u001b[1;33m estimations_df = pd.read_csv(\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[1;34m\"Recommendations generated/ml-100k/Ready_Baseline_estimations.csv\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mheader\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m )\n\u001b[0;32m 4\u001b[0m \u001b[0mestimations_df\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolumns\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;34m\"user\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"item\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"score\"\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36mread_csv\u001b[1;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)\u001b[0m\n\u001b[0;32m 608\u001b[0m \u001b[0mkwds\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkwds_defaults\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 609\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 610\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0m_read\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 611\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 612\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36m_read\u001b[1;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[0;32m 460\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 461\u001b[0m \u001b[1;31m# Create the parser.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 462\u001b[1;33m \u001b[0mparser\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mTextFileReader\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 463\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 464\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mchunksize\u001b[0m \u001b[1;32mor\u001b[0m \u001b[0miterator\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[0;32m 817\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"has_index_names\"\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mkwds\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"has_index_names\"\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 818\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 819\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_engine\u001b[0m \u001b[1;33m=\u001b[0m 
\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_make_engine\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mengine\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 820\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 821\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mclose\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36m_make_engine\u001b[1;34m(self, engine)\u001b[0m\n\u001b[0;32m 1048\u001b[0m )\n\u001b[0;32m 1049\u001b[0m \u001b[1;31m# error: Too many arguments for \"ParserBase\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1050\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mmapping\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mengine\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# type: ignore[call-arg]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1051\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1052\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_failover_to_python\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, src, **kwds)\u001b[0m\n\u001b[0;32m 1865\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1866\u001b[0m \u001b[1;31m# open handles\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 
1867\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_open_handles\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msrc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1868\u001b[0m \u001b[1;32massert\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mhandles\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1869\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mkey\u001b[0m \u001b[1;32min\u001b[0m \u001b[1;33m(\u001b[0m\u001b[1;34m\"storage_options\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"encoding\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"memory_map\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"compression\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36m_open_handles\u001b[1;34m(self, src, kwds)\u001b[0m\n\u001b[0;32m 1360\u001b[0m \u001b[0mLet\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mreaders\u001b[0m \u001b[0mopen\u001b[0m \u001b[0mIOHanldes\u001b[0m \u001b[0mafter\u001b[0m \u001b[0mthey\u001b[0m \u001b[0mare\u001b[0m \u001b[0mdone\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mtheir\u001b[0m \u001b[0mpotential\u001b[0m \u001b[0mraises\u001b[0m\u001b[1;33m.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1361\u001b[0m \"\"\"\n\u001b[1;32m-> 1362\u001b[1;33m self.handles = get_handle(\n\u001b[0m\u001b[0;32m 1363\u001b[0m \u001b[0msrc\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1364\u001b[0m \u001b[1;34m\"r\"\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\io\\common.py\u001b[0m in 
def estimations_metrics(test_ui, estimations):
    """Compute RMSE and MAE of estimated scores against the test ratings.

    Both arguments are read only through their ``.data`` arrays (and
    ``estimations.nnz``), which are subtracted element by element. The two
    sparse matrices therefore have to store values for the same (user, item)
    pairs in the same order.

    Args:
        test_ui: sparse matrix holding the true test ratings.
        estimations: sparse matrix holding the estimated scores.

    Returns:
        A single-row ``pandas.DataFrame`` with the columns ``RMSE`` and ``MAE``.
    """
    errors = estimations.data - test_ui.data
    nb_estimations = estimations.nnz

    metrics = {
        "RMSE": (np.sum(errors ** 2) / nb_estimations) ** (1 / 2),
        "MAE": np.sum(abs(errors)) / nb_estimations,
    }
    return pd.DataFrame([list(metrics.values())], columns=list(metrics.keys()))
"execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import numpy as np\n", + "\n", + "reco = np.loadtxt(\n", + " \"Recommendations generated/ml-100k/Ready_Baseline_reco.csv\", delimiter=\",\"\n", + ")\n", + "# Let's ignore scores - they are not used in evaluation:\n", + "users = reco[:, :1]\n", + "items = reco[:, 1::2]\n", + "# Let's use inner ids instead of real ones\n", + "users = np.vectorize(lambda x: user_id_code.setdefault(x, -1))(users)\n", + "items = np.vectorize(lambda x: item_id_code.setdefault(x, -1))(items)\n", + "reco = np.concatenate((users, items), axis=1)\n", + "reco" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "def ranking_metrics(test_ui, reco, super_reactions=[], topK=10):\n", + "\n", + " nb_items = test_ui.shape[1]\n", + " (\n", + " relevant_users,\n", + " super_relevant_users,\n", + " prec,\n", + " rec,\n", + " F_1,\n", + " F_05,\n", + " prec_super,\n", + " rec_super,\n", + " ndcg,\n", + " mAP,\n", + " MRR,\n", + " LAUC,\n", + " HR,\n", + " ) = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)\n", + "\n", + " cg = 1.0 / np.log2(np.arange(2, topK + 2))\n", + " cg_sum = np.cumsum(cg)\n", + "\n", + " for (nb_user, user) in tqdm(enumerate(reco[:, 0])):\n", + " u_rated_items = test_ui.indices[test_ui.indptr[user] : test_ui.indptr[user + 1]]\n", + " nb_u_rated_items = len(u_rated_items)\n", + " if (\n", + " nb_u_rated_items > 0\n", + " ): # skip users with no items in test set (still possible that there will be no super items)\n", + " relevant_users += 1\n", + "\n", + " u_super_items = u_rated_items[\n", + " np.vectorize(lambda x: x in super_reactions)(\n", + " test_ui.data[test_ui.indptr[user] : test_ui.indptr[user + 1]]\n", + " )\n", + " ]\n", + " # more natural seems u_super_items=[item for item in u_rated_items if test_ui[user,item] in super_reactions]\n", + " # but accesing test_ui[user,item] is expensive -we should avoid doing it\n", + " if 
len(u_super_items) > 0:\n", + " super_relevant_users += 1\n", + "\n", + " user_successes = np.zeros(topK)\n", + " nb_user_successes = 0\n", + " user_super_successes = np.zeros(topK)\n", + " nb_user_super_successes = 0\n", + "\n", + " # evaluation\n", + " for (item_position, item) in enumerate(reco[nb_user, 1 : topK + 1]):\n", + " if item in u_rated_items:\n", + " user_successes[item_position] = 1\n", + " nb_user_successes += 1\n", + " if item in u_super_items:\n", + " user_super_successes[item_position] = 1\n", + " nb_user_super_successes += 1\n", + "\n", + " prec_u = nb_user_successes / topK\n", + " prec += prec_u\n", + "\n", + " rec_u = nb_user_successes / nb_u_rated_items\n", + " rec += rec_u\n", + "\n", + " F_1 += 2 * (prec_u * rec_u) / (prec_u + rec_u) if prec_u + rec_u > 0 else 0\n", + " F_05 += (\n", + " (0.5 ** 2 + 1) * (prec_u * rec_u) / (0.5 ** 2 * prec_u + rec_u)\n", + " if prec_u + rec_u > 0\n", + " else 0\n", + " )\n", + "\n", + " prec_super += nb_user_super_successes / topK\n", + " rec_super += nb_user_super_successes / max(\n", + " len(u_super_items), 1\n", + " ) # to set 0 if no super items\n", + " ndcg += np.dot(user_successes, cg) / cg_sum[min(topK, nb_u_rated_items) - 1]\n", + "\n", + " cumsum_successes = np.cumsum(user_successes)\n", + " mAP += np.dot(\n", + " cumsum_successes / np.arange(1, topK + 1), user_successes\n", + " ) / min(topK, nb_u_rated_items)\n", + " MRR += (\n", + " 1 / (user_successes.nonzero()[0][0] + 1)\n", + " if user_successes.nonzero()[0].size > 0\n", + " else 0\n", + " )\n", + " LAUC += (\n", + " np.dot(cumsum_successes, 1 - user_successes)\n", + " + (nb_user_successes + nb_u_rated_items)\n", + " / 2\n", + " * ((nb_items - nb_u_rated_items) - (topK - nb_user_successes))\n", + " ) / ((nb_items - nb_u_rated_items) * nb_u_rated_items)\n", + "\n", + " HR += nb_user_successes > 0\n", + "\n", + " result = []\n", + " result.append((\"precision\", prec / relevant_users))\n", + " result.append((\"recall\", rec / relevant_users))\n", 
+ " result.append((\"F_1\", F_1 / relevant_users))\n", + " result.append((\"F_05\", F_05 / relevant_users))\n", + " result.append((\"precision_super\", prec_super / super_relevant_users))\n", + " result.append((\"recall_super\", rec_super / super_relevant_users))\n", + " result.append((\"NDCG\", ndcg / relevant_users))\n", + " result.append((\"mAP\", mAP / relevant_users))\n", + " result.append((\"MRR\", MRR / relevant_users))\n", + " result.append((\"LAUC\", LAUC / relevant_users))\n", + " result.append((\"HR\", HR / relevant_users))\n", + "\n", + " df_result = (pd.DataFrame(list(zip(*result))[1])).T\n", + " df_result.columns = list(zip(*result))[0]\n", + " return df_result" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "943it [00:00, 9434.06it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
precisionrecallF_1F_05precision_superrecall_superNDCGmAPMRRLAUCHR
00.091410.0376520.046030.0612860.0796140.0564630.0959570.0431780.1981930.5155010.437964
\n", + "
" + ], + "text/plain": [ + " precision recall F_1 F_05 precision_super recall_super \\\n", + "0 0.09141 0.037652 0.04603 0.061286 0.079614 0.056463 \n", + "\n", + " NDCG mAP MRR LAUC HR \n", + "0 0.095957 0.043178 0.198193 0.515501 0.437964 " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ranking_metrics(test_ui, reco, super_reactions=[4, 5], topK=10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Diversity metrics" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "def diversity_metrics(test_ui, reco, topK=10):\n", + "\n", + " frequencies = defaultdict(int)\n", + "\n", + " # let's assign 0 to all items in test set\n", + " for item in list(set(test_ui.indices)):\n", + " frequencies[item] = 0\n", + "\n", + " # counting frequencies\n", + " for item in reco[:, 1:].flat:\n", + " frequencies[item] += 1\n", + "\n", + " nb_reco_outside_test = frequencies[-1]\n", + " del frequencies[-1]\n", + "\n", + " frequencies = np.array(list(frequencies.values()))\n", + "\n", + " nb_rec_items = len(frequencies[frequencies > 0])\n", + " nb_reco_inside_test = np.sum(frequencies)\n", + "\n", + " frequencies = frequencies / np.sum(frequencies)\n", + " frequencies = np.sort(frequencies)\n", + "\n", + " with np.errstate(\n", + " divide=\"ignore\"\n", + " ): # let's put zeros put items with 0 frequency and ignore division warning\n", + " log_frequencies = np.nan_to_num(np.log(frequencies), posinf=0, neginf=0)\n", + "\n", + " result = []\n", + " result.append(\n", + " (\n", + " \"Reco in test\",\n", + " nb_reco_inside_test / (nb_reco_inside_test + nb_reco_outside_test),\n", + " )\n", + " )\n", + " result.append((\"Test coverage\", nb_rec_items / test_ui.shape[1]))\n", + " result.append((\"Shannon\", -np.dot(frequencies, log_frequencies)))\n", + " result.append(\n", + " (\n", + " \"Gini\",\n", + " np.dot(frequencies, np.arange(1 - 
len(frequencies), len(frequencies), 2))\n", + " / (len(frequencies) - 1),\n", + " )\n", + " )\n", + "\n", + " df_result = (pd.DataFrame(list(zip(*result))[1])).T\n", + " df_result.columns = list(zip(*result))[0]\n", + " return df_result" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Reco in testTest coverageShannonGini
01.00.0339112.8365130.991139
\n", + "
" + ], + "text/plain": [ + " Reco in test Test coverage Shannon Gini\n", + "0 1.0 0.033911 2.836513 0.991139" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# in case of errors try !pip3 install numpy==1.18.4 (or pip if you use python 2) and restart the kernel\n", + "\n", + "x = diversity_metrics(test_ui, reco, topK=10)\n", + "x" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# To be used in other notebooks" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "943it [00:00, 11012.47it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RMSEMAEprecisionrecallF_1F_05precision_superrecall_superNDCGmAPMRRLAUCHRReco in testTest coverageShannonGini
00.9494590.7524870.091410.0376520.046030.0612860.0796140.0564630.0959570.0431780.1981930.5155010.4379641.00.0339112.8365130.991139
\n", + "
" + ], + "text/plain": [ + " RMSE MAE precision recall F_1 F_05 \\\n", + "0 0.949459 0.752487 0.09141 0.037652 0.04603 0.061286 \n", + "\n", + " precision_super recall_super NDCG mAP MRR LAUC \\\n", + "0 0.079614 0.056463 0.095957 0.043178 0.198193 0.515501 \n", + "\n", + " HR Reco in test Test coverage Shannon Gini \n", + "0 0.437964 1.0 0.033911 2.836513 0.991139 " + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import evaluation_measures as ev\n", + "\n", + "estimations_df = pd.read_csv(\n", + " \"Recommendations generated/ml-100k/Ready_Baseline_estimations.csv\", header=None\n", + ")\n", + "reco = np.loadtxt(\n", + " \"Recommendations generated/ml-100k/Ready_Baseline_reco.csv\", delimiter=\",\"\n", + ")\n", + "\n", + "ev.evaluate(\n", + " test=pd.read_csv(\"./Datasets/ml-100k/test.csv\", sep=\"\\t\", header=None),\n", + " estimations_df=estimations_df,\n", + " reco=reco,\n", + " super_reactions=[4, 5],\n", + ")\n", + "# also you can just type ev.evaluate_all(estimations_df, reco) - I put above values as default" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "943it [00:00, 10346.82it/s]\n", + "943it [00:00, 11772.32it/s]\n", + "943it [00:00, 10636.62it/s]\n", + "943it [00:00, 10767.92it/s]\n", + "943it [00:00, 12019.93it/s]\n" + ] + } + ], + "source": [ + "dir_path = \"Recommendations generated/ml-100k/\"\n", + "super_reactions = [4, 5]\n", + "test = pd.read_csv(\"./Datasets/ml-100k/test.csv\", sep=\"\\t\", header=None)\n", + "\n", + "df = ev.evaluate_all(test, dir_path, super_reactions)\n", + "# also you can just type ev.evaluate_all() - I put above values as default" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ModelRMSEMAEprecisionrecallF_1F_05precision_superrecall_super
0Self_TopPop2.5082582.2179090.1888650.1169190.1187320.1415840.1304720.137473
0Ready_Baseline0.9494590.7524870.0914100.0376520.0460300.0612860.0796140.056463
0Ready_Random1.5218451.2259490.0471900.0207530.0248100.0322690.0295060.023707
0Self_TopRated1.0307120.8209040.0009540.0001880.0002980.0004810.0006440.000223
0Self_BaselineUI0.9675850.7627400.0009540.0001700.0002780.0004630.0006440.000189
\n", + "
" + ], + "text/plain": [ + " Model RMSE MAE precision recall F_1 \\\n", + "0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 0.118732 \n", + "0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 0.046030 \n", + "0 Ready_Random 1.521845 1.225949 0.047190 0.020753 0.024810 \n", + "0 Self_TopRated 1.030712 0.820904 0.000954 0.000188 0.000298 \n", + "0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 0.000278 \n", + "\n", + " F_05 precision_super recall_super \n", + "0 0.141584 0.130472 0.137473 \n", + "0 0.061286 0.079614 0.056463 \n", + "0 0.032269 0.029506 0.023707 \n", + "0 0.000481 0.000644 0.000223 \n", + "0 0.000463 0.000644 0.000189 " + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.iloc[:, :9]" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ModelNDCGmAPMRRLAUCHRReco in testTest coverageShannonGini
0Self_TopPop0.2146510.1117070.4009390.5555460.7656421.0000000.0389613.1590790.987317
0Ready_Baseline0.0959570.0431780.1981930.5155010.4379641.0000000.0339112.8365130.991139
0Ready_Random0.0500750.0187280.1219570.5068930.3297990.9865320.1847045.0997060.907217
0Self_TopRated0.0010430.0003350.0033480.4964330.0095440.6990460.0050511.9459100.995669
0Self_BaselineUI0.0007520.0001680.0016770.4964240.0095440.6005300.0050511.8031260.996380
\n", + "
" + ], + "text/plain": [ + " Model NDCG mAP MRR LAUC HR \\\n", + "0 Self_TopPop 0.214651 0.111707 0.400939 0.555546 0.765642 \n", + "0 Ready_Baseline 0.095957 0.043178 0.198193 0.515501 0.437964 \n", + "0 Ready_Random 0.050075 0.018728 0.121957 0.506893 0.329799 \n", + "0 Self_TopRated 0.001043 0.000335 0.003348 0.496433 0.009544 \n", + "0 Self_BaselineUI 0.000752 0.000168 0.001677 0.496424 0.009544 \n", + "\n", + " Reco in test Test coverage Shannon Gini \n", + "0 1.000000 0.038961 3.159079 0.987317 \n", + "0 1.000000 0.033911 2.836513 0.991139 \n", + "0 0.986532 0.184704 5.099706 0.907217 \n", + "0 0.699046 0.005051 1.945910 0.995669 \n", + "0 0.600530 0.005051 1.803126 0.996380 " + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.iloc[:, np.append(0, np.arange(9, df.shape[1]))]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Check metrics on toy dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "3it [00:00, 5771.98it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ModelRMSEMAEprecisionrecallF_1F_05precision_superrecall_superNDCGmAPMRRLAUCHRReco in testTest coverageShannonGini
0Self_BaselineUI1.6124521.40.4444440.8888890.5555560.4786320.3333330.750.6769070.5740740.6111110.6388891.00.8888890.81.3862940.25
\n", + "
" + ], + "text/plain": [ + " Model RMSE MAE precision recall F_1 F_05 \\\n", + "0 Self_BaselineUI 1.612452 1.4 0.444444 0.888889 0.555556 0.478632 \n", + "\n", + " precision_super recall_super NDCG mAP MRR LAUC HR \\\n", + "0 0.333333 0.75 0.676907 0.574074 0.611111 0.638889 1.0 \n", + "\n", + " Reco in test Test coverage Shannon Gini \n", + "0 0.888889 0.8 1.386294 0.25 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training data:\n" + ] + }, + { + "data": { + "text/plain": [ + "matrix([[3, 4, 0, 0, 5, 0, 0, 4],\n", + " [0, 1, 2, 3, 0, 0, 0, 0],\n", + " [0, 0, 0, 5, 0, 3, 4, 0]])" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test data:\n" + ] + }, + { + "data": { + "text/plain": [ + "matrix([[0, 0, 0, 0, 0, 0, 3, 0],\n", + " [0, 0, 0, 0, 5, 0, 0, 0],\n", + " [5, 0, 4, 0, 0, 0, 0, 2]])" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recommendations:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456
00305.0204.0604.0
110403.0602.0702.0
220405.0204.0704.0
\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6\n", + "0 0 30 5.0 20 4.0 60 4.0\n", + "1 10 40 3.0 60 2.0 70 2.0\n", + "2 20 40 5.0 20 4.0 70 4.0" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Estimations:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
useritemest_score
00604.0
110403.0
22003.0
320204.0
420704.0
\n", + "
" + ], + "text/plain": [ + " user item est_score\n", + "0 0 60 4.0\n", + "1 10 40 3.0\n", + "2 20 0 3.0\n", + "3 20 20 4.0\n", + "4 20 70 4.0" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import helpers\n", + "\n", + "dir_path = \"Recommendations generated/toy-example/\"\n", + "super_reactions = [4, 5]\n", + "test = pd.read_csv(\"./Datasets/toy-example/test.csv\", sep=\"\\t\", header=None)\n", + "\n", + "display(ev.evaluate_all(test, dir_path, super_reactions, topK=3))\n", + "# also you can just type ev.evaluate_all() - I put above values as default\n", + "\n", + "toy_train_read = pd.read_csv(\n", + " \"./Datasets/toy-example/train.csv\",\n", + " sep=\"\\t\",\n", + " header=None,\n", + " names=[\"user\", \"item\", \"rating\", \"timestamp\"],\n", + ")\n", + "toy_test_read = pd.read_csv(\n", + " \"./Datasets/toy-example/test.csv\",\n", + " sep=\"\\t\",\n", + " header=None,\n", + " names=[\"user\", \"item\", \"rating\", \"timestamp\"],\n", + ")\n", + "reco = pd.read_csv(\n", + " \"Recommendations generated/toy-example/Self_BaselineUI_reco.csv\", header=None\n", + ")\n", + "estimations = pd.read_csv(\n", + " \"Recommendations generated/toy-example/Self_BaselineUI_estimations.csv\",\n", + " names=[\"user\", \"item\", \"est_score\"],\n", + ")\n", + "(\n", + " toy_train_ui,\n", + " toy_test_ui,\n", + " toy_user_code_id,\n", + " toy_user_id_code,\n", + " toy_item_code_id,\n", + " toy_item_id_code,\n", + ") = helpers.data_to_csr(toy_train_read, toy_test_read)\n", + "\n", + "print(\"Training data:\")\n", + "display(toy_train_ui.todense())\n", + "\n", + "print(\"Test data:\")\n", + "display(toy_test_ui.todense())\n", + "\n", + "print(\"Recommendations:\")\n", + "display(reco)\n", + "\n", + "print(\"Estimations:\")\n", + "display(estimations)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Sample recommendations" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + 
{ + "name": "stdout", + "output_type": "stream", + "text": [ + "Here is what user rated high:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
userratingtitlegenres
5748225Emma (1996)Drama, Romance
5450625Sense and Sensibility (1995)Drama, Romance
4058125Titanic (1997)Action, Drama, Romance
294925Star Wars (1977)Action, Adventure, Romance, Sci-Fi, War
6965325Wings of the Dove, The (1997)Drama, Romance, Thriller
790625As Good As It Gets (1997)Comedy, Drama
6940025Shall We Dance? (1996)Comedy
1446925Fargo (1996)Crime, Drama, Thriller
4615125L.A. Confidential (1997)Crime, Film-Noir, Mystery, Thriller
6729325Good Will Hunting (1997)Drama
2092325Secrets & Lies (1996)Drama
5292125Kolya (1996)Comedy
5010324Mrs. Brown (Her Majesty, Mrs. Brown) (1997)Drama, Romance
5197224Mighty Aphrodite (1995)Comedy
51524Heat (1995)Action, Crime, Thriller
\n", + "
" + ], + "text/plain": [ + " user rating title \\\n", + "57482 2 5 Emma (1996) \n", + "54506 2 5 Sense and Sensibility (1995) \n", + "40581 2 5 Titanic (1997) \n", + "2949 2 5 Star Wars (1977) \n", + "69653 2 5 Wings of the Dove, The (1997) \n", + "7906 2 5 As Good As It Gets (1997) \n", + "69400 2 5 Shall We Dance? (1996) \n", + "14469 2 5 Fargo (1996) \n", + "46151 2 5 L.A. Confidential (1997) \n", + "67293 2 5 Good Will Hunting (1997) \n", + "20923 2 5 Secrets & Lies (1996) \n", + "52921 2 5 Kolya (1996) \n", + "50103 2 4 Mrs. Brown (Her Majesty, Mrs. Brown) (1997) \n", + "51972 2 4 Mighty Aphrodite (1995) \n", + "515 2 4 Heat (1995) \n", + "\n", + " genres \n", + "57482 Drama, Romance \n", + "54506 Drama, Romance \n", + "40581 Action, Drama, Romance \n", + "2949 Action, Adventure, Romance, Sci-Fi, War \n", + "69653 Drama, Romance, Thriller \n", + "7906 Comedy, Drama \n", + "69400 Comedy \n", + "14469 Crime, Drama, Thriller \n", + "46151 Crime, Film-Noir, Mystery, Thriller \n", + "67293 Drama \n", + "20923 Drama \n", + "52921 Comedy \n", + "50103 Drama, Romance \n", + "51972 Comedy \n", + "515 Action, Crime, Thriller " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Here is what we recommend:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
userrec_nbtitlegenres
12.01Great Day in Harlem, A (1994)Documentary
9432.02Tough and Deadly (1995)Action, Drama, Thriller
18852.03Aiqing wansui (1994)Drama
28272.04Delta of Venus (1994)Drama
37692.05Someone Else's America (1995)Drama
47112.06Saint of Fort Washington, The (1993)Drama
56532.07Celestial Clockwork (1994)Comedy
65952.08Some Mother's Son (1996)Drama
84892.09Maya Lin: A Strong Clear Vision (1994)Documentary
75362.010Prefontaine (1997)Drama
\n", + "
" + ], + "text/plain": [ + " user rec_nb title \\\n", + "1 2.0 1 Great Day in Harlem, A (1994) \n", + "943 2.0 2 Tough and Deadly (1995) \n", + "1885 2.0 3 Aiqing wansui (1994) \n", + "2827 2.0 4 Delta of Venus (1994) \n", + "3769 2.0 5 Someone Else's America (1995) \n", + "4711 2.0 6 Saint of Fort Washington, The (1993) \n", + "5653 2.0 7 Celestial Clockwork (1994) \n", + "6595 2.0 8 Some Mother's Son (1996) \n", + "8489 2.0 9 Maya Lin: A Strong Clear Vision (1994) \n", + "7536 2.0 10 Prefontaine (1997) \n", + "\n", + " genres \n", + "1 Documentary \n", + "943 Action, Drama, Thriller \n", + "1885 Drama \n", + "2827 Drama \n", + "3769 Drama \n", + "4711 Drama \n", + "5653 Comedy \n", + "6595 Drama \n", + "8489 Documentary \n", + "7536 Drama " + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train = pd.read_csv(\n", + " \"./Datasets/ml-100k/train.csv\",\n", + " sep=\"\\t\",\n", + " header=None,\n", + " names=[\"user\", \"item\", \"rating\", \"timestamp\"],\n", + ")\n", + "items = pd.read_csv(\"./Datasets/ml-100k/movies.csv\")\n", + "\n", + "user = random.choice(list(set(train[\"user\"])))\n", + "\n", + "train_content = pd.merge(train, items, left_on=\"item\", right_on=\"id\")\n", + "\n", + "print(\"Here is what user rated high:\")\n", + "display(\n", + " train_content[train_content[\"user\"] == user][\n", + " [\"user\", \"rating\", \"title\", \"genres\"]\n", + " ].sort_values(by=\"rating\", ascending=False)[:15]\n", + ")\n", + "\n", + "reco = np.loadtxt(\n", + " \"Recommendations generated/ml-100k/Self_BaselineUI_reco.csv\", delimiter=\",\"\n", + ")\n", + "items = pd.read_csv(\"./Datasets/ml-100k/movies.csv\")\n", + "\n", + "# Let's ignore scores - they are not used in evaluation:\n", + "reco_users = reco[:, :1]\n", + "reco_items = reco[:, 1::2]\n", + "# Let's put them into one array\n", + "reco = np.concatenate((reco_users, reco_items), axis=1)\n", + "\n", + "# Let's rebuild it user-item dataframe\n", + 
"recommended = []\n", + "for row in reco:\n", + " for rec_nb, entry in enumerate(row[1:]):\n", + " recommended.append((row[0], rec_nb + 1, entry))\n", + "recommended = pd.DataFrame(recommended, columns=[\"user\", \"rec_nb\", \"item\"])\n", + "\n", + "recommended_content = pd.merge(recommended, items, left_on=\"item\", right_on=\"id\")\n", + "\n", + "print(\"Here is what we recommend:\")\n", + "recommended_content[recommended_content[\"user\"] == user][\n", + " [\"user\", \"rec_nb\", \"title\", \"genres\"]\n", + "].sort_values(by=\"rec_nb\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# project task 2: implement some other evaluation measure" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# it may be your idea, modification of what we have already implemented\n", + "# (for example Hit2 rate which would count as a success users who received at least 2 relevant recommendations)\n", + "# or something well-known\n", + "# expected output: modification of evaluation_measures.py such that evaluate_all will also display your measure" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/.ipynb_checkpoints/P3. k-nearest neighbours-checkpoint.ipynb b/.ipynb_checkpoints/P3. k-nearest neighbours-checkpoint.ipynb new file mode 100644 index 0000000..a15592c --- /dev/null +++ b/.ipynb_checkpoints/P3. 
k-nearest neighbours-checkpoint.ipynb @@ -0,0 +1,1057 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Self made simplified I-KNN" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import helpers\n", + "import pandas as pd\n", + "import numpy as np\n", + "import scipy.sparse as sparse\n", + "from collections import defaultdict\n", + "from itertools import chain\n", + "import random\n", + "\n", + "train_read = pd.read_csv(\"./Datasets/ml-100k/train.csv\", sep=\"\\t\", header=None)\n", + "test_read = pd.read_csv(\"./Datasets/ml-100k/test.csv\", sep=\"\\t\", header=None)\n", + "(\n", + " train_ui,\n", + " test_ui,\n", + " user_code_id,\n", + " user_id_code,\n", + " item_code_id,\n", + " item_id_code,\n", + ") = helpers.data_to_csr(train_read, test_read)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "class IKNN:\n", + " def fit(self, train_ui):\n", + " self.train_ui = train_ui\n", + "\n", + " train_iu = train_ui.transpose()\n", + " norms = np.linalg.norm(\n", + " train_iu.A, axis=1\n", + " ) # here we compute length of each item ratings vector\n", + " norms = np.vectorize(lambda x: max(x, 1))(\n", + " norms[:, None]\n", + " ) # to avoid dividing by zero\n", + "\n", + " normalized_train_iu = sparse.csr_matrix(train_iu / norms)\n", + "\n", + " self.similarity_matrix_ii = (\n", + " normalized_train_iu * normalized_train_iu.transpose()\n", + " )\n", + "\n", + " self.estimations = np.array(\n", + " train_ui\n", + " * self.similarity_matrix_ii\n", + " / ((train_ui > 0) * self.similarity_matrix_ii)\n", + " )\n", + "\n", + " def recommend(self, user_code_id, item_code_id, topK=10):\n", + "\n", + " top_k = defaultdict(list)\n", + " for nb_user, user in enumerate(self.estimations):\n", + "\n", + " user_rated = self.train_ui.indices[\n", + " self.train_ui.indptr[nb_user] : self.train_ui.indptr[nb_user + 1]\n", + " 
]\n", + " for item, score in enumerate(user):\n", + " if item not in user_rated and not np.isnan(score):\n", + " top_k[user_code_id[nb_user]].append((item_code_id[item], score))\n", + " result = []\n", + " # Let's choose k best items in the format: (user, item1, score1, item2, score2, ...)\n", + " for uid, item_scores in top_k.items():\n", + " item_scores.sort(key=lambda x: x[1], reverse=True)\n", + " result.append([uid] + list(chain(*item_scores[:topK])))\n", + " return result\n", + "\n", + " def estimate(self, user_code_id, item_code_id, test_ui):\n", + " result = []\n", + " for user, item in zip(*test_ui.nonzero()):\n", + " result.append(\n", + " [\n", + " user_code_id[user],\n", + " item_code_id[item],\n", + " self.estimations[user, item]\n", + " if not np.isnan(self.estimations[user, item])\n", + " else 1,\n", + " ]\n", + " )\n", + " return result" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "toy train ui:\n" + ] + }, + { + "data": { + "text/plain": [ + "array([[3, 4, 0, 0, 5, 0, 0, 4],\n", + " [0, 1, 2, 3, 0, 0, 0, 0],\n", + " [0, 0, 0, 5, 0, 3, 4, 0]])" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "similarity matrix:\n" + ] + }, + { + "data": { + "text/plain": [ + "array([[1. , 0.9701425 , 0. , 0. , 1. ,\n", + " 0. , 0. , 1. ],\n", + " [0.9701425 , 1. , 0.24253563, 0.12478355, 0.9701425 ,\n", + " 0. , 0. , 0.9701425 ],\n", + " [0. , 0.24253563, 1. , 0.51449576, 0. ,\n", + " 0. , 0. , 0. ],\n", + " [0. , 0.12478355, 0.51449576, 1. , 0. ,\n", + " 0.85749293, 0.85749293, 0. ],\n", + " [1. , 0.9701425 , 0. , 0. , 1. ,\n", + " 0. , 0. , 1. ],\n", + " [0. , 0. , 0. , 0.85749293, 0. ,\n", + " 1. , 1. , 0. ],\n", + " [0. , 0. , 0. , 0.85749293, 0. ,\n", + " 1. , 1. , 0. ],\n", + " [1. , 0.9701425 , 0. , 0. , 1. ,\n", + " 0. , 0. , 1. 
]])" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "estimations matrix:\n" + ] + }, + { + "data": { + "text/plain": [ + "array([[4. , 4. , 4. , 4. , 4. ,\n", + " nan, nan, 4. ],\n", + " [1. , 1.35990333, 2.15478388, 2.53390319, 1. ,\n", + " 3. , 3. , 1. ],\n", + " [ nan, 5. , 5. , 4.05248907, nan,\n", + " 3.95012863, 3.95012863, nan]])" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "[[0, 20, 4.0, 30, 4.0],\n", + " [10, 50, 3.0, 60, 3.0, 0, 1.0, 40, 1.0, 70, 1.0],\n", + " [20, 10, 5.0, 20, 5.0]]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# toy example\n", + "toy_train_read = pd.read_csv(\n", + " \"./Datasets/toy-example/train.csv\",\n", + " sep=\"\\t\",\n", + " header=None,\n", + " names=[\"user\", \"item\", \"rating\", \"timestamp\"],\n", + ")\n", + "toy_test_read = pd.read_csv(\n", + " \"./Datasets/toy-example/test.csv\",\n", + " sep=\"\\t\",\n", + " header=None,\n", + " names=[\"user\", \"item\", \"rating\", \"timestamp\"],\n", + ")\n", + "\n", + "(\n", + " toy_train_ui,\n", + " toy_test_ui,\n", + " toy_user_code_id,\n", + " toy_user_id_code,\n", + " toy_item_code_id,\n", + " toy_item_id_code,\n", + ") = helpers.data_to_csr(toy_train_read, toy_test_read)\n", + "\n", + "\n", + "model = IKNN()\n", + "model.fit(toy_train_ui)\n", + "\n", + "print(\"toy train ui:\")\n", + "display(toy_train_ui.A)\n", + "\n", + "print(\"similarity matrix:\")\n", + "display(model.similarity_matrix_ii.A)\n", + "\n", + "print(\"estimations matrix:\")\n", + "display(model.estimations)\n", + "\n", + "model.recommend(toy_user_code_id, toy_item_code_id)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "model = IKNN()\n", + "model.fit(train_ui)\n", + "\n", + "top_n = pd.DataFrame(model.recommend(user_code_id, item_code_id, 
topK=10))\n", + "\n", + "top_n.to_csv(\n", + " \"Recommendations generated/ml-100k/Self_IKNN_reco.csv\", index=False, header=False\n", + ")\n", + "\n", + "estimations = pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))\n", + "estimations.to_csv(\n", + " \"Recommendations generated/ml-100k/Self_IKNN_estimations.csv\",\n", + " index=False,\n", + " header=False,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "943it [00:00, 9004.71it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RMSEMAEprecisionrecallF_1F_05precision_superrecall_superNDCGmAPMRRLAUCHRReco in testTest coverageShannonGini
01.0183630.8087930.0003180.0001080.000140.0001890.00.00.0002140.0000370.0003680.4963910.0031810.3921530.115444.1747410.965327
\n", + "
" + ], + "text/plain": [ + " RMSE MAE precision recall F_1 F_05 \\\n", + "0 1.018363 0.808793 0.000318 0.000108 0.00014 0.000189 \n", + "\n", + " precision_super recall_super NDCG mAP MRR LAUC \\\n", + "0 0.0 0.0 0.000214 0.000037 0.000368 0.496391 \n", + "\n", + " HR Reco in test Test coverage Shannon Gini \n", + "0 0.003181 0.392153 0.11544 4.174741 0.965327 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import evaluation_measures as ev\n", + "\n", + "estimations_df = pd.read_csv(\n", + " \"Recommendations generated/ml-100k/Self_IKNN_estimations.csv\", header=None\n", + ")\n", + "reco = np.loadtxt(\"Recommendations generated/ml-100k/Self_IKNN_reco.csv\", delimiter=\",\")\n", + "\n", + "ev.evaluate(\n", + " test=pd.read_csv(\"./Datasets/ml-100k/test.csv\", sep=\"\\t\", header=None),\n", + " estimations_df=estimations_df,\n", + " reco=reco,\n", + " super_reactions=[4, 5],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "943it [00:00, 8517.83it/s]\n", + "943it [00:00, 11438.64it/s]\n", + "943it [00:00, 11933.36it/s]\n", + "943it [00:00, 10307.81it/s]\n", + "943it [00:00, 12250.41it/s]\n", + "943it [00:00, 12064.07it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ModelRMSEMAEprecisionrecallF_1F_05precision_superrecall_superNDCGmAPMRRLAUCHRReco in testTest coverageShannonGini
0Self_TopPop2.5082582.2179090.1888650.1169190.1187320.1415840.1304720.1374730.2146510.1117070.4009390.5555460.7656421.0000000.0389613.1590790.987317
0Ready_Baseline0.9494590.7524870.0914100.0376520.0460300.0612860.0796140.0564630.0959570.0431780.1981930.5155010.4379641.0000000.0339112.8365130.991139
0Ready_Random1.5218451.2259490.0471900.0207530.0248100.0322690.0295060.0237070.0500750.0187280.1219570.5068930.3297990.9865320.1847045.0997060.907217
0Self_TopRated1.0307120.8209040.0009540.0001880.0002980.0004810.0006440.0002230.0010430.0003350.0033480.4964330.0095440.6990460.0050511.9459100.995669
0Self_BaselineUI0.9675850.7627400.0009540.0001700.0002780.0004630.0006440.0001890.0007520.0001680.0016770.4964240.0095440.6005300.0050511.8031260.996380
0Self_IKNN1.0183630.8087930.0003180.0001080.0001400.0001890.0000000.0000000.0002140.0000370.0003680.4963910.0031810.3921530.1154404.1747410.965327
\n", + "
" + ], + "text/plain": [ + " Model RMSE MAE precision recall F_1 \\\n", + "0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 0.118732 \n", + "0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 0.046030 \n", + "0 Ready_Random 1.521845 1.225949 0.047190 0.020753 0.024810 \n", + "0 Self_TopRated 1.030712 0.820904 0.000954 0.000188 0.000298 \n", + "0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 0.000278 \n", + "0 Self_IKNN 1.018363 0.808793 0.000318 0.000108 0.000140 \n", + "\n", + " F_05 precision_super recall_super NDCG mAP MRR \\\n", + "0 0.141584 0.130472 0.137473 0.214651 0.111707 0.400939 \n", + "0 0.061286 0.079614 0.056463 0.095957 0.043178 0.198193 \n", + "0 0.032269 0.029506 0.023707 0.050075 0.018728 0.121957 \n", + "0 0.000481 0.000644 0.000223 0.001043 0.000335 0.003348 \n", + "0 0.000463 0.000644 0.000189 0.000752 0.000168 0.001677 \n", + "0 0.000189 0.000000 0.000000 0.000214 0.000037 0.000368 \n", + "\n", + " LAUC HR Reco in test Test coverage Shannon Gini \n", + "0 0.555546 0.765642 1.000000 0.038961 3.159079 0.987317 \n", + "0 0.515501 0.437964 1.000000 0.033911 2.836513 0.991139 \n", + "0 0.506893 0.329799 0.986532 0.184704 5.099706 0.907217 \n", + "0 0.496433 0.009544 0.699046 0.005051 1.945910 0.995669 \n", + "0 0.496424 0.009544 0.600530 0.005051 1.803126 0.996380 \n", + "0 0.496391 0.003181 0.392153 0.115440 4.174741 0.965327 " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dir_path = \"Recommendations generated/ml-100k/\"\n", + "super_reactions = [4, 5]\n", + "test = pd.read_csv(\"./Datasets/ml-100k/test.csv\", sep=\"\\t\", header=None)\n", + "\n", + "ev.evaluate_all(test, dir_path, super_reactions)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Ready-made KNNs - Surprise implementation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### I-KNN - basic" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + 
"metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Computing the cosine similarity matrix...\n", + "Done computing similarity matrix.\n", + "Generating predictions...\n", + "Generating top N recommendations...\n", + "Generating predictions...\n" + ] + } + ], + "source": [ + "import helpers\n", + "import surprise as sp\n", + "\n", + "sim_options = {\n", + " \"name\": \"cosine\",\n", + " \"user_based\": False,\n", + "} # compute similarities between items\n", + "algo = sp.KNNBasic(sim_options=sim_options)\n", + "\n", + "helpers.ready_made(\n", + " algo,\n", + " reco_path=\"Recommendations generated/ml-100k/Ready_I-KNN_reco.csv\",\n", + " estimations_path=\"Recommendations generated/ml-100k/Ready_I-KNN_estimations.csv\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### U-KNN - basic" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Computing the cosine similarity matrix...\n", + "Done computing similarity matrix.\n", + "Generating predictions...\n", + "Generating top N recommendations...\n", + "Generating predictions...\n" + ] + } + ], + "source": [ + "sim_options = {\n", + " \"name\": \"cosine\",\n", + " \"user_based\": True,\n", + "} # compute similarities between users\n", + "algo = sp.KNNBasic(sim_options=sim_options)\n", + "\n", + "helpers.ready_made(\n", + " algo,\n", + " reco_path=\"Recommendations generated/ml-100k/Ready_U-KNN_reco.csv\",\n", + " estimations_path=\"Recommendations generated/ml-100k/Ready_U-KNN_estimations.csv\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### I-KNN - on top baseline" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Estimating biases using als...\n", + "Computing the msd similarity 
matrix...\n", + "Done computing similarity matrix.\n", + "Generating predictions...\n", + "Generating top N recommendations...\n", + "Generating predictions...\n" + ] + } + ], + "source": [ + "sim_options = {\n", + " \"name\": \"cosine\",\n", + " \"user_based\": False,\n", + "} # compute similarities between items\n", + "algo = sp.KNNBaseline(sim_options=sim_options)  # pass the dict explicitly, otherwise it is ignored and the library default similarity is used\n", + "\n", + "helpers.ready_made(\n", + " algo,\n", + " reco_path=\"Recommendations generated/ml-100k/Ready_I-KNNBaseline_reco.csv\",\n", + " estimations_path=\"Recommendations generated/ml-100k/Ready_I-KNNBaseline_estimations.csv\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "943it [00:00, 11286.27it/s]\n", + "943it [00:00, 10874.86it/s]\n", + "943it [00:00, 11509.97it/s]\n", + "943it [00:00, 11855.81it/s]\n", + "943it [00:00, 11574.00it/s]\n", + "943it [00:00, 11080.19it/s]\n", + "943it [00:00, 11550.84it/s]\n", + "943it [00:00, 12148.14it/s]\n", + "943it [00:00, 10779.39it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ModelRMSEMAEprecisionrecallF_1F_05precision_superrecall_superNDCGmAPMRRLAUCHRReco in testTest coverageShannonGini
0Self_TopPop2.5082582.2179090.1888650.1169190.1187320.1415840.1304720.1374730.2146510.1117070.4009390.5555460.7656421.0000000.0389613.1590790.987317
0Ready_Baseline0.9494590.7524870.0914100.0376520.0460300.0612860.0796140.0564630.0959570.0431780.1981930.5155010.4379641.0000000.0339112.8365130.991139
0Ready_Random1.5218451.2259490.0471900.0207530.0248100.0322690.0295060.0237070.0500750.0187280.1219570.5068930.3297990.9865320.1847045.0997060.907217
0Ready_I-KNN1.0303860.8130670.0260870.0069080.0105930.0160460.0211370.0095220.0242140.0089580.0480680.4998850.1548250.4023330.4343435.1336500.877999
0Ready_I-KNNBaseline0.9353270.7374240.0025450.0007550.0011050.0016020.0022530.0009300.0034440.0013620.0117600.4967240.0212090.4828210.0598852.2325780.994487
0Ready_U-KNN1.0234950.8079130.0007420.0002050.0003050.0004490.0005360.0001980.0008450.0002740.0027440.4964410.0074230.6021210.0108232.0891860.995706
0Self_TopRated1.0307120.8209040.0009540.0001880.0002980.0004810.0006440.0002230.0010430.0003350.0033480.4964330.0095440.6990460.0050511.9459100.995669
0Self_BaselineUI0.9675850.7627400.0009540.0001700.0002780.0004630.0006440.0001890.0007520.0001680.0016770.4964240.0095440.6005300.0050511.8031260.996380
0Self_IKNN1.0183630.8087930.0003180.0001080.0001400.0001890.0000000.0000000.0002140.0000370.0003680.4963910.0031810.3921530.1154404.1747410.965327
\n", + "
" + ], + "text/plain": [ + " Model RMSE MAE precision recall F_1 \\\n", + "0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 0.118732 \n", + "0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 0.046030 \n", + "0 Ready_Random 1.521845 1.225949 0.047190 0.020753 0.024810 \n", + "0 Ready_I-KNN 1.030386 0.813067 0.026087 0.006908 0.010593 \n", + "0 Ready_I-KNNBaseline 0.935327 0.737424 0.002545 0.000755 0.001105 \n", + "0 Ready_U-KNN 1.023495 0.807913 0.000742 0.000205 0.000305 \n", + "0 Self_TopRated 1.030712 0.820904 0.000954 0.000188 0.000298 \n", + "0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 0.000278 \n", + "0 Self_IKNN 1.018363 0.808793 0.000318 0.000108 0.000140 \n", + "\n", + " F_05 precision_super recall_super NDCG mAP MRR \\\n", + "0 0.141584 0.130472 0.137473 0.214651 0.111707 0.400939 \n", + "0 0.061286 0.079614 0.056463 0.095957 0.043178 0.198193 \n", + "0 0.032269 0.029506 0.023707 0.050075 0.018728 0.121957 \n", + "0 0.016046 0.021137 0.009522 0.024214 0.008958 0.048068 \n", + "0 0.001602 0.002253 0.000930 0.003444 0.001362 0.011760 \n", + "0 0.000449 0.000536 0.000198 0.000845 0.000274 0.002744 \n", + "0 0.000481 0.000644 0.000223 0.001043 0.000335 0.003348 \n", + "0 0.000463 0.000644 0.000189 0.000752 0.000168 0.001677 \n", + "0 0.000189 0.000000 0.000000 0.000214 0.000037 0.000368 \n", + "\n", + " LAUC HR Reco in test Test coverage Shannon Gini \n", + "0 0.555546 0.765642 1.000000 0.038961 3.159079 0.987317 \n", + "0 0.515501 0.437964 1.000000 0.033911 2.836513 0.991139 \n", + "0 0.506893 0.329799 0.986532 0.184704 5.099706 0.907217 \n", + "0 0.499885 0.154825 0.402333 0.434343 5.133650 0.877999 \n", + "0 0.496724 0.021209 0.482821 0.059885 2.232578 0.994487 \n", + "0 0.496441 0.007423 0.602121 0.010823 2.089186 0.995706 \n", + "0 0.496433 0.009544 0.699046 0.005051 1.945910 0.995669 \n", + "0 0.496424 0.009544 0.600530 0.005051 1.803126 0.996380 \n", + "0 0.496391 0.003181 0.392153 0.115440 4.174741 0.965327 " + ] + }, + 
"execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dir_path = \"Recommendations generated/ml-100k/\"\n", + "super_reactions = [4, 5]\n", + "test = pd.read_csv(\"./Datasets/ml-100k/test.csv\", sep=\"\\t\", header=None)\n", + "\n", + "ev.evaluate_all(test, dir_path, super_reactions)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# project task 3: use a version of your choice of Surprise KNN algorithm" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# read the docs and try to find the best parameter configuration (let's say in terms of RMSE)\n", + "# https://surprise.readthedocs.io/en/stable/knn_inspired.html#surprise.prediction_algorithms.knns.KNNBaseline\n", + "# the solution here can be similar to the examples above\n", + "# please save the output in 'Recommendations generated/ml-100k/Self_KNNSurprisetask_reco.csv' and\n", + "# 'Recommendations generated/ml-100k/Self_KNNSurprisetask_estimations.csv'" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/P0. Data preparation.ipynb b/P0. Data preparation.ipynb index e905e56..c40508c 100644 --- a/P0. Data preparation.ipynb +++ b/P0. 
Data preparation.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -58,7 +58,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -137,7 +137,7 @@ "4 166 346 1 886397596" ] }, - "execution_count": 17, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -155,7 +155,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -184,7 +184,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -226,7 +226,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -268,7 +268,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -283,7 +283,7 @@ "Name: user, dtype: float64" ] }, - "execution_count": 21, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -301,7 +301,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -312,7 +312,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -339,7 +339,7 @@ " 18: 'Western'}" ] }, - "execution_count": 23, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -350,7 +350,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -359,7 +359,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -503,7 +503,7 @@ "[3 rows x 24 columns]" ] }, - "execution_count": 25, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -514,7 +514,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 
11, "metadata": {}, "outputs": [], "source": [ @@ -524,7 +524,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -533,7 +533,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -543,7 +543,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -616,7 +616,7 @@ "4 5 Copycat (1995) Crime, Drama, Thriller" ] }, - "execution_count": 29, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -635,7 +635,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -644,7 +644,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ diff --git a/P1. Baseline.ipynb b/P1. Baseline.ipynb index 3dbaf3a..85b9494 100644 --- a/P1. Baseline.ipynb +++ b/P1. Baseline.ipynb @@ -306,7 +306,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "658 ns ± 16.9 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n", + "471 ns ± 15.3 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n", "Inefficient way to access items rated by user:\n" ] }, @@ -324,7 +324,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "67.8 µs ± 1.68 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n" + "48.3 µs ± 1.51 µs per loop (mean ± std. dev. 
of 7 runs, 10000 loops each)\n" ] } ], @@ -1318,7 +1318,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ @@ -1342,7 +1342,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -1446,24 +1446,24 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "RMSE: 1.5230\n", - "MAE: 1.2226\n" + "RMSE: 1.5165\n", + "MAE: 1.2172\n" ] }, { "data": { "text/plain": [ - "1.2226271020019277" + "1.2172144988785374" ] }, - "execution_count": 30, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -1496,34 +1496,6 @@ "\n", "sp.accuracy.mae(predictions, verbose=True)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/P2. Evaluation.ipynb b/P2. Evaluation.ipynb index fdea66d..e89d78d 100644 --- a/P2. Evaluation.ipynb +++ b/P2. Evaluation.ipynb @@ -1684,7 +1684,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.8.8" } }, "nbformat": 4, diff --git a/P3. k-nearest neighbours.ipynb b/P3. k-nearest neighbours.ipynb index 17eecae..a15592c 100644 --- a/P3. k-nearest neighbours.ipynb +++ b/P3. k-nearest neighbours.ipynb @@ -1049,7 +1049,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.8.8" } }, "nbformat": 4,