diff --git a/.ipynb_checkpoints/P0. Data preparation-checkpoint.ipynb b/.ipynb_checkpoints/P0. Data preparation-checkpoint.ipynb
new file mode 100644
index 0000000..e905e56
--- /dev/null
+++ b/.ipynb_checkpoints/P0. Data preparation-checkpoint.ipynb
@@ -0,0 +1,698 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Building train and test sets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# if you don't have some library installed try using pip (or pip3) to install it - you can do it from the notebook\n",
+ "# example: !pip install tqdm\n",
+ "# also on labs it's better to use python3 kernel - ipython3 notebook\n",
+ "\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import scipy.sparse as sparse\n",
+ "import time\n",
+ "import random\n",
+ "import matplotlib\n",
+ "import matplotlib.pyplot as plt\n",
+ "import os\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "\n",
+ "import helpers\n",
+ "\n",
+ "os.makedirs('./Datasets/', exist_ok = True)\n",
+ "\n",
+ "helpers.download_movielens_100k_dataset()\n",
+ "\n",
+ "df=pd.read_csv('./Datasets/ml-100k/u.data',delimiter='\\t', header=None)\n",
+ "df.columns=['user', 'item', 'rating', 'timestamp']\n",
+ "\n",
+ "train, test = train_test_split(df, test_size=0.2, random_state=30)\n",
+ "\n",
+ "train.to_csv('./Datasets/ml-100k/train.csv', sep='\\t', header=None, index=False)\n",
+ "test.to_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None, index=False)"
+ ]
+ },
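+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The splits are written without a header row, so they have to be read back with explicit column names. A minimal sketch of reloading them (the paths and column order are the ones used above):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Sketch: reload the saved splits; column names must be passed because the files have no header\n",
+ "cols = ['user', 'item', 'rating', 'timestamp']\n",
+ "train = pd.read_csv('./Datasets/ml-100k/train.csv', delimiter='\\t', header=None, names=cols)\n",
+ "test = pd.read_csv('./Datasets/ml-100k/test.csv', delimiter='\\t', header=None, names=cols)"
+ ]
+ },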
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Interactions properties"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### How data looks like?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " user | \n",
+ " item | \n",
+ " rating | \n",
+ " timestamp | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 196 | \n",
+ " 242 | \n",
+ " 3 | \n",
+ " 881250949 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 186 | \n",
+ " 302 | \n",
+ " 3 | \n",
+ " 891717742 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 22 | \n",
+ " 377 | \n",
+ " 1 | \n",
+ " 878887116 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 244 | \n",
+ " 51 | \n",
+ " 2 | \n",
+ " 880606923 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 166 | \n",
+ " 346 | \n",
+ " 1 | \n",
+ " 886397596 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " user item rating timestamp\n",
+ "0 196 242 3 881250949\n",
+ "1 186 302 3 891717742\n",
+ "2 22 377 1 878887116\n",
+ "3 244 51 2 880606923\n",
+ "4 166 346 1 886397596"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df[:5]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Sample properties"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "We have 943 users, 1682 items and 100000 ratings.\n",
+ "\n",
+ "Average number of ratings per user is 106.0445. \n",
+ "\n",
+ "Average number of ratings per item is 59.453.\n",
+ "\n",
+ "Data sparsity (% of missing entries) is 93.6953%.\n"
+ ]
+ }
+ ],
+ "source": [
+ "users, items, ratings=df['user'].nunique(), df['item'].nunique(), len(df)\n",
+ "\n",
+ "print(f'We have {users} users, {items} items and {ratings} ratings.\\n')\n",
+ "\n",
+ "print(f'Average number of ratings per user is {round(ratings/users,4)}. \\n')\n",
+ "print(f'Average number of ratings per item is {round(ratings/items,4)}.\\n')\n",
+ "print(f'Data sparsity (% of missing entries) is {round(100*(1-ratings/(users*items)),4)}%.')"
+ ]
+ },
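+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "`scipy.sparse` is imported above but not used yet; a minimal sketch of packing the interactions into a sparse user-item matrix (it assumes the raw MovieLens ids are used directly as row/column indices, so index 0 stays empty):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Sketch: ratings as a sparse user x item matrix; ids start at 1, so row/column 0 stay empty\n",
+ "interactions = sparse.csr_matrix(\n",
+ "    (df['rating'], (df['user'], df['item'])),\n",
+ "    shape=(df['user'].max() + 1, df['item'].max() + 1))\n",
+ "print(interactions.shape, interactions.nnz)"
+ ]
+ },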
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAA6UAAAHvCAYAAACsfXllAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAABM+UlEQVR4nO3deZgcZbn+8fshC4JB2UJMwjKIyB4mSYNBFKIcJJIQUUSTnyAcPAkqKIhyjHIgQVGD4DHiwiERDkExqHCALMi+CAjCTAgIMQKBAbIQwk4gLAnP74+qmXRN9fRMZqm3eur7ua6+uruquvrut2qSfvqtesvcXQAAAAAAhLBJ6AAAAAAAgOKiKAUAAAAABENRCgAAAAAIhqIUAAAAABAMRSkAAAAAIBiKUgAAAABAMBSlAJARM5tmZh7fRofOU2vMbGsz+5GZPWBmr5nZu3Fbvhw6W08xs0vL9pm60HkAAOgJFKUAul3Zl+jm20c68JoJZctPyyAmaoiZDZLUKOn7kuolDZBkITN1hpmNjn+cmEaRCQBApG/oAAAK4ceSDgkdAjXtDEl18eO7Jf1e0ipJLumdQJk6Y7SkqfHj2yU1hQoCAEBeUJQCyMInzezf3P3m0EFQsw6P71+S9Cl3fyNkmKy4+/GSjg8cAwCAHsXhuwB6Unnh8JNgKdAb7BDf/6soBSkAAEVBUQqgJz0j6er4ccnMPhcyDGpa//j+raApAABAt6MoBdDT/kvSu/Hjc8ysT2dXVDYQ0u1dXdbMbm9eJn6+iZmdEE9/zsxeN7N/mNkZZrZFq9d+wMx+aGYPmdmrZvaKmf3VzL7Yic90uJlda2bLzOyt+H6OmR2wEevYNs55p5k9a2Zvm9nq+Pl/ts5f4fVNcVs0xc/fY2bfNLO7zGxVPMrt7Rv72crWv7mZfcvMbovzvRW38V1m9j0ze38br2sZrbhs8sEVBtIavZF5ji977fHxtJKZ/dbMHo+3fWK9Fvl4PPrvrWa2Iv4cr5vZk2Z2hZkdYWYVB19q/izacD6pJN1W4bPc3up1VUffjQdOSgwQZmY7mtnPzGxJnO9lM/ubmX3dzDp02o6ZfdbMFsTb/814H/m9xYOWVWrDNtbzGTP7k5k9YWZvxOtabmYPmtnvzOxLZrZVRzK1sf5Eu5nZQDP7gUV/u6/Ef5+NZjbFzDbbiPWOidv+MYtGen7DzJbG0z7Wzms3ev/qYKYOj8TckWUt+jv/upndZGYr4/15Tbyt7zOzX5jZp82sXzvvdYCZXWhmi+N97U0ze9rM/mhmY9t5baX9dzczm2Fm/4y3X9V9DEAv4e7cuHHj1q03RYPPuKQl8fNLy6Yd38ZrJpQtM62d9d6+ERkqLqtokJnmZQZIurnseevbQklbxa87QNJzVZY9v0qmaWXLjZb06yrrWS9pagc+5/GSXq2yHpf0rKQDqqyjKV6uSdLOkh6usI5227yNdY+StLydfM8rOk+0WntVu43eyEzHl++PkqZIWldtvZL+t4NZ/iLpfV34LLe3et2lZfPqKqx3dNn8aZLGKDrvtq313yhp0ypt00/Sn6q8fp2kb7duwwrr2UzS/A5+5lO74d+a2yUNl7Ssyvs8JmnndtY3UNItHcj8W0n9umv/6uBnrbovbOR+s0vcHh3ZPvVtvMd7Jc3pwOvnS9qijXW03n+/rOi0j9brSO1j3Lhx6103BjoCkIWpkiYqOgRzmpn9wd3fDpyp3P8qGh34bkVfyJ+VtJOkk+L74ZJmmNlUSTco+hy/lXSXpLclfVzSJEWDx33bzG5w95vaec9TJB2pqCD7raSHJG2uqKg4StGRLNPM7AV3/1WlFZjZKZJmxE/fknSVpDslvSBp63hdn5E0SNLNZrafuy+ukmlTSf8naa/4s10laYWiL+qD2vk8lfLVS7pVUYEiSQ9I+oOkpyV9QNIXJB0oaRtJ883sU+5+e9kqrpC0KH58dXz/iKLe93IPb2y2Ml+Q9GlJr0iareiyM+sl7RtPa7aZoja+Q9J9kpZKel1R23xY0rHa0OaXKdq25Zo/ywRJzT3qZ1bI/nwXPku9pNMVXSrnIkn3xJlLkr6qqIg4VNFIxme1sY6Zko6OH7+pqLi5R1GblCR9RdL5kq5sJ8uPJTX3kq1UNFryI5LWKPoR6EOKfuA5qMOfrrr3K9p3hyoqvK+R9KKk3eLMO8bveYuZ1bv7q61XYGZbK/qsu8STFkv6s6RHFR3tsZeiInP7eJ191f4gVB3dvzIT9+b/WVF7SNF+eaWkJxSNZL2VpD0kfULRPlVpHZsq+iFvVDzpaUUF6iOK9rkPKSowd1O0H1xjZoe6+7sVVtfsQEX75npJFyv69/jNeB3PbvQHBVBbQlfF3Lhx6303bfh1e0nZtAvKpn+zwmtC9pS6pO9XWGagNvTyrVP05e05ScMqLHts2bqua+M9p7V6z4clbVdhuSMVfTl0RYXPjhWWGVm2zBJJu7bxnmMVFc4u6e9tLNPUKte3umEf2ETJHtcZkjapsNyZZcs8Lek9Xd32Hch2fKvP+09JQ9p5zcclbVll/nuV7GE8uAP7wOgOZL20bPm6CvNHt/osT1XaFyTtX7a/vKgKvaWKfphpXs9qSXtXWKauwv5yfKtl+kh6OZ7XVGkfb/U3tnsXtmXrHrWTKiwzQNJtZcv8so11XV22zH+1sb8OUPTDVPNyY7pj/+rgZ626L3R0WUU/LjTPmyepT5X17ClpmwrTf162jgsl9a+wTD9FhXjzcl/twP67UtKeXW0rbty41d6Nc0oBZOUcRQWWJJ1hZgNChmnlBnf/ceuJ7r5aUnMvZR9FvRsnu/tDFZb9naLD4STpkA6cu7dO0hfd/bkK67pG0s/ip5tL+lqF109V1FPzlqRx7v5YhWXk7gskTY+f7m9mH20n19Xu/vN2lumIcYp6liTpXkWFbqqXxN1/KGlB/HQHScd0w3tvDJc0wd1XVF3I/U53f7nK/NcV9Z417+PHdlvCjXNMpX3B3e+T9Mf46VaKitTWvlX2+GR3T/VAu3uT2u8dHKio51KSrq20j5etb7W7L2lnfR11hbv/usJ7rFH0o1dz7+hXzGzL8mXMbIQ29G5f4u7ntLG/Nq+ruZfztHYydWj/ytiHyh5f4u7r21rQ3Re7+wvl08xssKSvx09vcfeveYUjX9z9HUn/oagHVmq/rSTpRK9+NAeAXoqiFEAm4i+mM+Kn20k6NViYtIqHx8buLnu8StUPW7wrvu+vDYcAtuUGd3+kyvwZig5jk6TPls+IB4ZpPjTyWnd/vJ33+n3Z40+1s+wv25nfUeUjLZ/n7l5l2ellj7MeoflOd3+wO1bk7q9J+kf89CPdsc6N9IC731ll/q1lj/csn2Fm79GGfWOlosM7K/LoEOvUDzNl1rb1Pj3sZ23NcPdV2vB3sJmiw6zLlf+IcH61N3H3lyRdFz89KD6UtS3dtn91o/JLKu3V5lJt+4I2jIbdZptLLYVp848hu7YzQNNTinpuARQQ55QCyNJ5inr9tpb0HTP7jbu/GDiTJP29yrx
VZY8bK/WetLFseyOK3lJtprs/a2b/lLS3pA+b2fvdvbl35kBt+FHxTTM7sp33Kh89c48qy61XdE5dd2juiXNJ7Z1f+zdtONcw62KuWhGXEBcfX1B0nu6+is6zHaDoHM7Wtu+WdBvn3nbmLy973Hr/3Fcb9pM72tnPpejw92GVZrj7K2Z2n6J94N/M7GpFP3bcGRcpPeEVRedrVnOrNvTw7afoPN9mH4/v35a0m5nt1s66Ni27/6CiQ3Qr6fD+laG7FP1wsJmkqfGPXLMrHQHSho+XPd6uA//+lO9reyg6pLtirnZ+vALQi1GUAshM/GX1XEnnKjq8b4qk/wybSlI0MFBbyq+LWW251su+p51l2+vdbF5mb0VFzwe04ZDBurJlvhzfOqpasfyCu7+5EeuqZnB8/2zcg9gmd3/XzJYqKoy2NrP+lQ4H7CHL219EMrN9FA38tGsH1/u+TifqvPYGSaq2fw4pe/yE2tfeMicp+uHlfYoOiz1S0utm9ndFRdHNku7uQPHbUUs7UNCU/80NaTWvLr7vrw2DanVUtb+pDu1fWXL3F83sW4rOBe2r6LDa08zsOUU/EN0p6S/u3lahXVf2+NKNfPuaaisA2eHwXQBZ+6WiEV0l6WQza/3lMHMb8cW4u75AS8lD6Nryetnj8nNw3996wY3Qv8q8tVXmbazma6O+XnWpDdZUeG0W2v3M8aisN2tDQfqMoi/0p0j6f4oOOf5sfGs+JDvE/69d2T/fW/Z4Y/fNFHdvUDRy62Xa0MbvlfRJRSP//lXSUjPrrnOIu/L3JNXG31S3cfeLFI2ue4s27DfbKfrx4GeSFpvZ3WZW6dzjQrUVgGzQUwogU+6+1sx+qOhL/WaKvqB+tTvfw8xq4Qe3zTuwTHmhsKaNx8e7++zuidStXpO0pZKfoZryIqFqz2oAJyv6wi5Fo4n+h7uvq7SgmZ2RWaruVV6wbey+WZG7PynpODM7UdHlXz4q6WOSDlb0t18n6XdmtmOlgcY2Ulf+npqfbympyd137mKWPGj330B3v0PSHWa2jaJDcg9QtG32i1//UUl3VbhUU3PbrZO0WVt/CwCwMWrhixuA3udiRdd5lKKRMD9UbeEyzYd0Vvu1XZK27VSqbHXkMzcv40pep6/8MLfODFSShZXx/QfMrGrPZ3zdxOaBoV7I8NDdjvq3+H6dpFPb+RK+UwZ5ekL56LAf7MDyHVlGkuTub7r7be7+I3f/tKIC/7uK9mtJOisujLpil3g/qqb8b671aLjNf1M7mFmIQ687ovzw6277N9DdX3D3a9z9u+4+StE1Xf8Qz+6n9MBPzW3VV9E1egGgyyhKAWQuHuzkrPhpX0k/6OBLX47v2zvkN8TIpxvrk9VmmtkHtGFQokfLBjmSokMfm7/QfyanPcP3xfem6PqX1XxUG3pK76u2YCCD4vsXql0WxsyGK7ocSjXlh9i2V0Rl6UFF1zGVohFl29unRnf2jdx9jbv/VNE5ulI0WNB+nV1f7P2SRrSzzCfKHt/fat4d8X0fSUd0MUtPebnscZv/BppZH0XXIu0Ud18u6Tht+CFspJltVrbIHWWPEyODA0Bn5fGLDIBimKMNl5WYoGiQm/Y0X79uJzOr1lPzza4Ey8gYM6s2Eu43FX1BlqT/K58RX17n+vjphxVdHzNvrip7/J12erG+28br8qL5fMXt2un1PavKvGblh4129NDmHhcPcHVj/HSIpKPbWtbMRquNkXc3UlPZ4+44najN62Ca2UBtuAbuWm34+2l2Wdnjs8wsN9umTPn1O6v9qDVB7f84UlV8NMCysknl2+cKbThq5VvxD2gA0CUUpQCCiEfKbD7/ziR9owMvK/8ieW6lQsfMfqANh1vmWV9Jf4y/LCeY2RGSvhM/fUPR+bet/Zc29Gz9sr0BY8xsRzM7z8y2q7ZcN1qgDYP+HCjpvEq9b2b2fW3omXpG0uXZxNsozb1qJumc1jMt8gNFg8S058myx+317GVtRtnjX5nZ3q0XiK8zeWm1lZjZcDM708wGVVlmW20ofF3Vr3vaUf/PzFLnp8cF5hxtGKDn4tY93u7+d234QeTDkua1k7+vmR1pZl9va5kecJM2XLv4JDNLHSpuZiW1c61hM/uSmf17q97P1suMkjQ8fvpE+Qja7v5M2XtsI+mGaqdgxH8fh9Tw+dYAMsBARwCCcff5ZvY3RYdvdqRn4hJFl5DZWtLnJd1pZpcruhTGjop6CEqKfsmf0COhu881ioqYR8xslqR/KBqs5TBFX9abC+7vxl8CE9x9oZl9TdIsRYc//s7Mvi3pWkWXvnhL0cAtuysqCveP1/mLHvtEyXzvmtmxku5WNKjNtyV9It5eyxQdEvsFRQPfSFGB/eVuvCRNd/qNpBMU9Vx/08zqFfVePytpB0Uj8A5X1JO1VtLIKuu6U9Fn7SfpdDNrLsiazxd80d2DHMLs7jeb2aWSjld0TuL98fO/KTrsuKSoHd4n6UpFf4NSetTf9ys6JH+qmd0dv/5RRQNYbS1pH0VttnW8/OXu/nQX4y9StL9fGF8382pFh7s2H0nQXMA9qQ0/hrV2Qrz8PooO9X3CzK5UdO3e5xVdRmewoh8TPhXnv7iLuTvM3VeY2R8kHRu/9/1m9htF+90ARYdUT5T0kqJrsrbVm7qrpKmKfsy6SdGPLs8o2ge3UzTo0ZHacKRGpUGovqdodOVDFPWaLzazaxWdWvCsov17kKIjYA5V1Pt+i6QfdeazA+j9KEoBhPY9Jc9RapO7r457BP9P0RfEA+NbufmKvoTmvSj9haIBQ06S9P0K813SD9z9V22twN0vjq8tOEvRF8D6+NaWFyRlVvS5+wNmdoiiHqjmL/OVegdflPT/Wo3wmRvuvsjMviHpV4qOMDoovpX7p6TPSPptO+t63szOV7TfD1D6fOo71IXzNbvBZEW5Pq/ob+yrSo6O/a6iXvxXtKEobT1acnOR2keV26rcn+L37KpXJP27or//w+Jba0slfcrdX620And/1cwOVPT39EVFPxK1dx3g1gMm9bRTFRXN9YoO0Z3aav5KRed5fq3KOpq3z3u14Rqylbwj6Ux3TxXe7v6OmR2u6PIxX1NUhH5eG/aJSrgOKYA2cfgugKDc/a9Kn99Vbfm/KPpC9r+SnlZ0btNqSbcp6kEY7+4duWZhcO5+sqSxkuYp+nL7dnz/R0kHuvu0DqxjnqSdFRUOcxX1eKzVhna5R9GhdkdIGuLuz3f7B6me7x5FPTOnKSq4Viv6svtCnO0MSbu4+w1Z5tpY7n6hoh9A/qyoJ+gdSc8p6gU8TVLJ3R/v4Lq+r6hH6/p4XbkZbdjd33H3oyUdpSjfakU9aE8rOrT6QHf/maLDNpu92Godf5W0m6Ji5U+Slig6l/bd+H6xoqMeDnb3L7p7t1yf0t0XKeqxPkfSw4qK5TWSHlD0w88wd3+inXW85u4TFP14MiN+7QuKRl5eI+kxRUc5nKZov+3IecTdxt1fVHRkyZQ42xpFl/NZrKgXct/4UORqfiRplKI2uV7Rub1rFX3GlxQNNnaupD3d/d
wqWd52928oOhpjuqS/K9pf1ik67eBJSddpQ9sft/GfGEBRWHRaFwAAQMeY2VWSPhc/3SYulkLkaP4Sc4e7jw6RAQDQdfSUAgCADosHOxoXP30wVEEKAOg9KEoBAIAkycx2MbPtq8wfqmgQof7xpIsyCQYA6NUY6AgAADQ7QNL/mtlfFY0UvFTR+YbbKDoP8QuKBgCSpHslzQwREgDQu1CUAgCAcn0VXU6krUuKSNLtko5y9/VVlgEAoEMoSgEAQLO5kr4kaYyikWy3VXRNzLclrVI0wuoV8ajPAAB0i1yMvrvtttt6XV1d6Bg9ZvXq1Ro4cGDoGC3ylqcoaHcAAAAUVWNj4/PuXvHLcC56Suvq6tTQ0BA6BgAAAACgB5jZU23NY/TdDEybNi10hIS85SkK2h0AAABIy8Xhu6VSyXtzT6mZKQ/t3CxveYqCdgcAAEBRmVmju5cqzaOnFAAAAAAQDEUpAAAAACAYitIM5O3Q5LzlKQraHQAAAEijKAUAAAAABMNARxnI2wA3ectTFLQ7AAAAioqBjgAAAAAAudRuUWpml5jZc2b2cNm0P5rZovjWZGaL4ul1Zra2bN7/9GB2AAAAAECN69uBZS6V9CtJlzVPcPcvNj82s59JeqVs+aXuXt9N+XqFqVOnho6QkLc8RUG7AwAAAGkdOqfUzOokzXf3vVtNN0lPS/qkuz/W1nLt6e3nlAIAAABAkfXkOaUfl7TK3R8rm7azmT1gZneY2cerhJpsZg1m1rB69eouxsi3IUOGhI6QkLc8RUG7AwAAAGldLUonSppT9nylpB3dfbik0yT9wczeV+mF7j7T3UvuXho4cGAXY+TbypUrWx4/88wz+sQnPqE99thDe+21l37xi1+0zJs2bZqGDh2q+vp61dfX67rrrpMk3X333Ro2bJj2228/Pf7445Kkl19+WYcddlinRnMtz9MV11xzjRYvXtzy/KyzztLNN98sSRo9enSnr8t57bXXatiwYaqvr1epVNJdd90lSXrzzTe1//77a99999Vee+1Vc4fDttfu//rXv1q2fX19vd73vvdpxowZkrLbN7pLT+0bS5Ys0QEHHKBNN91U559/fmr++vXrNXz4cI0bN65l2qJFizRq1KiW/em+++7r1HsDAACgh7h7uzdJdZIebjWtr6RVkrav8rrbJZXaW//IkSO9N4uaObJixQpvbGx0d/dXX33Vd911V3/kkUfc3X3q1Kl+3nnnpV7/2c9+1h999FG/8cYb/bTTTnN399NOO81vv/32LufpiuOOO87//Oc/V5x38MEH+/3339+p9b722mv+7rvvurv7gw8+6Lvttpu7u7/77rv+2muvubv722+/7fvvv7/fc889nXqPEDam3detW+eDBg3ypqYmd89u3+guPbVvrFq1yu+77z7//ve/X7E9fvazn/nEiRN97NixLdMOPfRQv+6669zdfcGCBX7wwQd36r0BAADQeZIavI16sCs9pf8maYm7L2ueYGYDzaxP/PiDknaV9EQX3qNXGDFiRMvjwYMHtzzfYosttMcee2j58uVVX9+vXz+tXbtWb7zxhvr166elS5dq+fLlOvjgg9t8zfXXX6/dd99dH/vYx/TNb36zpeeoucet2d57762mpiZJ0pFHHqmRI0dqr7320syZM1uWGTBggM444wztu+++GjVqlFatWqW//e1vmjt3rk4//XTV19dr6dKlOv7443XllVemstx444064IADNGLECB199NFas2ZN1c87YMAARacrS6+//nrLYzPTgAEDJEnvvPOO3nnnnZZ5taB8P2jPLbfcol122UU77bRT1eW6e98o733M476x3Xbbab/99lO/fv1S85YtW6YFCxboP/7jPxLTzUyvvvqqJOmVV17hMGoAAICc6cglYeZIukfSbma2zMy+Es+aoOShu5J0kKSHzOxBSVdK+qq7v9idgWtRY2NjxelNTU164IEH9JGPfKRl2q9+9SsNGzZMJ5xwgl566SVJ0ve+9z1NnjxZM2bM0Mknn6wzzjhDP/zhD9t8vzfffFOTJk3SvHnzdOedd+rZZ59NzD/11FMrvu6SSy5RY2OjGhoadMEFF+iFF16QFBWGo0aN0oMPPqiDDjpIs2bN0kc/+lGNHz9e5513nhYtWqRddtml4jqff/55nXPOObr55pu1cOFClUol/fd//7ek6JDOuXPnVnzd1Vdfrd13311jx47VJZdc0jJ9/fr1qq+v13bbbadDDz000XZ519Z+UMkVV1yhiRMnJqZlsW+0JU/7RltOPfVU/fSnP9UmmyT/WZsxY4ZOP/107bDDDvrOd76jn/zkJxu1XgAAAPSsdotSd5/o7oPdvZ+7b+/uF8fTj3f3/2m17FXuvpe77+vuI9x9Xk8FryWTJ09OTVuzZo2OOuoozZgxQ+97X3Ta7de+9jUtXbpUixYt0uDBg/Xtb39bklRfX697771Xt912m5544gkNGTJE7q4vfvGLOuaYY7Rq1arEupcsWaKdd95Zu+66q8xMxxxzTGJ+pR4rSbrgggtaeryeeeYZPfZYNH5V//79W3rTRo4c2dJ71hH33nuvFi9erAMPPFD19fWaPXu2nnrqKUnSD37wA40fP77i6z772c9qyZIluuaaa3TmmWe2TO/Tp48WLVqkZcuW6b777tPDDz9c8fV5VGk/qOTtt9/W3LlzdfTRR7dMy2rfaEue9o1K5s+fr+22204jR45Mzbvwwgv185//XM8884x+/vOf6ytf+UqFNQAAACCUrg50hA6YNWtW4vk777yjo446Sl/60pf0uc99rmX6oEGD1KdPH22yySaaNGlSakAWd9c555yjM888U2effbbOPvtsHXPMMbrgggtS79nWYa19+/bV3//+95bnb775piTp9ttv180336x77rlHDz74oIYPH94yr1+/fi3r69Onj9atW9fhz+7uOvTQQ7Vo0SItWrRIixcv1sUXX9zh1x900EFaunSpnn/++cT0LbfcUqNHj9b111/f4XWF1no/aMtf/vIXjRgxQoMGDWqZltW+8e6777Y8z/u+Ue7uu+/W3LlzVVdXpwkTJujWW29tKbhnz57d8nd29NFHM9ARAABAzlCUZszd9ZWvfEV77LGHTjvttMS88tFZr776au29d/Jyr7Nnz9bYsWO11VZb6Y033tAmm2yiTTbZRG+88UZiud13311PPvmkli5dKkmaM2fDUdZ1dXUtjxcuXKgnn3xSUnSu3VZbbaXNN99cS5Ys0b333tvuZ9liiy302muvVV1m1KhRuvvuu1tGhn3jjTf06KOPVn3N448/3jJy7MKFC/X2229rm2220erVq/Xyyy9LktauXaubb75Zu+++e7s5a82cOXNSh+5mtW8sXLhQUn73jbb85Cc/0bJly9TU1KQrrrhCn/zkJ/X73/9eUnQpnjvuuEOSdOutt2rXXXft1HsAAACgZ/QNHaBW1E1ZUHV+0/SxHVrP3Xffrd/97nfaZ599VF9fL0n68Y9/rMMPP1z/+Z//qUWLFsnMVFdXp4suuqjldW+88YZmz56tG
2+8UZJ02mmn6aijjlL//v0ThYUkvec979HMmTM1duxYbbvttvrYxz7WcpjrUUcdpWOPPVb19fXab7/99OEPf1iSNGbMGP3P//yPhg0bpt12202jRo1q97NMmDBBkyZN0gUXXNDmIcEDBw7UpZdeqokTJ+qtt96SJJ1zzjn68Ic/rLPOOkulUil1mOZVV12lyy67TP369dNmm22mP/7xjzIzrVy5Uscdd5zWr1+vd999V1/4whcSl/7oDd544w3ddNNNiW0vKbN947LLLsv1vvHss8+qVCrp1Vdf1SabbKIZM2Zo8eLFLYfAVzJr1iydcsopWrduXcvnBwAAQH5Yc49USKVSyTt73cKsdKUoXbFiRdARP2+//Xadf/75mj9/fi7yFFUe2731vgEAAAD0BDNrdPdSpXkcvpuBjRl1NQt5y1MUtDsAAACQRk9pB3Wlp9TMlId2bpa3PEVBuwMAAKCo6CkFAAAAAOQSRSkAAAAAIBiK0gy0Hkk1tLzlKQraHQAAAEjjnNIO6q5LwgAAAABA0XBOaWBmFjpCQt7yFAXtDgAAAKRRlAIAAAAAgqEoBQAAAAAEQ1GagXHjxoWOkJC3PEVBuwMAAABpFKUZmDdvXugICXnLUxS0OwAAAJBGUZqBI444InSEhLzlKQraHQAAAEijKM3A/PnzQ0dIyFueoqDdAQAAgDSKUqSYmY499tiW5+vWrdPAgQM3+pzI0aNHq/n6s4cffrhefvnl7owpSXr66af1qU99SnvssYf23HNPNTU1SZKOP/547bzzzqqvr1d9fb0WLVrU7e8NAAAAoOv6hg6A/Hnve9+rhx9+WGvXrtVmm22mm266SUOHDu3SOq+77rpuSpf05S9/WWeccYYOPfRQrVmzRptssuF3lvPOO0+f//zne+R9AQAAAHQPekoz4O6hIyR0JM+nP/1pLViwQJI0Z84cTZw4sWXe66+/rhNOOEH77befhg8frmuvvVaStHbtWk2YMEHDhg3TF7/4Ra1du7blNXV1dXr++eclSUceeaRGjhypvfbaSzNnzmxZZsCAATrjjDO07777atSoUVq1alXVjIsXL9a6det06KGHtrx+880372ArZC9v+wEAAACQBxSlGSgvvPKgI3kmTJigK664Qm+++aYeeughfeQjH2mZ96Mf/Uif/OQndf/99+u2227T6aefrtdff10XXnihNt98cz300EM644wz1NjYWHHdl1xyiRobG9XQ0KALLrhAL7zwgqSo2B01apQefPBBHXTQQZo1a5Ykae7cuTrrrLNS63n00Ue15ZZb6nOf+5yGDx+u008/XevXr2+Zf8YZZ2jYsGH61re+pbfeemuj2qgn5G0/AAAAAPKAojQDJ554YugICR3JM2zYMDU1NWnOnDk6/PDDE/NuvPFGTZ8+XfX19Ro9erTefPNNPf300/rrX/+qY445puX1w4YNq7juCy64oKU39JlnntFjjz0mSerfv3/LeasjR45sOT90/Pjx+sEPfpBaz7p163TnnXfq/PPP1/33368nnnhCl156qSTpJz/5iZYsWaL7779fL774os4999wOtU1Pytt+AAAAAOQBRSnaNH78eH3nO99JHLorRYehXnXVVVq0aJEWLVqkp59+WnvssYekaJCkam6//XbdfPPNuueee/Tggw9q+PDhevPNNyVJ/fr1a3l9nz59tG7duqrr2n777TV8+HB98IMfVN++fXXkkUdq4cKFkqTBgwfLzLTpppvq3//933Xfffd1qg0AAAAA9CyKUrTphBNO0FlnnaV99tknMf2www7TL3/5y5ZzJB944AFJ0kEHHaTLL79ckvTwww/roYceSq3zlVde0VZbbaXNN99cS5Ys0b333tvpfPvtt59eeuklrV69WpJ06623as8995QkrVy5UlJUQF9zzTXae++9O/0+AAAAAHoORWkG5s6dGzpCQkfzbL/99jrllFNS088880y98847GjZsmPbee2+deeaZkqSvfe1rWrNmjYYNG6af/vSn2n///VOvHTNmjNatW6dhw4bpzDPP1KhRozqUt9I5pX369NH555+vQw45RPvss4/cXZMmTZIkfelLX9I+++yjffbZR88//7z+67/+q0OfuSflbT8AAAAA8sDyMCJoqVTy5utZ5lXdlAVV5zdNH9vmvBUrVmjIkCHdHanT8panKGh3AAAAFJWZNbp7qdI8ekoz0NVrfHa3vOUpCtodAAAASKMoBQAAAAAEQ1EKAAAAAAiGojQDzYPv5EXe8hQF7Q4AAACkMdBRB3VloCMAAAAAKDIGOgps5MiRoSMk5C1PUdDuAAAAQBpFaQYWLlwYOkJC3vIUBe0OAAAApFGUAgAAAACCoSjNwODBg0NHSMhbnqKg3QEAAIA0itIMrFixInSEhLzlKQraHQAAAEijKM3AtGnTQkdIyFueoqDdAQAAgDQuCdNBXbkkjJkpD+3cLG95ioJ2BwAAQFFxSRgAAAAAQC5RlAIAAAAAgqEozUDeDk3OW56ioN0BAACANIpSAAAAAEAwFKUZKJUqns8bTN7yFAXtDgAAAKRRlAIAAAAAgqEoBQAAAAAEQ1GagalTp4aOkJC3PEVBuwMAAABp5u6hM6hUKnneRyatm7Kg6vym6WMzSgIAAAAAtcXMGt294iAr9JRmYMiQIaEjJOQtT1HQ7gAAAEAaRWkGVq5cGTpCQt7yFAXtDgAAAKRRlAIAAAAAgqEozcCIESNCR0jIW56ioN0BAACAtHaLUjO7xMyeM7OHy6ZNM7PlZrYovh1eNu97Zva4mf3LzA7rqeC1pLGxMXSEhLzlKQraHQAAAEjrSE/ppZLGVJj+c3evj2/XSZKZ7SlpgqS94tf8xsz6dFfYWjV58uTQERLylqcoaHcAAAAgrd2i1N3/KunFDq7vM5KucPe33P1JSY9L2r8L+XqFWbNmhY6QkLc8RUG7AwAAAGldOaf0ZDN7KD68d6t42lBJz5QtsyyeBgAAAABASmeL0gsl7SKpXtJKST+Lp1uFZb3SCsxsspk1mFnD6tWrOxkDAAAAAFDLOlWUuvsqd1/v7u9KmqUNh+guk7RD2aLbS1rRxjpmunvJ3UsDBw7sTIyasXz58tAREvKWpyhodwAAACCtU0WpmQ0ue/pZSc0j886VNMHMNjWznSXtKum+rkWsfXkbdTVveYqCdgcAAADS+ra3gJnNkTRa0rZmtkzSVEmjzaxe0aG5TZJOlCR3f8TM/iRpsaR1kk5y9/U9kryGjB8/Xu4Vj2IOIm95ioJ2BwAAANLaLUrdfWKFyRdXWf5Hkn7UlVAAAAAAgGLoyui7AAAAAAB0CUVpBi666KLQERLylqcoaHcAAAAgzfJwjlupVPKGhobQMaqqm7Kg6vym6WMzSgIAAAAAtcXMGt29VGkePaUZMKt0+dZw8panKGh3AAAAII2iFAAAAAAQDEUpAAAAACAYitIMjBs3LnSEhLzlKQraHQAAAEijKM3AvHnzQkdIyFueoqDdAQAAgDSK0gwcccQRoSMk5C1PUdDuAAAAQBpFaQbmz58fOkJC3vIU
Be0OAAAApFGUAgAAAACCoSgFAAAAAARDUZoBdw8dISFveYqCdgcAAADSKEozMHPmzNAREvKWpyhodwAAACDN8tB7UyqVvKGhIXSMquqmLKg6v2n62DbnmVmuesnylqcoaHcAAAAUlZk1unup0jx6SgEAAAAAwVCUAgAAAACCoSjNwNy5c0NHSMhbnqKg3QEAAIA0itIMjBw5MnSEhLzlKQraHQAAAEijKM3A0KFDQ0dIyFueoqDdAQAAgDSKUgAAAABAMBSlAAAAAIBgKEozMGnSpNAREvKWpyhodwAAACDN3D10BpVKJW9oaAgdo6q6KQuqzm+aPjajJAAAAABQW8ys0d1LlebRU5qBvI26mrc8RUG7AwAAAGkUpRlYuHBh6AgJectTFLQ7AAAAkEZRCgAAAAAIhqI0A4MHDw4dISFveYqCdgcAAADSKEozsGLFitAREvKWpyhodwAAACCNojQD06ZNCx0hIW95ioJ2BwAAANK4JEwHdeWSMGamPLRzs7zlKQraHQAAAEXFJWEAAAAAALlEUQoAAAAACIaiNAN5OzQ5b3mKgnYHAAAA0ihKAQAAAADBUJRmoFSqeD5vMHnLUxS0OwAAAJBGUQoAAAAACIaiFAAAAAAQDEVpBqZOnRo6QkLe8hQF7Q4AAACkmbuHzqBSqeR5H5m0bsqCqvObpo/NKAkAAAAA1BYza3T3ioOs0FOagSFDhoSOkJC3PEVBuwMAAABpFKUZWLlyZegICXnLUxS0OwAAAJBGUQoAAAAACIaiNAMjRowIHSEhb3mKgnYHAAAA0ihKM9DY2Bg6QkLe8hQF7Q4AAACkUZRmYPLkyaEjJOQtT1HQ7gAAAEAal4TpoK5cEsbMlId2bpa3PEVBuwMAAKCouCQMAAAAACCXKEoBAAAAAMFQlGZg+fLloSMk5C1PUdDuAAAAQBpFaQbyNupq3vIUBe0OAAAApLVblJrZJWb2nJk9XDbtPDNbYmYPmdnVZrZlPL3OzNaa2aL49j89mL1mjB8/PnSEhLzlKQraHQAAAEjrSE/ppZLGtJp2k6S93X2YpEclfa9s3lJ3r49vX+2emAAAAACA3qjdotTd/yrpxVbTbnT3dfHTeyVt3wPZAAAAAAC9XHecU3qCpL+UPd/ZzB4wszvM7OPdsP6ad9FFF4WOkJC3PEVBuwMAAABp5u7tL2RWJ2m+u+/davoZkkqSPufubmabShrg7i+Y2UhJ10jay91frbDOyZImS9KOO+448qmnnurqZ+lRdVMWVJ3fNH1sRkkAAAAAoLaYWaO7lyrN63RPqZkdJ2mcpC95XNm6+1vu/kL8uFHSUkkfrvR6d5/p7iV3Lw0cOLCzMWqCmYWOkJC3PEVBuwMAAABpnSpKzWyMpO9KGu/ub5RNH2hmfeLHH5S0q6QnuiMoAAAAAKD36dveAmY2R9JoSdua2TJJUxWNtruppJvi3p9745F2D5L0AzNbJ2m9pK+6+4sVVwwAAAAAKLx2i1J3n1hh8sVtLHuVpKu6Gqq3GTduXOgICXnLUxS0OwAAAJDWHaPvoh3z5s0LHSEhb3mKgnYHAAAA0ihKM3DEEUeEjpCQtzxFQbsDAAAAaRSlGZg/f37oCAl5y1MUtDsAAACQRlEKAAAAAAiGohQAAAAAEAxFaQbcPXSEhLzlKQraHQAAAEijKM3AzJkzQ0dIyFueoqDdAQAAgDTLQ+9NqVTyhoaG0DGqqpuyoOr8pulj25xnZrnqJctbnqKg3QEAAFBUZtbo7qVK8+gpBQAAAAAEQ1EKAAAAAAiGojQDc+fODR0hIW95ioJ2BwAAANIoSjMwcuTI0BES8panKGh3AAAAII2iNANDhw4NHSEhb3mKgnYHAAAA0ihKAQAAAADBUJQCAAAAAIKhKM3ApEmTQkdIyFueoqDdAQAAgDRz99AZVCqVvKGhIXSMquqmLKg6v2n62IySAAAAAEBtMbNGdy9VmkdPaQbyNupq3vIUBe0OAAAApFGUZmDhwoWhIyTkLU9R0O4AAABAGkUpAAAAACAYitIMDB48OHSEhLzlKQraHQAAAEijKM3AihUrQkdIyFueoqDdAQAAgDSK0gxMmzYtdISEvOUpCtodAAAASOOSMB3UlUvCmJny0M7N8panKGh3AAAAFBWXhAEAAAAA5BJFKQAAAAAgGIrSDOTt0OS85SkK2h0AAABIoygFAAAAAARDUZqBUqni+bzB5C1PUdDuAAAAQBpFKQAAAAAgGIpSAAAAAEAwFKUZmDp1augICXnLUxS0OwAAAJBm7h46g0qlkud9ZNK6KQuqzm+aPjajJAAAAABQW8ys0d0rDrJCT2kGhgwZEjpCQt7yFAXtDgAAAKRRlGZg5cqVoSMk5C1PUdDuAAAAQBpFKQAAAAAgGIrSDIwYMSJ0hIS85SkK2h0AAABIoyjNQGNjY+gICXnLUxS0OwAAAJBGUZqByZMnh46QkLc8RUG7AwAAAGlcEqaDunJJGDNTHtq5Wd7yFAXtDgAAgKLikjAAAAAAgFyiKAUAAAAABENRmoHly5eHjpCQtzxFQbsDAAAAaRSlGcjbqKt5y1MUtDsAAACQRlGagfHjx4eOkJC3PEVBuwMAAABpFKUAAAAAgGAoSgEAAAAAwVCUZuCiiy4KHSEhb3mKgnYHAAAA0szdQ2dQqVTyhoaG0DGqqpuyoOr8puljM0oCAAAAALXFzBrdvVRpHj2lGTCz0BES8panKGh3AAAAII2iFAAAAAAQTLtFqZldYmbPmdnDZdO2NrObzOyx+H6rsnnfM7PHzexfZnZYTwUHAAAAANS+jvSUXippTKtpUyTd4u67Srolfi4z21PSBEl7xa/5jZn16ba0NWrcuHGhIyTkLU9R0O4AAABAWrtFqbv/VdKLrSZ/RtLs+PFsSUeWTb/C3d9y9yclPS5p/+6JWrvmzZsXOkJC3vIUBe0OAAAApHX2nNJB7r5SkuL77eLpQyU9U7bcsnhaoR1xxBGhIyTkLU9R0O4AAABAWncPdFRpeNGK15wxs8lm1mBmDatXr+7mGPkyf/780BES8panKGh3AAAAIK2zRekqMxssSfH9c/H0ZZJ2KFtue0krKq3A3We6e8ndSwMHDuxkDAAAAABALetsUTpX0nHx4+MkXVs2fYKZbWpmO0vaVdJ9XYsIAAAAAOit+ra3gJnNkTRa0rZmtkzSVEnTJf3JzL4i6WlJR0uSuz9iZn+StFjSOkknufv6HspeM9wrHsEcTN7yFAXtDgAAAKR1ZPTdie4+2N37ufv27n6xu7/g7oe4+67x/Ytly//I3Xdx993c/S89G782zJw5M3SEhLzlKQraHQAAAEizPPTelEolb2hoCB2jqropC6rOb5o+ts15ZparXrK85SkK2h0AAABFZWaN7l6qNK+7R98FAAAAAKDDKEoBAAAAAMFQlGZg7ty5oSMk5C1PUdDuAAAAQBpFaQZGjhwZOkJC3vIUBe0OAAAApFGUZmDo0KGhIyTkLU9R0O4AAABAGkUpAAAAACAYilIAAAAAQDAUpRmYNGlS6AgJectTFLQ7AAAAkGb
uHjqDSqWSNzQ0hI5RVd2UBVXnN00fm1ESAAAAAKgtZtbo7qVK8+gpzUDeRl3NW56ioN0BAACANIrSDCxcuDB0hIS85SkK2h0AAABIoygFAAAAAARDUZqBwYMHh46QkLc8RUG7AwAAAGkUpRlYsWJF6AgJectTFLQ7AAAAkEZRmoFp06aFjpCQtzxFQbsDAAAAaVwSpoO6ckkYM1Me2rlZ3vIUBe0OAACAouKSMAAAAACAXKIoBQAAAAAEQ1Gagbwdmpy3PEVBuwMAAABpFKUAAAAAgGAoSjNQKlU8nzeYvOUpCtodAAAASKMoBQAAAAAEQ1EKAAAAAAiGojQDU6dODR0hIW95ioJ2BwAAANLM3UNnUKlU8ryPTFo3ZUHV+U3Tx2aUBAAAAABqi5k1unvFQVboKc3AkCFDQkdIyFueoqDdAQAAgDSK0gysXLkydISEvOUpCtodAAAASKMoBQAAAAAEQ1GagREjRoSOkJC3PEVBuwMAAABpFKUZaGxsDB0hIW95ioJ2BwAAANIoSjMwefLk0BES8panKGh3AAAAII1LwnRQVy4JY2bKQzs3y1ueoqDdAQAAUFRcEgYAAAAAkEsUpQAAAACAYChKM7B8+fLQERLylqcoaHcAAAAgjaI0A3kbdTVveYqCdgcAAADSKEozMH78+NAREvKWpyhodwAAACCtb+gAvUV7o/MCAAAAANLoKQUAAAAABENRmoGtDzs5dISEiy66KHSEQqLdAQAAgDSK0gxsUT8mdISEyZMnh45QSLQ7AAAAkEZRmoGnzh0XOkKCmYWOUEi0OwAAAJBGUQoAAAAACIaiFAAAAAAQDEVpBjbbZb/QERLGjcvX4cRFQbsDAAAAaRSlGdju81NDR0iYN29e6AiFRLsDAAAAaRSlGXjuyrNDR0g44ogjQkcoJNodAAAASKMozcDapfeHjpAwf/780BEKiXYHAAAA0ihKAQAAAADBUJQCAAAAAILp29kXmtlukv5YNumDks6StKWkSZJWx9O/7+7XdfZ9eoOdvpuvwzbdPXSEQqLdAQAAgLRO95S6+7/cvd7d6yWNlPSGpKvj2T9vnlf0glSSXlt0fegICTNnzgwdoZBodwAAACCtuw7fPUTSUnd/qpvW16u8eMOvQkdIOPHEE0NHKCTaHQAAAEjrrqJ0gqQ5Zc9PNrOHzOwSM9uqm94DAAAAANDLdLkoNbP+ksZL+nM86UJJu0iql7RS0s/aeN1kM2sws4bVq1dXWgQAAAAA0Mt1R0/ppyUtdPdVkuTuq9x9vbu/K2mWpP0rvcjdZ7p7yd1LAwcO7IYY+TXwqDNDR0iYO3du6AiFRLsDAAAAad1RlE5U2aG7Zja4bN5nJT3cDe9R0/oP+lDoCAkjR44MHaGQaHcAAAAgrUtFqZltLulQSf9XNvmnZvYPM3tI0ickfasr79EbLP/NcaEjJAwdOjR0hEKi3QEAAIC0Tl+nVJLc/Q1J27SadmyXEgEAAAAACqO7Rt8FAAAAAGCjUZRmYMC+h4WOkDBp0qTQEQqJdgcAAADSKEozsM2Yb4SOkDBz5szQEQqJdgcAAADSKEozsPLSU0JHSGAU2DBodwAAACCNojQDb69aGjpCwsKFC0NHKCTaHQAAAEijKAUAAAAABENRmoE+A7YOHSFh8ODBoSMUEu0OAAAApFGUZmD7ky4LHSFhxYoVoSMUEu0OAAAApFGUZuDluy4PHSFh2rRpoSMUEu0OAAAApJm7h86gUqnkDQ0NoWNUVTdlQadf+9S545SHdm5mZrnKUxS0OwAAAIrKzBrdvVRpHj2lAAAAAIBgKEoBAAAAAMFQlGbgA8fNCB0hIe+HSvdWtDsAAACQRlEKAAAAAAiGojQDz84+NXSEhFKp4vnF6GG0OwAAAJBGUQoAAAAACIaiFAAAAAAQDEVpBt5/4MTQERKmTp0aOkIh0e4AAABAmrl76AwqlUqe95FJ66Ys6NLrm6aP7aYkAAAAAFBbzKzR3SsOskJPaQaW/frLoSMkDBkyJHSEQqLdAQAAgDSK0gysX/Ni6AgJK1euDB2hkGh3AAAAII2iFAAAAAAQDEVpBvoP2iV0hIQRI0aEjlBItDsAAACQRlGagcHH/yJ0hITGxsbQEQqJdgcAAADSKEoz8ML1vwwdIWHy5MmhIxQS7Q4AAACkUZRmYM2DN4SOkDBr1qzQEQqJdgcAAADSKEoBAAAAAMFQlAIAAAAAgqEozcDQr88OHSFh+fLloSMUEu0OAAAApFGUZuDtVY+HjpDAKLBh0O4AAABAGkVpBlZf9cPQERLGjx8fOkIh0e4AAABAGkUpAAAAACAYilIAAAAAQDAUpRnY+rCTQ0dIuOiii0JHKCTaHQAAAEgzdw+dQaVSyRsaGkLHqKpuyoIeXX/T9LE9un4AAAAACMXMGt29VGkePaUZeOrccaEjJJhZ6AiFRLsDAAAAaRSlAAAAAIBgKEoBAAAAAMFQlGZgs132Cx0hYdy4fB1OXBS0OwAAAJBGUZqB7T4/NXSEhHnz5oWOUEi0OwAAAJBGUZqB5648O3SEhCOOOCJ0hEKi3QEAAIC0vqEDFMHapfe3u0x7l5zpzkvGzJ8/v9vWhY6j3QEAAIA0ekoBAAAAAMFQlAIAAAAAgqEozcBO383XYZvuHjpCIdHuAAAAQBpFaQZeW3R96AgJM2fODB2hkGh3AAAAII2iNAMv3vCr0BESTjzxxNARCol2BwAAANIoSgEAAAAAwVCUAgAAAACCoSjNwMCjzgwdIWHu3LmhIxQS7Q4AAACkUZRmoP+gD4WOkDBy5MjQEQqJdgcAAADSKEozsPw3x4WOkDB06NDQEQqJdgcAAADS+nblxWbWJOk1SeslrXP3kpltLemPkuokNUn6gru/1LWYAAAAAIDeqDt6Sj/h7vXuXoqfT5F0i7vvKumW+DkAAAAAACk9cfjuZyTNjh/PlnRkD7xHTRmw72GhIyRMmjQpdIRCot0BAACANHP3zr/Y7ElJL0lySRe5+0wze9ndtyxb5iV336rCaydLmixJO+6448innnqq0zmyUDdlQdD3b5o+Nuj7AwAAAEBnmVlj2dG1CV3tKT3Q3UdI+rSkk8zsoI6+0N1nunvJ3UsDBw7sYox8W3npKaEjJDAKbBi0OwAAAJDWpaLU3VfE989JulrS/pJWmdlgSYrvn+tqyFr39qqloSMkLFy4MHSEQqLdAQAAgLROF6Vm9l4z26L5saRPSXpY0lxJzddAOU7StV0NCQAAAADonbpySZhBkq42s+b1/MHdrzez+yX9ycy+IulpSUd3PWZt6zNg69AREgYPHhw6QiHR7gAAAEBap4tSd39C0r4Vpr8g6ZCuhOpttj/psh5/j44MxNQ8WNKKFSt6Og4qoN0BAACAtJ64JAxaefmuy0NHSJg2bVroCIVEuwMAAABpFKUZeOXuOaEjJJx99tmhIxQS7Q4AAACkdeWcUmQo9HVSAQAAAKAn0FMKAAAAAAiGojQDHzhuRugICQ0NDaEjFBLtDgAAAKRRlAIAAAAAgqEozcCzs08NHSGhVCqFjlBItD
sAAACQRlEKAAAAAAiGohQAAAAAEAxFaQbef+DE0BESpk6dGjpCIdHuAAAAQJq5e+gMKpVKnveRSXvDdUKbpo8NHQEAAABAAZlZo7tXHGSFntIMLPv1l0NHSBgyZEjoCIVEuwMAAABpFKUZWL/mxdARElauXBk6QiHR7gAAAEAaRSkAAAAAIBiK0gz0H7RL6AgJI0aMCB2hkGh3AAAAII2iNAODj/9F6AgJjY2NoSMUEu0OAAAApFGUZuCF638ZOkLC5MmTQ0coJNodAAAASKMozcCaB28IHSFh1qxZoSMUEu0OAAAApFGUAgAAAACCoSgFAAAAAARDUZqBoV+fHTpCwvLly0NHKCTaHQAAAEijKM3A26seDx0hgVFgw6DdAQAAgDSK0gysvuqHoSMkjB8/PnSEQqLdAQAAgDSKUgAAAABAMBSlAAAAAIBgKEozsPVhJ4eOkHDRRReFjlBItDsAAACQRlGagS3qx4SOkDB58uTQEQqJdgcAAADSKEoz8NS540JHSDCz0BEKiXYHAAAA0ihKAQAAAADBUJQCAAAAAIKhKM3AZrvsFzpCwrhx+TqcuChodwAAACCNojQD231+augICfPmzQsdoZBodwAAACCNojQDz115dugICUcccUToCIVEuwMAAABpFKUZWLv0/tAREubPnx86QiHR7gAAAEAaRSkAAAAAIJi+oQMgO3VTFlR83Kxp+tgs4wAAAAAAPaVZ2Om7+TpsM295isLdQ0cAAAAAcoeiNAOvLbo+dISEvOUpipkzZ4aOAAAAAOQORWkGXrzhV6EjJOQtT1GceOKJoSMAAAAAuUNRCgAAAAAIhoGO0KLS4EflGAgJAAAAQHejpzQDA486M3SEhLzlKYq5c+eGjgAAAADkDkVpBvoP+lDoCAl5y1MUI0eODB0BAAAAyB2K0gws/81xoSMk5C1PUQwdOjR0BAAAACB3KEoBAAAAAMFQlAIAAAAAgqEozcCAfQ8LHSEhb3mKYtKkSaEjAAAAALlDUZqBbcZ8I3SEhLzlKYqZM2eGjgAAAADkDkVpBlZeekroCAl5y1MUjL4LAAAApFGUZuDtVUtDR0jIW56iWLhwYegIAAAAQO5QlAIAAAAAgul0UWpmO5jZbWb2TzN7xMxOiadPM7PlZrYovh3efXFrU58BW4eOkJC3PEUxePDg0BEAAACA3Onbhdeuk/Rtd19oZltIajSzm+J5P3f387ser3fY/qTLQkdIyFueolixYkXoCAAAAEDudLqn1N1XuvvC+PFrkv4paWh3BetNXr7r8tAREvKWpyimTZsWOgIAAACQO91yTqmZ1UkaLunv8aSTzewhM7vEzLbqjveoZa/cPSd0hIS85SmKs88+O3QEAAAAIHe6cviuJMnMBki6StKp7v6qmV0o6YeSPL7/maQTKrxusqTJkrTjjjt2NQZ6gbopC6rOb5o+NqMkAAAAALLSpZ5SM+unqCC93N3/T5LcfZW7r3f3dyXNkrR/pde6+0x3L7l7aeDAgV2JAQAAAACoUV0ZfdckXSzpn+7+32XTy4cY/aykhzsfr3f4wHEzQkdIyFueomhoaAgdAQAAAMidrhy+e6CkYyX9w8wWxdO+L2mimdUrOny3SdKJXXgPAAAAAEAv1umi1N3vkmQVZl3X+Ti907OzT9VO350fOkaLvOXpqFo/57RUKsndQ8cAAAAAcqVbRt8FAAAAAKAzKEoBAAAAAMFQlGbg/QdODB0hIW95imLq1KmhIwAAAAC5Q1GagS0/9qXQERLylqcopk2bFjoCAAAAkDtdGX0XHbTs11/W9iddFjpGi7zl6U2qDca07Ndf1rrXXsgwDQAAAJB/9JRmYP2aF0NHSMhbnqKg3QEAAIA0ilIAAAAAQDAcvpuB/oN2CR0hobN5av06oaHlbT8AAAAA8oCe0gwMPv4XoSMk5C1PUdDuAAAAQBo9pRl44fpfapsx3wgdo0WoPO31tOYhQ0/29r5w/S8lepMBAACABHpKM7DmwRtCR0jIW56ioN0BAACANHpKgY2Qh95eAAAAoDehpxQAAAAAEAxFaQaGfn126AgJectTFLQ7AAAAkEZRmoG3Vz0eOkJC3vIUBe0OAAAApFGUZmD1VT8MHSEhb3mKgnYHAAAA0hjoCMhQyEvSAAAAAHlETykAAAAAIBiK0gxsfdjJoSMk5C1PUdDuAAAAQBpFaQa2qB8TOkJC3vIUBe0OAAAApFGUZuCpc8eFjpCQtzxFQbsDAAAAaQx0hF6jvUGEslpHT74/AyEBAACgt6GnFAAAAAAQDD2lGdhsl/1CR0jIW56iyEO7d6QnmN5YAAAAZIme0gxs9/mpoSMk5C1PUdDuAAAAQBpFaQaeu/Ls0BES8panKGh3AAAAII3DdzOwdun9oSMk9FSe0IME5V3e9oO2MNgSAAAAskRPKQAAAAAgGHpKASBn6K0GAABFQk9pBnb67vzQERLylqcoaHcAAAAgjaI0A68tuj50hIS85SkK2h0AAABI4/DdDLx4w6+0Rf2Y0DFa5C1PURSl3Xv60NOuDqjFoa8AAAD5Qk8pAAAAACAYekqBGsJld7quI21IbyoAAEB26CnNwMCjzgwdISFveYqCdgcAAADSKEoz0H/Qh0JHSMhbnqKg3QEAAIA0Dt/NwPLfHJery4HkLU9R9JZ27+lDiDlEGQAAoFjoKQUAAAAABENPKYBM0RMKAACAcvSUZmDAvoeFjpCQtzxFQbsDAAAAafSUZmCbMd8IHSEhb3mKgnYvjvZ6g7t6yZmeXj8AAECW6CnNwMpLTwkdISFveYqCdgcAAADSKEoz8PaqpaEjJOQtT1HQ7gAAAEAah+8CQMF0x2BTvf0Q5LznAwCgN6GnNAN9BmwdOkJC3vIUBe0OAAAApNFTmoHtT7osdISEvOUpCtq9doQeqKg3KMJn7Gn01gIAioKe0gy8fNfloSMk5C1PUdDuAAAAQBpFaQZeuXtO6AgJectTFLQ7AAAAkMbhuwCwkTg0tefbgENX20cbAQB6C3pKAQAAAADB0FOagQ8cNyN0hIS85SkK2h1ZoSe353WkjdvrqSxCb3MeMvSkPFxeCQB6gx7rKTWzMWb2LzN73Mym9NT7AAAAAABqV4/0lJpZH0m/lnSopGWS7jezue6+uCfeL++enX2qdvru/NAxWuQtT1HQ7kD36WoPXBa9yXnvsa6FXsxayJh3oS9xxTbKhyJsp97+GbvjCJ0866me0v0lPe7uT7j725KukPSZHnovAAAAAECN6qmidKikZ8qeL4unAQAAAADQwty9+1dqdrSkw9z9P+Lnx0ra392/UbbMZEmT46e7SfpXN7z1tpKe74b1IBy2Ye/Adqx9bMPaxzasfWzD2sc27B3Yjt1jJ3cfWGlGT42+u0zSDmXPt5e0onwBd58paWZ3vqmZNbh7qTvXiWyxDXsHtmPtYxvWPrZh7WMb1j62Ye/Adux5PXX47v2SdjWznc2sv6QJkub20HsBAAAAAGpUj/SUuvs6MztZ0g2S+ki6xN0f6Yn3AgAAAADUrp46fFfufp2k63pq/W3o1
sOBEQTbsHdgO9Y+tmHtYxvWPrZh7WMb9g5sxx7WIwMdAQAAAADQET11TikAAAAAAO3qNUWpmY0xs3+Z2eNmNiV0HlRmZpeY2XNm9nDZtK3N7CYzeyy+36ps3vfibfovMzssTGqUM7MdzOw2M/unmT1iZqfE09mONcLM3mNm95nZg/E2PDuezjasMWbWx8weMLP58XO2YY0xsyYz+4eZLTKzhnga27GGmNmWZnalmS2J/288gG1YO8xst/jvr/n2qpmdyjbMVq8oSs2sj6RfS/q0pD0lTTSzPcOmQhsulTSm1bQpkm5x910l3RI/V7wNJ0jaK37Nb+JtjbDWSfq2u+8haZSkk+JtxXasHW9J+qS77yupXtIYMxsltmEtOkXSP8uesw1r0yfcvb7skhNsx9ryC0nXu/vukvZV9DfJNqwR7v6v+O+vXtJISW9Iulpsw0z1iqJU0v6SHnf3J9z9bUlXSPpM4EyowN3/KunFVpM/I2l2/Hi2pCPLpl/h7m+5+5OSHle0rRGQu69094Xx49cU/ec7VGzHmuGRNfHTfvHNxTasKWa2vaSxkn5bNplt2DuwHWuEmb1P0kGSLpYkd3/b3V8W27BWHSJpqbs/JbZhpnpLUTpU0jNlz5fF01AbBrn7SikqeCRtF09nu+acmdVJGi7p72I71pT4sM9Fkp6TdJO7sw1rzwxJ/ynp3bJpbMPa45JuNLNGM5scT2M71o4PSlot6X/jQ+l/a2bvFduwVk2QNCd+zDbMUG8pSq3CNIYVrn1s1xwzswGSrpJ0qru/Wm3RCtPYjoG5+/r4UKXtJe1vZntXWZxtmDNmNk7Sc+7e2NGXVJjGNsyHA919hKJTkE4ys4OqLMt2zJ++kkZIutDdh0t6XfFhnm1gG+aUmfWXNF7Sn9tbtMI0tmEX9ZaidJmkHcqeby9pRaAs2HirzGywJMX3z8XT2a45ZWb9FBWkl7v7/8WT2Y41KD7M7HZF58WwDWvHgZLGm1mTolNWPmlmvxfbsOa4+4r4/jlF57HtL7ZjLVkmaVl8tIkkXamoSGUb1p5PS1ro7qvi52zDDPWWovR+Sbua2c7xrxwTJM0NnAkdN1fScfHj4yRdWzZ9gpltamY7S9pV0n0B8qGMmZmic2f+6e7/XTaL7VgjzGygmW0ZP95M0r9JWiK2Yc1w9++5+/buXqfo/7xb3f0YsQ1ripm918y2aH4s6VOSHhbbsWa4+7OSnjGz3eJJh0haLLZhLZqoDYfuSmzDTPUNHaA7uPs6MztZ0g2S+ki6xN0fCRwLFZjZHEmjJW1rZsskTZU0XdKfzOwrkp6WdLQkufsjZvYnRf+4r5N0kruvDxIc5Q6UdKykf8TnJErS98V2rCWDJc2ORwvcRNKf3H2+md0jtmGt4++wtgySdHX0W5/6SvqDu19vZveL7VhLviHp8rhj5AlJ/67431a2YW0ws80lHSrpxLLJ/HuaIXPnEGgAAAAAQBi95fBdAAAAAEANoigFAAAAAARDUQoAAAAACIaiFAAAAAAQDEUpAAAAACAYilIAAAAAQDAUpQAAAACAYChKAQAAAADB/H860d8Vc9RlcAAAAABJRU5ErkJggg==\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "items_per_user=df.groupby(['user']).count()['rating']\n",
+ "\n",
+ "plt.figure(figsize=(16,8))\n",
+ "plt.hist(items_per_user, bins=100)\n",
+ "\n",
+ "# Let's add median\n",
+ "t=items_per_user.median()\n",
+ "plt.axvline(t, color='k', linestyle='dashed', linewidth=1)\n",
+ "plt.text(t*1.1, plt.ylim()[1]*0.9, 'Median: {:.0f}'.format(t))\n",
+ "\n",
+ "# Let's add also some percentiles\n",
+ "t=items_per_user.quantile(0.25)\n",
+ "plt.axvline(t, color='k', linestyle='dashed', linewidth=1)\n",
+ "plt.text(t*1.1, plt.ylim()[1]*0.95, '25% quantile: {:.0f}'.format(t))\n",
+ "\n",
+ "t=items_per_user.quantile(0.75)\n",
+ "plt.axvline(t, color='k', linestyle='dashed', linewidth=1)\n",
+ "plt.text(t*1.05, plt.ylim()[1]*0.95, '75% quantile: {:.0f}'.format(t))\n",
+ "\n",
+ "plt.title('Number of ratings per user', fontsize=30)\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAA6UAAAHvCAYAAACsfXllAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAABOH0lEQVR4nO3de5wU5ZX/8e8RJF7whiJyUTGIRlEcYTBkTQwxm0AU0ISY4EbFNcuwURLNVYw/BaLu6ibu4n0ZEhWzrsbVuHJJwEtkvUTFGQTjXYgYLiMgBgVFCXh+f1QNNENPTw9T81TVzOf9evWru6uq6zx9+mHo00/VU+buAgAAAAAgDbuk3QAAAAAAQPtFUQoAAAAASA1FKQAAAAAgNRSlAAAAAIDUUJQCAAAAAFJDUQoAAAAASA1FKQAEYGaTzMzj25C025M3ZtbFzK4ys+fMbL2ZfRzncl3abWstZnZ7QZ/pnXZ7kB4zG1LQFyal3R4ASFrHtBsAoG0xs4YXPx7s7s808ZrRku6Kn05290mt0Tbkk5l1k/S0pN4pN6VF4h8jhsRPb3f3pWm1BW2PmV0kaV9J69x9SqqNAYBmoigF0Nr+RdIX024Ecu1SbStIn5T0X5JWSXJJf0upTTtjiKSJ8eN5kpam1RC0SRdJOlTSm5KmpNoSAGgmilIAre1kM/t7d3847YYgt06J7/8q6cvu/kGajQnF3c+VdG7KzUAGuPs8SZZ2OwCgtXBOKYDWUlg4/GtqrUBbcHB8/2p7KUgBAGhPKEoBtJZlku6PH1ea2dfSbAxyrVN8/1GqrQAAAK2CohRAa/p/kj6OH19pZh12dkcFM0/Oa+m2Zjavfpv4+S5mdl68fLWZvW9mfzKzS81srwavPcjMrjCz583sPTN718weM7Nv7sR7OsXMHjCz5Wb2UXx/l5l9phn7OCBu5+Nm9paZbTKzNfHznzRsf5HXL41zsTR+vpuZfc/MnjCzVfEst/Oa+94K9r+HmX3fzB6N2/dRnOMnzOwSM9unkddtna24YPHnCz7bnZrJ2MzOLXjtufGySjP7pZktjj/77fZrkc/Fs//+wcxWxu/jfTN7w8zuNrMRZlb08Mr696Jt55NK0qNF3su8Bq8rOftusRlZzewQM7vWzF6J27fOzP5oZuebWVmn7JjZV81sdvz5fxj3kf8ys083lsNG9nOamd1jZn82sw/ifa0ws0Vm9msz+5aZ7VdOmxrZ/3Z5M7OuZvYzi/7tvhv/+6w1swlmtnsz9jsszv3rFs30/IGZLYmXfbaJ1za7f5XZpkZn340/H1d0PqkkHVqkbzX6WZlZJzP7tpnNMLNl8ee0zqK/cdcW63sNXr9DPzWzUWb2+/jfysa4P/7czLo2eO0+Zvbj+HP6q5ltMLNnzeyfzYzvqEB74u7cuHHjlthN0eQzLumV+PntBcvObeQ1owu2mdTEfuc1ow1Ft1U0yUz9Np0lPVzwvOFtgaT94td9RtLqEtv+okSbJhVsN0TSTSX2s0XSxDLe57mS3iuxH5f0lqTPlNjH0ni7pZIOk/RCkX00mfNG9j1Y0oom2ve2ovNES+Wr1G1IM9t0bmF/lDRB0uZS+5V0W5lt+b2kvVvwXuY1eN3tBet6F9nvkIL1kyQNU3TebWP7f1DSJ0rkZldJ95R4/WZJP2yYwyL72V3SrDLf80UJ/K2ZJ+l4SctLxHld0mFN7K+rpEfKaPMvJe2aVP8q871u91k38m+4qVuxz6pS0p+beN1HksaVaFthP+2jaCKyxva1VNKh8euOlLS4xLb3SLKd7R/cuHHL142JjgC0tomSzlR0COYkM/tvd9+UcpsK3aZoduAnFX0JekvRiMMF8f3xkqaY2URJcxW9j19KekLSJkmfkzRW0cRxPzSzue7+UBMxL5R0uqKC7JeSnpe0h6KiYpSio1gmmdlad7+x2A7M7EJtm2HzI0n3SXpc0lpJXeJ9nSapm6SHzWyQu79Uok2fkPRbSf3i93afpJWKvqh3a+L9FGtfhaQ/KCpQJOk5Sf8t6S+SDpL0DUknStpf0iwz+7JHk7nUu1vSwvjx/fH9i4pG3wu90Ny2FfiGpK9IelfSdEm1in4QOC5eVm93RTn+P0nzJS2R9L6i3Bwh6Wxty/kdij7bQvXvZbSk+hH1y4q0/e0WvJcKST9WNBnOVElPxW2ulPTPkvaU9CVFMxlf3sg+qiWdET/+UFGx8ZSinFRK+rakX0i6t4m2/IukU+PHdYqKlBclbVD0I9Dhin7gOansd1faPor6bk9Fhff/SnpHUdHzbUmHxDEfMbMKd3+v4Q7MrIui99onXvSSpP+R9Jqioz36KSoye8X77KimJ6Eqt3+1VJWivx/VivrkmnhZQwsKn1h0RMbD8WulqCD/vaJTL3ZT9BmdE6//TzP7yN1vb6ItV0v6uqK/af+laCbggxT9jTxG0d/UO8zsNEkPKfrMfhO3Y72kAYr+9u6pqC8+qOhvJIC2Lu2qmBs3bm3rpm2/cr9SsOz6guXfK/KaNEdKXdJPi2zTVdtG+TYrKipWS+pfZNuzC/b1u0ZiTmoQ8wVJBxbZ7nRFlzlxRYXPIUW2GViwzSuS+jYS81RFhbNLeqaRbZY2aNf3E+gDu2j7EdcpknYpst1lBdv8RdJuLf3sy2jbuQ3e78uSejTxms9J2rfE+j21/Qjj58voA0PKaOvtBdv3LrJ+SIP38maxviDphIL+8o6KjJYq+mGmfj9rJB1TZJveRfrLuQ226SBpnbaNiu3Qxxv8G/tUCz7LhiNrFxTZprOkRwu2uaGRfd1fsM3/a6S/dlb0w1T9dsOS6F9lvtfCz3pSI9vUfzZLy9jfXvG/OVf0Y8FXGtnu8Lhf1W93QBP91CX9Z8P8KfphZ1HBNjWKJsM7ucj+TlL0Q4BLeqmluePGjVs+bhyvDyCEKxUVWJJ0qZl1TrMxDcx1939puNDd10iqH6XsoGh0Y7y7P19k218rOjxQkr5Yxrl7myV9091XF9nX/0q6Nn66h6TvFHn9REUjNR9JGu7urxfZRu4+W9HIhSSdYGZ/10S77nf3/2him3IMVzSyJElPKyp0P264kbtfIWl2/PRgSWclELs5XNJod19ZciP3x919XYn17ysaPavv42cn1sLmOatYX3D3+YpGoyRpP0VFakPfL3g83t13GIF296VqenSwq6KRS0l6oFgfL9jfGnd/pYn9letud7+pSIwNin70qh8d/baZ7Vu4jZkN0LbR7Vvd/cpG+mv9vupHOX/QRJvK6l8pGatts1p/x91/X2wjd18s6R/jp3uq+AhsoRcU9Z/t8ufuG7Xtb5EU/bA2yd3/UCTmY4pGTiXpKDM7uOE2ANoeilIArS7+Yjolfnqgoou8Z0XRw2NjTxY8XqXShy0+Ed930rZDABsz191fLLF+iqLD/CTpq4Ur4olh6g+NfCD+0ljKfxU8/nIT297QxPpyFc60/HN39xLbFn5RDT1D8+PuviiJHbn7ekl/ip9+Ool9NtNz7v54ifWFX/6PLlxhZrtpW9+oU3TYalEeHWK
9ww8zBTY2FqeVXdvYCndfpW3/DnZXdJh1ocIfEX5RKoi7/1XS7+KnJ5nZJ0psnlj/agX177lO0p2lNowLx/rCuqm/IVPdfXMj6wr/nm5RdJh5Y54oeByyHwFICeeUAgjl54pG/bpI+pGZ3ezu76TcJkl6psS6VQWPa4uNnjSybVMzij5SaqW7v2VmLys6B+sIM9vH3etHZ07Uth8UPzSz05uItWvB46NKbLdF0Tl1SagfiXNF542V8kdtO9cwdDFXqojbTlx8fEPRebrHKTrPtrOiczgb6pVI65rn6SbWryh43LB/Hqdt/eT/mujnUnT4e/9iK9z9XTObr6gP/L2Z3a/ox47H3f1vTex3Z72r6HzNUv4g6fz48SBF5/nW+1x8v0nSkWZ2ZBP7+kTB/ScVHaJbTNn9KySLZryu//zqJI204hNHF9oQ35f6GyKV//f01YK/aU1tu9MzNAPID4pSAEHEX1avkXSNosP7Jkj6SbqtkhRNDNSYwutiltqu4ba7NbFtU6Ob9dsco6joOUjbDhnsXbDNOfGtXKW+3K119w+bsa9Susf3b8UjiI1y94/NbImiwqiLmXXycBNhrWh6E8nMjlU08VPfMve79063aOc1NUlSqf7Zo+Dxn8uI1dQ2Fyj64WVvRYfFni7pfTN7RtEI2MOSniyj+C3XkiZG46Xt/831aLCud3zfSdsm1SpXqX9TZfWvFBysbT9sDVDz3nNTBWKjfyfd/aOC4jfJv6cA2gAO3wUQ0g3adhjYeDNr+OUwuGZ8MU7qC7QUTfDRlPcLHheeg7tPww2boVOJdRtLrGuu+mujvl9yq202FDwueV3VhDX5nuNZWR/WtoJ0maRbFM2g/A+KDjn+anyrPyQ7jf9bW9I/9yx43Ny+uQN3r1E0G/Ad2pbjPSWdrGjm38ckLTGzpM4hbsm/Jykf/6aS1JL3u2sT69P4ewqgDWCkFEAw7r7RzK5Q9KV+d0VfUP85yRg5ueD6Hk1vsl2hsKGRx+e6+/RkmpSo9ZL21fbvoZTCIqHkyGoKxis6D1qKLuvxT42dM2dmlwZrVbIKC7bm9s2i3P0NSWPMbJyiS4v8naTPSvq8on/7vSX92swOKTbRWDO15N9T/fN9Fc1ae1gL25IHhe//dnf/x0a3BIBA8vDlDUDb8itF13mUopkwDy/zdfWHdJYamZCkA3aqVWGV857rt3FF106tV3hIYD9lU118f5CZlRz5tOh4vvqJodYGPHS3XH8f32+WdFGJSVyk6BqMeVQ4O+wny9i+nG0kSe7+obs/6u5XuftXFBX4Fyvq15J0uZntX35Ti+pjTZ8UWfhvruFsuPX/pg42szQOvQ4tD39DALQzFKUAgoonO7k8ftpR0s/KfOm6+L6pQ37TmPm0uU4utdLMDtK2CUVeazAhyGPa9oX+tIyODM+P703R9S9L+TttGymdX2rDlHSL79eWuiyMmR2v6HIopRQestjkzDIBLVJ0HVMpmlG2qT41ZGcDufsGd/83RefoStFkQYN2dn+xfRSdG1nKFwoeP9tg3f/F9x0kjWhhW9JU379K9i13f1vSS/HTgVxyBUAWZPHLDIC27y5tu6zEaEWT3DSl/kvUoWZWaqTmey1pWCDDzKzULJbfU/QFWZJ+W7givrzOnPjpEYquj5k19xU8/lETo1gXN/K6rKg/X/HAJkZ9Ly+xrl7hYZPlHtrc6uIJrh6Mn/aQdEZj25rZEDUy824zLS14nMSpRI1eM9TMumrbNXA3atu/n3p3FDy+3Mwy89k0U33/Kqf99Yf97yLpX1unOQBQPopSAMHFM2XWn39nkr5bxssKv0heU6zQMbOfadvhllnWUdJv4i/L2zGzEZJ+FD/9QNH5tw39P20b2bqhqQljzOwQM/u5mR1YarsEzda2SX9OlPTzYqNvZvZTbRuZWqYmrpeYkvpRNZN0ZcOVFvmZohlmm/JGweOmRvZCm1Lw+EYzO6bhBmbWW9LtpXZiZseb2WVm1q3ENgdoW+HrKn3d03L9g5ntcH56XGDepW2T+/yq4Yi3uz+jbT+IHCFpZhPt72hmp5vZ+Y1tk5L6/rW/mR3SxLY3SXozfvwtM/sPM2v01Agz29vMvmdmefj7CiCHmOgIQCrcfZaZ/VHR4Zvl/LJ/q6JLyHSR9HVJj5vZnYouhXGIohHXSkXXHxzdKo1Ozv8qKmJeNLNpkv6kaLKWoYq+rNcX3Be7+7KGL3b3BWb2HUnTFB3++Gsz+6GkBxRd+uIjRRO3fEpRUXhCvM/rWu0dbd++j83sbElPKprU5oeSvhB/XssVHRL7DUUT30hRgX1OgpekSdLNks5TNHL9PTOrUDR6/ZaiS2v8g6TjFY3kb5Q0sMS+Hlf0XneV9GMzqy/I6i9/8Y67p3IIs7s/bGa3SzpX0XnZz8bP/6josNBKRXnYW9K9iv4NSjvOorqPokPyJ5rZk/HrX1M0gVUXSccqylmXePs73f0vLWz+QkX9/Zb4ur33Kzrcv/5Igvpzfd/Qth/DGjov3v5YRYf6/tnM7lV07d63FV2WpLuiHxO+HLf/Vy1sd9IekTQyfvxbM7tF0fnd9Z/Rn9x9hSS5+/txrv5P0Wd6kaRvmNk9ivrke4pmwj5M0d+PLyj6W3N2kHcCoN2hKAWQpku07Xyuktx9TTwi+FtFXxBPjG+FZin6Epr1ovQ6RZONXCDpp0XWu6SfufuNje3A3X9lZqsVFabdFF2Co6JEzLWSghV97v6cmX1R0QhU/Zf5YqOD70j6B3efF6ptzeHuC83su5JuVHR00UnxrdDLkk6T9Msm9vW2mf1CUb/vrB3Pp/4/teB8zQRUKWrX1xX9G/tnbT879seKRvHf1baitOFsyfUFUAcVz1Whe+KYLfWupH9U9O9/aHxraImkL7v7e8V24O7vmdmJiv49fVPRj0RNXQe44YRJabtV0d+UIxT9ONKwP/6jCka64759gqKR5OMVHbp9UYn9f6Smr4cLADuFw3cBpMbdH9OO53eV2v73igqv2yT9RdGMvGskParoF/yR7l7ONQtT5+7jJZ0qaaaiL7eb4vvfSDrR3SeVsY+ZikYy/lnSDEWHwG7Utrw8pejasCMk9YgnOAnG3Z9SdH3PHygquNYoGilcG7ftUkl93H1uyHY1l7vfougHkP9RNEL6N0mrFY0C/kBSpbsvLnNfP5V0pqJ+/5a2zSqdOnf/m7ufIWmUovatUVSI/EXRodUnuvu1kgpny32nwT4ek3SkpO8oKjpfUXSu48fx/UuKiqfPu/s33T2Ra3m6+0JFhdWVkl5QVCxvkPScoh9++rv7n5vYx3p3H63ox5Mp8WvXKpp5eYOk1xUd5fADRf22nPOIg3H3DZIGS7pK0gJFxXrJ64G6+6uKCtjTFJ1n+pqiUdItikabFyk65/ZcSd3dvey/1wDQHBad2gUAANA0M7tP0tfip/u7+zultm/FdtR/gfk/dx+SRhsAAMlgpBQAAJQlnuxoePx0UVoFKQCgbaEoBQAAMrM+ZtarxPqeiiYRqp+ldWqQhgEA2jwmOgIAAJL0GUm3mdljim
YKXqLoHOX9FZ2r+A1FEwBJ0tOSqtNoJACg7aEoBQAA9TpKOjm+NWaepFHuviVIiwAAbR5FKQAAkKIZnL8laZiimWwPUHQ9zk2SVkl6RtLd8azPAAAkJhOz7x5wwAHeu3fvtJux09asWaOuXbu2+Zh5RJ4AAACA9NXW1r7t7kW/mGdipLR3796qqalJuxkAAAAAgFZgZm82to7ZdxMwadKkdhEzj8gTAAAAkG2ZOHy3srLS8zxSamYKncc0YuYReQIAAADSZ2a17l5ZbB0jpQAAAACA1FCUAgAAAABSQ1GagDQOPc7z4c4hkScAAAAg2yhKAQAAAACpYaKjBDDRUXaRJwAAACB9THQEAAAAAMgkilIAAAAAQGooShMwceLEdhEzj8gTAAAAkG2cUwoAAAAAaFWcU9rKevTo0S5i5hF5AgAAALKNojQBdXV1WrZsmb7whS/oqKOOUr9+/XTddddtXT9p0iT17NlTFRUVqqio0O9+9ztJ0pNPPqn+/ftr0KBBWrx4sSRp3bp1Gjp0aJMzxtbV1bXa+/nf//1fvfTSS1ufX3755Xr44YclSUOGDGnRtT/nzZuniooK9evXT5///Odb3NamNJanV199devnUVFRob333ltTpkyR1DqfV2tqrc/r3Xff1YgRI3TcccepX79+uu2227aumzNnjo488kgdfvjhuvrqq1v2BgAAANCudUy7AW1Fx44dde2112rAgAFav369Bg4cqC996Us6+uijJUnf//739aMf/Wi711x77bW67777tHTpUt1yyy269tprdcUVV+inP/2pzCyNtyEpKnKGDx++te0/+9nPEtnvunXrdP7552vOnDk65JBDtHr16kT2uzOOPPJILVy4UJK0ZcsW9ezZU1/96le3rufzkm666SYdffTRmjlzptasWaMjjzxS3/rWt9ShQwddcMEFeuihh9SrVy8NGjRII0eO3BofAAAAaA5GShMwYMAAde/eXQMGDJAk7bXXXjrqqKO0YsWKkq/bddddtXHjRn3wwQfaddddtWTJEq1YsaLkCOKcOXP0qU99Snvuuae+973vafjw4ZKi0b1f/OIXW7c75phjtHTpUknS6aefroEDB6pfv36qrq7euk3nzp116aWX6rjjjtPgwYO1atUq/fGPf9SMGTP04x//WBUVFVqyZInOPfdc3XvvvTu05cEHH9RnPvMZDRgwQGeccYY2bNhQ8v3+93//t772ta/pkEMOkSQdeOCBJbdPQv1nUsojjzyiPn366NBDDy25XUs+r89+9rO5+7zMTOvXr5e7a8OGDerSpYs6duyo+fPn6/DDD9cnP/lJderUSaNHj9YDDzxQcl8AAABAYyhKE1BbW7vd86VLl+q5557Tpz/96a3LbrzxRvXv31/nnXee/vrXv0qSLrnkElVVVWnKlCkaP368Lr30Ul1xxRWNxvnwww81duxYzZw5U+vXr9dbb71VVvtuvfVW1dbWqqamRtdff73Wrl0rSXr//fc1ePBgLVq0SCeddJKmTZumv/u7v9PIkSP185//XAsXLlSfPn2K7vPtt9/WlVdeqYcfflgLFixQZWWl/v3f/11SdPjojBkzdnjNa6+9pr/+9a8aMmSIBg4cqDvuuKOs9rdEw8+mmLvvvltnnnnmdsuS/rwef/zx3H1e48eP18svv6wePXro2GOP1XXXXadddtlFK1as0MEHH7x1u169ejX5AwwAAADQGIrSBFRVVW19vGHDBo0aNUpTpkzR3nvvLUn6zne+oyVLlmjhwoXq3r27fvjDH0qSKioq9PTTT+vRRx/Vn//8Z/Xo0UPurm9+85s666yztGrVqu3ivPLKKzrssMPUt29fjRs3TmeddVZZ7bv++uu3jq4tW7ZMr7/+uiSpU6dOW0fuBg4cuHWkrhxPP/20XnrpJZ144omqqKjQ9OnT9eabb0qKDh8dOXLkDq/ZvHmzamtrNXv2bM2dO1dXXHGFXnvttbJj7ozCz6aYTZs2acaMGTrjjDO2LmuNz8vMcvd5zZ07VxUVFVq5cqUWLlyo8ePH67333it6/myahy8DAAAg3yhKEzBt2jRJ0t/+9jeNGjVK3/rWt/S1r31t6/pu3bqpQ4cO2mWXXTR27FjNnz9/u9e7u6688kpddtllmjx5siZPnqyzzjpL119//Q6x6r/818es17FjR3388cdbn3/44YeSoomFHn74YT311FNatGiRjj/++K3rdt11163769ChgzZv3lz2e3Z3felLX9LChQu1cOFCvfTSS/rVr35V8jW9evXSsGHDtOeee+qAAw7QSSedpEWLFpUdc2c0zFNDv//97zVgwAB169Zt67LW+LwaysPnddttt+lrX/uazEyHH364DjvsML3yyivq1auXli1btnW75cuXM8sxAAAAdhpFaULcXd/+9rd11FFH6Qc/+MF26wpngL3//vt1zDHHbLd++vTpOvXUU7Xffvvpgw8+0C677KJddtlFH3zwwXbbfepTn9Ibb7yhJUuWSJLuuuuuret69+6tBQsWSJIWLFigN954Q1I0g+p+++2nPfbYQ6+88oqefvrpJt/LXnvtpfXr15fcZvDgwXryySe3zkL7wQcfNDnqedppp+nxxx/X5s2b9cEHH+iZZ57RUUcd1WR7WtNdd921w6G7fF6RQw45RI888ogkadWqVXr11Vf1yU9+UoMGDdLrr7+uN954Q5s2bdLdd99ddKQVAAAAKAez75ap94TZJdc/+eST+vWvf61jjz1WFRUVkqR/+Zd/0SmnnKKf/OQnWrhwocxMvXv31tSpU7e+7oMPPtD06dP14IMPSpJ+8IMfaNSoUerUqdN2RYwk7bbbbqqurtapp54qSTr00EP1wgsvSJJGjRqlO+64QxUVFRo0aJCOOOIISdKwYcP0n//5n+rfv7+OPPJIDR48uMn3Onr0aI0dO1bXX3990QlzJKlr1666/fbbdeaZZ+qjjz6SJF155ZU64ogjdPnll6uysnKHQuWoo47SsGHD1L9/f+2yyy76p3/6px0KvpA++OADPfTQQ9t9HpJa5fM64IAD9NnPfjZXn9dll12mc889V8cee6zcXddcc40OOOAASdE5t0OHDtWWLVt03nnnqV+/fk22EwAAACjG0ry+Yr3KykpvybUvQyhVlG5ev1bLbzonYGuklStX6rXXXtMvfvELzZo1K2jsPFm5cmVmDi2dN28enxcAAADaJTOrdffKYus4fDcBm1YtDh6znFllQZ4AAACArGOktEylRkrfvGZ40RlJW5OZBY+ZR+QJAAAASB8jpQAAAACATKIoBQAAAACkhqI0AV2Gjg8es+GMsSiOPAEAAADZRlGagL0qhgWPWVVVFTxmHpEnAAAAINsoShPw5jXDg8c0s+Ax84g8AQAAANlGUQoAAAAASA1FKQAAAAAgNRSlCdi9z6DgMYcPD3/IcB6RJwAAACDbKEoTcODXJwaPOXPmzOAx84g8AQAAANlGUZqA1fdODh5zxIgRwWPmEXkCAAAAso2iNAEblzwbPOasWbOCx8wj8gQAAABkG0VpG2JmO
vvss7c+37x5s7p27drs8yqHDBmimpoaSdIpp5yidevWJdlMLVy4UJ/5zGfUr18/9e/fX7/5zW+2rvvc5z6niooKVVRUqEePHjr99NMTjQ0AAAAgWzqm3QAkZ88999QLL7ygjRs3avfdd9dDDz2knj17tmifv/vd7xJq3TZ77LGH7rjjDvXt21crV67UwIEDNXToUO277756/PHHt243atQonXbaaYnHBwAAAJAdjJQm4NCLwx8i6u5Fl3/lK1/R7NmzJUl33XWXzjzzzK3r3n//fZ133nkaNGiQjj/+eD3wwAOSpI0bN2r06NHq37+/vvnNb2rjxo1bX9O7d2+9/fbbkqTTTz9dAwcOVL9+/VRdXb11m86dO+vSSy/Vcccdp8GDB2vVqlUl237EEUeob9++kqQePXrowAMP1Jo1a7bbZv369frDH/7Q4pHSxvIEAAAAIBvKLkrNrIOZPWdms+LnXczsITN7Pb7fr2DbS8xssZm9amZDW6PhWbJ+4ZzgMQuLwkKjR4/W3XffrQ8//FDPP/+8Pv3pT29dd9VVV+nkk0/Ws88+q0cffVQ//vGP9f777+uWW27RHnvsoeeff16XXnqpamtri+771ltvVW1trWpqanT99ddr7dq1kqJid/DgwVq0aJFOOukkTZs2TZI0Y8YMXX755SXfx/z587Vp0yb16dNnu+X333+/vvjFL2rvvfcuOyfFNJYnAAAAANnQnJHSCyW9XPB8gqRH3L2vpEfi5zKzoyWNltRP0jBJN5tZh2Sam03vzL0xeMxx48YVXd6/f38tXbpUd911l0455ZTt1j344IO6+uqrVVFRoSFDhujDDz/UX/7yFz322GM666yztr6+f//+Rfd9/fXXbx0NXbZsmV5//XVJUqdOnbaetzpw4EAtXbpUkjRy5Ej97Gc/a/Q91NXV6eyzz9Ztt92mXXbZvis2HOXdWY3lCQAAAEA2lHVOqZn1knSqpKsk/SBefJqkIfHj6ZLmSbo4Xn63u38k6Q0zWyzpBElPJdZqlDRy5Ej96Ec/0rx587aOZkrRoaz33XefjjzyyB1eY2Yl9zlv3jw9/PDDeuqpp7THHntsLWoladddd936+g4dOmjz5s1NtvG9997TqaeeqiuvvFKDBw/ebt3atWs1f/583X///U3uBwAAAEC+lTtSOkXSTyR9XLCsm7vXSVJ8f2C8vKekZQXbLY+XIZDzzjtPl19+uY499tjtlg8dOlQ33HDD1vMsn3vuOUnSSSedpDvvvFOS9MILL+j555/fYZ/vvvuu9ttvP+2xxx565ZVX9PTTT+90+zZt2qSvfvWrOuecc3TGGWfssP5//ud/NHz4cO222247HQMAAABAPjRZlJrZcEmr3b34iYZFXlJk2Q6zzZhZlZnVmFlNw0lu8qbrqMuCx5wxY0aj63r16qULL7xwh+WXXXaZ/va3v6l///465phjdNllUbu/853vaMOGDerfv7/+7d/+TSeccMIOrx02bJg2b96s/v3767LLLtthdLOxNhY7p/See+7RY489pttvv33r5V8WLly4df3dd9+dyKG79W0AAAAAkF3W1OykZvavks6WtFnSbpL2lvRbSYMkDXH3OjPrLmmeux9pZpdIkrv/a/z6uZImuXujh+9WVlZ6/XUxs6r3hNmNrtu8fq2W33ROwNZIK1euVI8ePYLGzCPyBAAAAKTPzGrdvbLYuiZHSt39Enfv5e69FU1g9Ad3P0vSDElj4s3GSHogfjxD0mgz+4SZHSapr6T5LXwPmbbi5jFNb5Swll5/tL0gTwAAAEC2lTXRUSOulnSPmX1b0l8knSFJ7v6imd0j6SVFo6sXuPuWFrcUAAAAANDmNKsodfd5imbZlbuvlfTFRra7StFMvQAAAAAANKo51ylFIzofNzR4zLFjxwaPmUfkCQAAAMg2itIE7D/su8FjVldXB4+ZR+QJAAAAyDaK0gTU3b7j5Vda28CBA4PHzCPyBAAAAGQbRWkCNq1aEjzmggULgsfMI/IEAAAAZBtFKQAAAAAgNRSlCejQuUvwmN27dw8eM4/IEwAAAJBtFKUJ6HXBHcFjrly5MnjMPCJPAAAAQLZRlCZg3RN3Bo85adKk4DHziDwBAAAA2WbunnYbVFlZ6TU1NWk3o6TeE2Y3uu7Na4YrdB7NLHjMPCJPAAAAQPrMrNbdK4utY6QUAAAAAJAailIAAAAAQGooShNw0JgpwWNm/XDnrCBPAAAAQLZRlAIAAAAAUkNRmoC3pl8UPGZlZdFzhNEAeQIAAACyjaIUAAAAAJAailIAAAAAQGooShOwz4lnBo85ceLE4DHziDwBAAAA2WbunnYbVFlZ6VmfJbX3hNkl1y+9+tRALQEAAACAfDGzWncvOuELI6UJWH7TOcFj9ujRI3jMPCJPAAAAQLZRlCZgy4Z3gsesq6sLHjOPyBMAAACQbRSlAAAAAIDUUJQmoFO3PsFjDhgwIHjMPCJPAAAAQLZRlCag+7nXBY9ZW1sbPGYekScAAAAg2yhKE7B2zg3BY1ZVVQWPmUfkCQAAAMg2itIEbFg0N3jMadOmBY+ZR+QJAAAAyDaKUgAAAABAaihKAQAAAACpoShNQM/zpwePuWLFiuAx84g8AQAAANlGUZqATasWB4/JrLLlIU8AAABAtlGUJmDNfVcEjzly5MjgMfOIPAEAAADZRlEKAAAAAEgNRSkAAAAAIDUUpQnoMnR88JhTp04NHjOPyBMAAACQbRSlCdirYljwmFVVVcFj5hF5AgAAALKNojQBb14zPHhMMwseM4/IEwAAAJBtFKUAAAAAgNRQlAIAAAAAUkNRmoDd+wwKHnP48PCHDOcReQIAAACyjaI0AQd+fWLwmDNnzgweM4/IEwAAAJBtFKUJWH3v5OAxR4wYETxmHpEnAAAAINuaLErNbDczm29mi8zsRTObHC+fZGYrzGxhfDul4DWXmNliM3vVzIa25hvIgo1Lng0ec9asWcFj5hF5AgAAALKtYxnbfCTpZHffYGa7SnrCzH4fr/sPd/9F4cZmdrSk0ZL6Seoh6WEzO8LdtyTZcAAAAABA/jU5UuqRDfHTXeObl3jJaZLudveP3P0NSYslndDilgIAAAAA2pyyzik1sw5mtlDSakkPufsz8arxZva8md1qZvvFy3pKWlbw8uXxsjbr0IvDHyLqXup3AdQjTwAAAEC2lVWUuvsWd6+Q1EvSCWZ2jKRbJPWRVCGpTtK18eZWbBcNF5hZlZnVmFnNmjVrdqLp2bF+4ZzgMaurq4PHzCPyBAAAAGRbs2bfdfd1kuZJGubuq+Ji9WNJ07TtEN3lkg4ueFkvSSuL7Kva3SvdvbJr16470/bMeGfujcFjjhs3LnjMPCJPAAAAQLaVM/tuVzPbN368u6S/l/SKmXUv2Oyrkl6IH8+QNNrMPmFmh0nqK2l+oq0GAAAAALQJ5cy+213SdDProKiIvcfdZ5nZr82sQtGhuUsljZMkd3/RzO6R9JKkzZIuYOZdAAAAAEAxTRal7v68pOOLLD+7xGuuknRVy5qWH11HXRY85owZ
M4LHzCPyBAAAAGRbs84pRXGduh0ePObAgQODx8wj8gQAAABkG0VpAlbcPCZ4zJ492/RVdhJDngAAAIBsoygFAAAAAKSGohQAAAAAkBqK0gR0Pm5o8Jhjx44NHjOPyBMAAACQbRSlCdh/2HeDx6yurg4eM4/IEwAAAJBtFKUJqLv9wuAxmVW2POQJAAAAyDaK0gRsWrUkeMwFCxYEj5lH5AkAAADINopSAAAAAEBqKEoT0KFzl+Axu3fvHjxmHpEnAAAAINsoShPQ64I7gsdcuXJl8Jh5RJ4AAACAbKMoTcC6J+4MHnPSpEnBY+YReQIAAACyzdw97TaosrLSa2pq0m5GSb0nzG503ZvXDFfoPJpZ8Jh5RJ4AAACA9JlZrbtXFlvHSCkAAAAAIDUUpQAAAACA1FCUJuCgMVOCx8z64c5ZQZ4AAACAbKMoBQAAAACkhqI0AW9Nvyh4zMrKoucIowHyBAAAAGQbRSkAAAAAIDUUpQAAAACA1FCUJmCfE88MHnPixInBY+YReQIAAACyzdw97TaosrLSsz5Lau8Js0uuX3r1qYFaAgAAAAD5Yma17l50whdGShOw/KZzgsfs0aNH8Jh5RJ4AAACAbKMoTcCWDe8Ej1lXVxc8Zh6RJwAAACDbKEoBAAAAAKmhKE1Ap259gsccMGBA8Jh5RJ4AAACAbKMoTUD3c68LHrO2tjZ4zDwiTwAAAEC2UZQmYO2cG4LHrKqqCh4zj8gTAAAAkG0UpQnYsGhu8JjTpk0LHjOPyBMAAACQbRSlAAAAAIDUUJQCAAAAAFJDUZqAnudPDx5zxYoVwWPmEXkCAAAAso2iNAGbVi0OHpNZZctDngAAAIBsoyhNwJr7rggec+TIkcFj5hF5AgAAALKNohQAAAAAkBqKUgAAAABAaihKE9Bl6PjgMadOnRo8Zh6RJwAAACDbKEoTsFfFsOAxq6qqgsfMI/IEAAAAZBtFaQLevGZ48JhmFjxmHpEnAAAAINsoSgEAAAAAqWmyKDWz3cxsvpktMrMXzWxyvLyLmT1kZq/H9/sVvOYSM1tsZq+a2dDWfAMAAAAAgPwqZ6T0I0knu/txkiokDTOzwZImSHrE3ftKeiR+LjM7WtJoSf0kDZN0s5l1aIW2Z8bufQYFjzl8ePhDhvOIPAEAAADZ1mRR6pEN8dNd45tLOk3S9Hj5dEmnx49Pk3S3u3/k7m9IWizphCQbnTUHfn1i8JgzZ84MHjOPyBMAAACQbWWdU2pmHcxsoaTVkh5y92ckdXP3OkmK7w+MN+8paVnBy5fHy9qs1fdODh5zxIgRwWPmEXkCAAAAsq2sotTdt7h7haRekk4ws2NKbF5sulPfYSOzKjOrMbOaNWvWlNXYrNq45NngMWfNmhU8Zh6RJwAAACDbmjX7rruvkzRP0bmiq8ysuyTF96vjzZZLOrjgZb0krSyyr2p3r3T3yq5duza/5QAAAACA3Ctn9t2uZrZv/Hh3SX8v6RVJMySNiTcbI+mB+PEMSaPN7BNmdpikvpLmJ9xuAAAAAEAb0LGMbbpLmh7PoLuLpHvcfZaZPSXpHjP7tqS/SDpDktz9RTO7R9JLkjZLusDdt7RO87Ph0IvDHyLqvsMR0SiCPAEAAADZVs7su8+7+/Hu3t/dj3H3n8XL17r7F929b3z/TsFrrnL3Pu5+pLv/vjXfQBasXzgneMzq6urgMfOIPAEAAADZ1qxzSlHcO3NvDB5z3LhxwWPmEXkCAAAAso2iFAAAAACQGopSAAAAAEBqKEoT0HXUZcFjzpgxI3jMPCJPAAAAQLZRlCagU7fDg8ccOHBg8Jh5RJ4AAACAbKMoTcCKm8c0vVHCevbsGTxmHpEnAAAAINsoSgEAAAAAqaEoBQAAAACkhqI0AZ2PGxo85tixY4PHzCPyBAAAAGQbRWkC9h/23eAxq6urg8fMI/IEAAAAZBtFaQLqbr8weExmlS0PeQIAAACyjaI0AZtWLQkec8GCBcFj5hF5AgAAALKNohQAAAAAkBqK0gR06NwleMzu3bsHj5lH5AkAAADINorSBPS64I7gMVeuXBk8Zh6RJwAAACDbKEoTsO6JO4PHnDRpUvCYeUSeAAAAgGwzd0+7DaqsrPSampq0m1FS7wmzG1335jXDFTqPZhY8Zh6RJwAAACB9Zlbr7pXF1jFSCgAAAABIDUUpAAAAACA1FKUJOGjMlOAxs364c1aQJwAAACDbKEoBAAAAAKmhKE3AW9MvCh6zsrLoOcJogDwBAAAA2UZRCgAAAABIDUUpAAAAACA1FKUJ2OfEM4PHnDhxYvCYeUSeAAAAgGwzd0+7DaqsrPSsz5Lae8LskuuXXn1qoJYAAAAAQL6YWa27F53whZHSBCy/6ZzgMXv06BE8Zh6RJwAAACDbKEoTsGXDO8Fj1tXVBY+ZR+QJAAAAyDaKUgAAAABAaihKE9CpW5/gMQcMGBA8Zh6RJwAAACDbKEoT0P3c64LHrK2tDR4zj8gTAAAAkG0UpQlYO+eG4DGrqqqCx8wj8gQAAABkG0VpAjYsmhs85rRp04LHzCPyBAAAAGQbRSkAAAAAIDUUpQAAAACA1FCUJqDn+dODx1yxYkXwmHlEngAAAIBsoyhNwKZVi4PHZFbZ8pAnAAAAINsoShOw5r4rgsccOXJk8Jh5RJ4AAACAbKMoBQAAAACkhqIUAAAAAJCaJotSMzvYzB41s5fN7EUzuzBePsnMVpjZwvh2SsFrLjGzxWb2qpkNbc03kAVdho4PHnPq1KnBY+YReQIAAACyrWMZ22yW9EN3X2Bme0mqNbOH4nX/4e6/KNzYzI6WNFpSP0k9JD1sZke4+5YkG54le1UMCx6zqqoqeMw8Ik8AAABAtjU5Uurude6+IH68XtLLknqWeMlpku5294/c/Q1JiyWdkERjs+rNa4YHj2lmwWPmEXkCAAAAsq1Z55SaWW9Jx0t6Jl403syeN7NbzWy/eFlPScsKXrZcpYtYAAAAAEA7VXZRamadJd0n6SJ3f0/SLZL6SKqQVCfp2vpNi7zci+yvysxqzKxmzZo1zW03AAAAAKANKKsoNbNdFRWkd7r7byXJ3Ve5+xZ3/1jSNG07RHe5pIMLXt5L0sqG+3T3anevdPfKrl27tuQ9pG73PoOCxxw+PPwhw3lEngAAAIBsK2f2XZP0K0kvu/u/FyzvXrDZVyW9ED+eIWm0mX3CzA6T1FfS/OSanD0Hfn1i8JgzZ84MHjOPyBMAAACQbeWMlJ4o6WxJJze4/Mu/mdmfzOx5SV+Q9H1JcvcXJd0j6SVJcyRd0JZn3pWk1fdODh5zxIgRwWPmEXkCAAAAss3cdzjdM7jKykqvqalJuxkl9Z4wu9F1b14zXKHzaGbBY+YReQIAAADSZ2a17l5ZbF2zZt8FAAAAACBJFKUAAAAAgNRQlCbg0ItnBY/JIanlIU8AAABAtlGUJmD9wjnBY1ZXVwePmUfkCQAAAMg2itIEvDP3xuAxx40bFzxmHpEnAAAAINsoSgEAAAAAqaEoBQAAAACkhqI0AV1HXRY85owZM4LHzCPyBAAAAGQ
bRWkCOnU7PHjMgQMHBo+ZR+QJAAAAyDaK0gSsuHlM8Jg9e/YMHjOPyBMAAACQbRSlAAAAAIDUUJQCAAAAAFJDUZqAzscNDR5z7NixwWPmEXkCAAAAso2iNAH7D/tu8JjV1dXBY+YReQIAAACyjaI0AXW3Xxg8JrPKloc8AQAAANlGUZqATauWBI+5YMGC4DHziDwBAAAA2UZRCgAAAABIDUVpAjp07hI8Zvfu3YPHzCPyBAAAAGQbRWkCel1wR/CYK1euDB4zj8gTAAAAkG0UpQlY98SdwWNOmjQpeMw8Ik8AAABAtpm7p90GVVZWek1NTdrNKKn3hNmNrnvzmuEKnUczCx4zj8gTAAAAkD4zq3X3ymLrGCkFAAAAAKSGohQAAAAAkBqK0gQcNGZK8JhZP9w5K8gTAAAAkG0UpQAAAACA1FCUJuCt6RcFj1lZWfQcYTRAngAAAIBsoygFAAAAAKSGohQAAAAAkBqK0gTsc+KZwWNOnDgxeMw8Ik8AAABAtpm7p90GVVZWetZnSe09YXbJ9UuvPjVQSwAAAAAgX8ys1t2LTvjCSGkClt90TvCYPXr0CB4zj8gTAAAAkG0UpQnYsuGd4DHr6uqCx8wj8gQAAABkG0UpAAAAACA1FKUJ6NStT/CYAwYMCB4zj8gTAAAAkG0UpQnofu51wWPW1tYGj5lH5AkAAADINorSBKydc0PwmFVVVcFj5hF5AgAAALKNojQBGxbNDR5z2rRpwWPmEXkCAAAAso2iFAAAAACQGopSAAAAAEBqKEoT0PP86cFjrlixInjMPCJPAAAAQLY1WZSa2cFm9qiZvWxmL5rZhfHyLmb2kJm9Ht/vV/CaS8xssZm9amZDW/MNZMGmVYuDx2RW2fKQJwAAACDbyhkp3Szph+5+lKTBki4ws6MlTZD0iLv3lfRI/FzxutGS+kkaJulmM+vQGo3PijX3XRE85siRI4PHzCPyBAAAAGRbk0Wpu9e5+4L48XpJL0vqKek0SfXHrU6XdHr8+DRJd7v7R+7+hqTFkk5IuN0AAAAAgDagWeeUmllvScdLekZSN3evk6LCVdKB8WY9JS0reNnyeBkAAAAAANspuyg1s86S7pN0kbu/V2rTIsu8yP6qzKzGzGrWrFlTbjMyqcvQ8cFjTp06NXjMPCJPAAAAQLaVVZSa2a6KCtI73f238eJVZtY9Xt9d0up4+XJJBxe8vJeklQ336e7V7l7p7pVdu3bd2fZnwl4Vw4LHrKqqCh4zj8gTAAAAkG3lzL5rkn4l6WV3//eCVTMkjYkfj5H0QMHy0Wb2CTM7TFJfSfOTa3L2vHnN8OAxo48FTSFPAAAAQLZ1LGObEyWdLelPZrYwXvZTSVdLusfMvi3pL5LOkCR3f9HM7pH0kqKZey9w9y1JNxwAAAAAkH9NFqXu/oSKnycqSV9s5DVXSbqqBe0CAAAAALQDzZp9F8Xt3mdQ8JjDh4c/ZDiPyBMAAACQbRSlCTjw6xODx5w5c2bwmHlEngAAAIBsoyhNwOp7JwePOWLEiOAx84g8AQAAANlGUZqAjUueDR5z1qxZwWPmEXkCAAAAso2iFAAAAACQGopSAAAAAEBqKEoTcOjF4Q8RdffgMfOIPAEAAADZ1uR1StG09QvnqPeE0tssvfrURGNWV1erqqoq0X22ReQJAAAAyDZGShPwztwbg8ccN25c8Jh5RJ4AAACAbKMoBQAAAACkhqIUAAAAAJAaitIEdB11WfCYM2bMCB4zj8gTAAAAkG0UpQno1O3w4DEHDhwYPGYekScAAAAg2yhKE7Di5jHBY/bs2TN4zDwiTwAAAEC2UZQCAAAAAFJDUQoAAAAASA1FaQI6Hzc0eMyxY8cGj5lH5AkAAADINorSBOw/7LvBY1ZXVwePmUfkCQAAAMg2itIE1N1+YfCYzCpbHvIEAAAAZBtFaQI2rVoSPOaCBQuCx8wj8gQAAABkG0UpAAAAACA1FKUJ6NC5S/CY3bt3Dx4zj8gTAAAAkG0UpQnodcEdwWOuXLkyeMw8Ik8AAABAtlGUJmDdE3cGjzlp0qTgMfOIPAEAAADZRlGagHefvCt4zMmTJwePmUfkCQAAAMg2ilIAAAAAQGooSgEAAAAAqaEoTcBBY6YEj1lTUxM8Zh6RJwAAACDbKEoBAAAAAKmhKE3AW9MvCh6zsrIyeMw8Ik8AAABAtlGUAgAAAABSQ1EKAAAAAEgNRWkC9jnxzOAxJ06cGDxmHpEnAAAAINvM3dNugyorKz3rs6T2njC7Ra9fevWpCbUEAAAAAPLFzGrdveiEL4yUJmD5TecEj9mjR4/gMfOIPAEAAADZRlGagC0b3gkes66uLnjMPCJPAAAAQLZRlAIAAAAAUkNRmoBO3foEjzlgwIDgMfOIPAEAAADZRlGagO7nXhc8Zm1tbfCYeUSeAAAAgGyjKE3A2jk3BI9ZVVUVPGYekScAAAAg25osSs3sVjNbbWYvFCybZGYrzGxhfDulYN0lZrbYzF41s6Gt1fAs2bBobvCY06ZNCx4zj8gTAAAAkG3ljJTeLmlYkeX/4e4V8e13kmRmR0saLalf/JqbzaxDUo0FAAAAALQtTRal7v6YpHKveXKapLvd/SN3f0PSYkkntKB9AAAAAIA2rCXnlI43s+fjw3v3i5f1lLSsYJvl8bI2ref504PHXLFiRfCYeUSeAAAAgGzb2aL0Fkl9JFVIqpN0bbzcimzrxXZgZlVmVmNmNWvWrNnJZmTDplWLg8dkVtnykCcAAAAg23aqKHX3Ve6+xd0/ljRN2w7RXS7p4IJNe0la2cg+qt290t0ru3btujPNyIw1910RPObIkSODx8wj8gQAAABk204VpWbWveDpVyXVz8w7Q9JoM/uEmR0mqa+k+S1rIgAAAACgrerY1AZmdpekIZIOMLPlkiZKGmJmFYoOzV0qaZwkufuLZnaPpJckbZZ0gbtvaZWWAwAAAAByr8mi1N3PLLL4VyW2v0rSVS1pVN50GTo+eMypU6cGj5lH5AkAAADItpbMvovYXhXFLuPauqqqqoLHzCPyBAAAAGQbRWkC3rxmePCYZsUmOkZD5AkAAADINopSAAAAAEBqKEoBAAAAAKmhKE3A7n0GBY85fHj4Q4bziDwBAAAA2UZRmoADvz4xeMyZM2cGj5lH5AkAAADINorSBKy+d3LwmCNGjAgeM4/IEwAAAJBtFKUJ2Ljk2eAxZ82aFTxmHpEnAAAAINsoSgEAAAAAqaEoBQAAAACkhqI0AYdeHP4QUXcPHjOPyBMAAACQbRSlCVi/cE7wmNXV1cFj5hF5AgAAALKNojQB78y9MXjMcePGBY+ZR+QJAAAAyDaKUgAAAABAaihKAQAAAACpoShNQNdRlwWPOWPGjOAx84g8AQAAANlGUZqATt0ODx5z4MCBwWPmEXkCAAAAso2iNAErbh4TPGbPnj2Dx8wj8gQAAABkG0UpAAAAACA1FKUAAAAAgNRQlCag83FDg8ccO3Zs8Jh5RJ4AAACAbKMoTcD+w74bPGZ1dXXwmHlEngAAAIBsoyhNQN3tFwaPya
yy5SFPAAAAQLZRlCZg06olwWMuWLAgeMw8Ik8AAABAtlGUAgAAAABSQ1GagA6duwSP2b179+Ax84g8AQAAANlGUZqAXhfcETzmypUrg8fMI/IEAAAAZBtFaQLWPXFn8JiTJk0KHjOPyBMAAACQbRSlCXj3ybuCx5w8eXLwmHlEngAAAIBsoygFAAAAAKSmY9oNaC96T5hdcv3Sq08N1BIAAAAAyA5GShNw0JgpwWPW1NQEj5lH5AkAAADINopSAAAAAEBqKEoT8Nb0i4LHrKysDB4zj8gTAAAAkG0UpQAAAACA1FCUAgAAAABSQ1GagH1OPDN4zIkTJwaPmUfkCQAAAMg2c/e026DKykrP+iypTV3SpaW4JAwAAACAtsrMat296IQvjJQmYPlN5wSP2aNHj+Ax84g8AQAAANlGUZqALRveCR6zrq4ueMw8Ik8AAABAtjVZlJrZrWa22sxeKFjWxcweMrPX4/v9CtZdYmaLzexVMxvaWg0HAAAAAORfOSOlt0sa1mDZBEmPuHtfSY/Ez2VmR0saLalf/JqbzaxDYq3NqE7d+gSPOWDAgOAx84g8AQAAANnWZFHq7o9Janh86mmSpsePp0s6vWD53e7+kbu/IWmxpBOSaWp2dT/3uuAxa2trg8fMI/IEAAAAZNvOnlPazd3rJCm+PzBe3lPSsoLtlsfL2rS1c24IHrOqqip4zDwiTwAAAEC2JT3RkRVZVvSaM2ZWZWY1ZlazZs2ahJsR1oZFc4PHnDZtWvCYeUSeAAAAgGzb2aJ0lZl1l6T4fnW8fLmkgwu26yVpZbEduHu1u1e6e2XXrl13shkAAAAAgDzb2aJ0hqQx8eMxkh4oWD7azD5hZodJ6itpfsuaCAAAAABoqzo2tYGZ3SVpiKQDzGy5pImSrpZ0j5l9W9JfJJ0hSe7+opndI+klSZslXeDuW1qp7ZnR8/zpTW+UsBUrVgSPmUfkCQAAAMi2JotSdz+zkVVfbGT7qyRd1ZJG5c2mVYvVca/9g8asra1Vjx49gsbMI/IEAAAAZFvSEx21S2vuuyJ4zJEjRwaPmUfkCQAAAMg2ilIAAAAAQGooSgEAAAAAqaEoTUCXoeODx5w6dWrwmHlEngAAAIBsoyhNwF4Vw4LHrKqqCh4zj8gTAAAAkG0UpQl485rhwWOaWfCYeUSeAAAAgGyjKAUAAAAApIaiFAAAAACQGorSBOzeZ1DwmMOHhz9kOI/IEwAAAJBtFKUJOPDrE4PHnDlzZvCYeUSeAAAAgGzrmHYD2oLV905ucWHae8LskuuXXn3qds9HjBhBwVUG8gQAAABkGyOlCdi45NngMWfNmhU8Zh6RJwAAACDbKEoBAAAAAKmhKAUAAAAApIaiNAGHXhz+EFF3Dx4zj8gTAAAAkG0UpQlYv3BO8JjV1dXBY+YReQIAAACyjaI0Ae/MvTF4zHHjxgWPmUfkCQAAAMg2ilIAAAAAQGooSgEAAAAAqaEoTUDXUZcFjzljxozgMfOIPAEAAADZ1jHtBrQFnbod3uoxek+Yvd3zzev/qu8+uW3Z0qtPbfU25NHAgQPTbgIAAACAEhgpTcCKm8e0i5h51LNnz7SbAAAAAKAEilIAAAAAQGooSgEAAAAAqaEoTUDn44a2i5h5NHbs2LSbAAAAAKAEitIE7D/su+0iZh5VV1en3QQAAAAAJVCUJqDu9gvbRcw8YvZdAAAAINsoShOwadWSdhEzjxYsWJB2EwAAAACUQFEKAAAAAEhNx7Qb0BZ06NylTcTsPWF2yfVLrz418ZitrXv37mk3AQAAAEAJjJQmoNcFd7SLmHm0cuXKtJsAAAAAoASK0gSse+LOdhEzjyZNmpR2EwAAAACUQFGagHefvKtdxMyjyZMnp90EAAAAACVQlAIAAAAAUkNRCgAAAABIDUVpAg4aM6VdxMyjmpqatJsAAAAAoAQuCdOONHXJFwAAAAAIjZHSBLw1/aJ2ETOPKisr024CAAAAgBIoSgEAAAAAqeHw3TaCQ3MBAAAA5FGLilIzWyppvaQtkja7e6WZdZH0G0m9JS2V9A13/2vLmplt+5x4ZruImUcTJ05MuwkAAAAASkji8N0vuHuFu9efvDdB0iPu3lfSI/HzNm3fz36rXcTMo0mTJqXdBAAAAAAltMY5padJmh4/ni7p9FaIkSnLbzqnXcTMox49eqTdBAAAAAAltLQodUkPmlmtmVXFy7q5e50kxfcHtjBG5m3Z8E67iJlHdXV1aTcBAAAAQAktnejoRHdfaWYHSnrIzF4p94VxEVslSYccckgLmwEAAAAAyKMWjZS6+8r4frWk+yWdIGmVmXWXpPh+dSOvrXb3Snev7Nq1a0uakbpO3fq0i5h5NGDAgLSbAAAAAKCEnS5KzWxPM9ur/rGkL0t6QdIMSWPizcZIeqCljcy67ude1y5i5lFtbW3aTQAAAABQQktGSrtJesLMFkmaL2m2u8+RdLWkL5nZ65K+FD9v09bOuaFdxMyjqqqqpjcCAAAAkJqdLkrd/c/uflx86+fuV8XL17r7F929b3zf5mfk2bBobruImUfTpk1LuwkAAAAASmiNS8IAAAAAAFCWls6+i3ak94TZJdcvvfrUQC0BAAAA0FYwUpqAnudPbxcx82jFihVpNwEAAABACRSlCdi0anG7iJlHzL4LAAAAZBtFaQLW3HdFu4iZRyNHjky7CQAAAABK4JxSJIZzTgEAAAA0FyOlAAAAAIDUMFKagC5Dx7eLmK2tNUZap06durPNAQAAABAARWkC9qoY1i5itlRTRWdrqKqqCh4TAAAAQPk4fDcBb14zvF3EzCMzS7sJAAAAAEqgKAUAAAAApIaiFAAAAACQGorSBOzeZ1C7iJlHw4dzmDMAAACQZUx0lIADvz6xXcTMuqITKfX75+2Wc61UAAAAIFsoShOw+t7JwYvENGLmUXvLU2tcVgcAAABoTRy+m4CNS55tFzHziDwBAAAA2cZIKXIjxHVOGWkEAAAAwmKkFAAAAACQGorSBBx68ax2ETOPyBMAAACQbRSlCVi/cE67iJlH5AkAAADINorSBLwz98Z2ETOPyBMAAACQbRSlAAAAAIDUMPsukCBm7wUAAACah6I0AV1HXdYuYuZRwzyFuKwMAAAAgPJx+G4COnU7vF3EzCPyBAAAAGQbI6UJWHHzmOCXHkkjZh7lLU8c/tty5BAAACBfGCkFAAAAAKSGohQAAAAAkBqK0gR0Pm5ou4iZR+QJAAAAyDbOKU3A/sO+2y5i5lHSeUp79t6045eDczoBAADQHIyUJqDu9gvbRcw8Ik8AAABAtjFSmoBNq5a0i5h5RJ6aj5FOAAAAhERRCgBtDD8sAACAPKEoTUCHzl3aRcw8Ik9oqJzzcinaAAAAwqEoTUCvC+5oFzHzKGt5SnuiorTjZ6UNAAAAyA4mOkrAuifubBcx84g8AQAAANnGSGkC3n3yLu372W+1+Zh5RJ6QR5wTCgAA2hOKUgDNwuG3KEdL+0l7KLz58QEAgAhFKQA00NqFN8UGykHRCgBoLyhKE3DQmCntImYekSdkU
UuLXkarWx8FIQAA4bRaUWpmwyRdJ6mDpF+6+9WtFQsAUL4kitrWLspaWhRSVAIAkB+tUpSaWQdJN0n6kqTlkp41sxnu/lJrxEvbW9Mv0qEXz2rzMfOIPAGtI+3R2rTjtwUU7gCArGitkdITJC129z9LkpndLek0SW2yKAUAtC/toSjOQtHa2m3IwntsSh7aWEo5/1ay/h6Q/36I7GutorSnpGUFz5dL+nQrxQIAoFkoKrO//zxoCzloDzNlp11Qhegnrf0DDdLX1n/gMXdPfqdmZ0ga6u7/FD8/W9IJ7v7dgm2qJFXFT4+U9GriDUnOAZLeTrsRyAX6CspFX0Fz0F9QLvoKmoP+gnIl0VcOdfeuxVa01kjpckkHFzzvJWll4QbuXi2pupXiJ8rMaty9Mu12IPvoKygXfQXNQX9BuegraA76C8rV2n1ll1ba77OS+prZYWbWSdJoSTNaKRYAAAAAIKdaZaTU3Teb2XhJcxVdEuZWd3+xNWIBAAAAAPKr1a5T6u6/k/S71tp/YLk4zBiZQF9BuegraA76C8pFX0Fz0F9QrlbtK60y0REAAAAAAOVorXNKAQAAAABoEkVpCWY2zMxeNbPFZjYh7fYgfWZ2q5mtNrMXCpZ1MbOHzOz1+H6/gnWXxP3nVTMbmk6rkQYzO9jMHjWzl83sRTO7MF5Of8F2zGw3M5tvZovivjI5Xk5fQVFm1sHMnjOzWfFz+gqKMrOlZvYnM1toZjXxMvoLdmBm+5rZvWb2Svzd5TMh+wpFaSPMrIOkmyR9RdLRks40s6PTbRUy4HZJwxosmyDpEXfvK+mR+Lni/jJaUr/4NTfH/Qrtw2ZJP3T3oyQNlnRB3CfoL2joI0knu/txkiokDTOzwaKvoHEXSnq54Dl9BaV8wd0rCi7nQX9BMddJmuPun5J0nKK/McH6CkVp406QtNjd/+zumyTdLem0lNuElLn7Y5LeabD4NEnT48fTJZ1esPxud//I3d+QtFhRv0I74O517r4gfrxe0R/3nqK/oAGPbIif7hrfXPQVFGFmvSSdKumXBYvpK2gO+gu2Y2Z7SzpJ0q8kyd03ufs6BewrFKWN6ylpWcHz5fEyoKFu7l4nRYWIpAPj5fQhSJLMrLek4yU9I/oLiogPx1woabWkh9ydvoLGTJH0E0kfFyyjr6AxLulBM6s1s6p4Gf0FDX1S0hpJt8WnBvzSzPZUwL5CUdo4K7KMqYrRHPQhyMw6S7pP0kXu/l6pTYsso7+0E+6+xd0rJPWSdIKZHVNic/pKO2VmwyWtdvfacl9SZBl9pX050d0HKDod7QIzO6nEtvSX9qujpAGSbnH34yW9r/hQ3UYk3lcoShu3XNLBBc97SVqZUluQbavMrLskxfer4+X0oXbOzHZVVJDe6e6/jRfTX9Co+HCpeYrO0aGvoKETJY00s6WKTis62cz+S/QVNMLdV8b3qyXdr+gQS/oLGlouaXl8lI4k3auoSA3WVyhKG/espL5mdpiZdVJ0Mu+MlNuEbJohaUz8eIykBwqWjzazT5jZYZL6SpqfQvuQAjMzRedmvOzu/16wiv6C7ZhZVzPbN368u6S/l/SK6CtowN0vcfde7t5b0feSP7j7WaKvoAgz29PM9qp/LOnLkl4Q/QUNuPtbkpaZ2ZHxoi9KekkB+0rHlry4LXP3zWY2XtJcSR0k3eruL6bcLKTMzO6SNETSAWa2XNJESVdLusfMvi3pL5LOkCR3f9HM7lH0j3qzpAvcfUsqDUcaTpR0tqQ/xecKStJPRX/BjrpLmh7PXLiLpHvcfZaZPSX6CsrD3xUU003S/dFvpOoo6b/dfY6ZPSv6C3b0XUl3xoNxf5b0j4r/TwrRV8ydQ8UBAAAAAOng8F0AAAAAQGooSgEAAAAAqaEoBQAAAACkhqIUAAAAAJAailIAAAAAQGooSgEAAAAAqaEoBQAAAACkhqIUAAAAAJCa/w8lnEVBHEn47AAAAABJRU5ErkJggg==\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "users_per_item=df.groupby(['item']).count()['rating']\n",
+ "\n",
+ "plt.figure(figsize=(16,8))\n",
+ "plt.hist(users_per_item, bins=100)\n",
+ "\n",
+ "# Let's add median\n",
+ "t=users_per_item.median()\n",
+ "plt.axvline(t, color='k', linestyle='dashed', linewidth=1)\n",
+ "plt.text(t*1.1, plt.ylim()[1]*0.9, 'Median: {:.0f}'.format(t))\n",
+ "\n",
+ "# Let's add also some percentiles\n",
+ "t=users_per_item.quantile(0.25)\n",
+ "plt.axvline(t, color='k', linestyle='dashed', linewidth=1)\n",
+ "plt.text(t*1.1, plt.ylim()[1]*0.95, '25% quantile: {:.0f}'.format(t))\n",
+ "\n",
+ "t=users_per_item.quantile(0.75)\n",
+ "plt.axvline(t, color='k', linestyle='dashed', linewidth=1)\n",
+ "plt.text(t*1.05, plt.ylim()[1]*0.95, '75% quantile: {:.0f}'.format(t))\n",
+ "\n",
+ "plt.title('Number of ratings per item', fontsize=30)\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "rating\n",
+ "1 0.06110\n",
+ "2 0.11370\n",
+ "3 0.27145\n",
+ "4 0.34174\n",
+ "5 0.21201\n",
+ "Name: user, dtype: float64"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.groupby(['rating']).count()['user']/len(df)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Item attributes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "genres = pd.read_csv('./Datasets/ml-100k/u.genre', sep='|', header=None,\n",
+ " encoding='latin-1')\n",
+ "genres=dict(zip(genres[1], genres[0]))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{0: 'unknown',\n",
+ " 1: 'Action',\n",
+ " 2: 'Adventure',\n",
+ " 3: 'Animation',\n",
+ " 4: \"Children's\",\n",
+ " 5: 'Comedy',\n",
+ " 6: 'Crime',\n",
+ " 7: 'Documentary',\n",
+ " 8: 'Drama',\n",
+ " 9: 'Fantasy',\n",
+ " 10: 'Film-Noir',\n",
+ " 11: 'Horror',\n",
+ " 12: 'Musical',\n",
+ " 13: 'Mystery',\n",
+ " 14: 'Romance',\n",
+ " 15: 'Sci-Fi',\n",
+ " 16: 'Thriller',\n",
+ " 17: 'War',\n",
+ " 18: 'Western'}"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "genres"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "movies = pd.read_csv('./Datasets/ml-100k/u.item', sep='|', encoding='latin-1', header=None)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 6 | \n",
+ " 7 | \n",
+ " 8 | \n",
+ " 9 | \n",
+ " ... | \n",
+ " 14 | \n",
+ " 15 | \n",
+ " 16 | \n",
+ " 17 | \n",
+ " 18 | \n",
+ " 19 | \n",
+ " 20 | \n",
+ " 21 | \n",
+ " 22 | \n",
+ " 23 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " Toy Story (1995) | \n",
+ " 01-Jan-1995 | \n",
+ " NaN | \n",
+ " http://us.imdb.com/M/title-exact?Toy%20Story%2... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2 | \n",
+ " GoldenEye (1995) | \n",
+ " 01-Jan-1995 | \n",
+ " NaN | \n",
+ " http://us.imdb.com/M/title-exact?GoldenEye%20(... | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3 | \n",
+ " Four Rooms (1995) | \n",
+ " 01-Jan-1995 | \n",
+ " NaN | \n",
+ " http://us.imdb.com/M/title-exact?Four%20Rooms%... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
3 rows × 24 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " 0 1 2 3 \\\n",
+ "0 1 Toy Story (1995) 01-Jan-1995 NaN \n",
+ "1 2 GoldenEye (1995) 01-Jan-1995 NaN \n",
+ "2 3 Four Rooms (1995) 01-Jan-1995 NaN \n",
+ "\n",
+ " 4 5 6 7 8 9 ... \\\n",
+ "0 http://us.imdb.com/M/title-exact?Toy%20Story%2... 0 0 0 1 1 ... \n",
+ "1 http://us.imdb.com/M/title-exact?GoldenEye%20(... 0 1 1 0 0 ... \n",
+ "2 http://us.imdb.com/M/title-exact?Four%20Rooms%... 0 0 0 0 0 ... \n",
+ "\n",
+ " 14 15 16 17 18 19 20 21 22 23 \n",
+ "0 0 0 0 0 0 0 0 0 0 0 \n",
+ "1 0 0 0 0 0 0 0 1 0 0 \n",
+ "2 0 0 0 0 0 0 0 1 0 0 \n",
+ "\n",
+ "[3 rows x 24 columns]"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "movies[:3]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "for i in range(19):\n",
+ " movies[i+5]=movies[i+5].apply(lambda x: genres[i] if x==1 else '')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "movies['genre']=movies.iloc[:, 5:].apply(lambda x: ', '.join(x[x!='']), axis = 1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "movies=movies[[0,1,'genre']]\n",
+ "movies.columns=['id', 'title', 'genres']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " title | \n",
+ " genres | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " Toy Story (1995) | \n",
+ " Animation, Children's, Comedy | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2 | \n",
+ " GoldenEye (1995) | \n",
+ " Action, Adventure, Thriller | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3 | \n",
+ " Four Rooms (1995) | \n",
+ " Thriller | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4 | \n",
+ " Get Shorty (1995) | \n",
+ " Action, Comedy, Drama | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5 | \n",
+ " Copycat (1995) | \n",
+ " Crime, Drama, Thriller | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id title genres\n",
+ "0 1 Toy Story (1995) Animation, Children's, Comedy\n",
+ "1 2 GoldenEye (1995) Action, Adventure, Thriller\n",
+ "2 3 Four Rooms (1995) Thriller\n",
+ "3 4 Get Shorty (1995) Action, Comedy, Drama\n",
+ "4 5 Copycat (1995) Crime, Drama, Thriller"
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "movies.to_csv('./Datasets/ml-100k/movies.csv', index=False)\n",
+ "movies[:5]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Toy example"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "os.makedirs('./Datasets/toy-example/', exist_ok = True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "toy_train=pd.DataFrame([[0,0,3,0], [0,10,4,0], [0,40,5,0], [0,70,4,0],\n",
+ " [10,10,1,0], [10,20,2,0], [10,30,3,0],\n",
+ " [20,30,5,0], [20,50,3,0], [20,60,4,0]])\n",
+ "toy_test=pd.DataFrame([[0,60,3,0],\n",
+ " [10,40,5,0],\n",
+ " [20,0,5,0], [20,20,4,0], [20,70,2,0]])\n",
+ "\n",
+ "toy_train.to_csv('./Datasets/toy-example/train.csv', sep='\\t', header=None, index=False)\n",
+ "toy_test.to_csv('./Datasets/toy-example/test.csv', sep='\\t', header=None, index=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/.ipynb_checkpoints/P1. Baseline-checkpoint.ipynb b/.ipynb_checkpoints/P1. Baseline-checkpoint.ipynb
new file mode 100644
index 0000000..85b9494
--- /dev/null
+++ b/.ipynb_checkpoints/P1. Baseline-checkpoint.ipynb
@@ -0,0 +1,1527 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Preparing dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import scipy.sparse as sparse\n",
+ "from collections import defaultdict\n",
+ "from itertools import chain\n",
+ "import random\n",
+ "\n",
+ "train_read=pd.read_csv('./Datasets/ml-100k/train.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n",
+ "test_read=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Let's prepare dataset\n",
+ "train_and_test=pd.concat([train_read, test_read], axis=0, ignore_index=True)\n",
+ "train_and_test['user_code'] = train_and_test['user'].astype(\"category\").cat.codes\n",
+ "train_and_test['item_code'] = train_and_test['item'].astype(\"category\").cat.codes\n",
+ "\n",
+ "user_code_id = dict(enumerate(train_and_test['user'].astype(\"category\").cat.categories))\n",
+ "user_id_code = dict((v, k) for k, v in user_code_id.items())\n",
+ "item_code_id = dict(enumerate(train_and_test['item'].astype(\"category\").cat.categories))\n",
+ "item_id_code = dict((v, k) for k, v in item_code_id.items())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " user | \n",
+ " item | \n",
+ " rating | \n",
+ " timestamp | \n",
+ " user_code | \n",
+ " item_code | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 664 | \n",
+ " 525 | \n",
+ " 4 | \n",
+ " 876526580 | \n",
+ " 663 | \n",
+ " 524 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 49 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 888068651 | \n",
+ " 48 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 352 | \n",
+ " 273 | \n",
+ " 2 | \n",
+ " 884290328 | \n",
+ " 351 | \n",
+ " 272 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 618 | \n",
+ " 96 | \n",
+ " 3 | \n",
+ " 891307749 | \n",
+ " 617 | \n",
+ " 95 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 560 | \n",
+ " 24 | \n",
+ " 2 | \n",
+ " 879976772 | \n",
+ " 559 | \n",
+ " 23 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " user item rating timestamp user_code item_code\n",
+ "0 664 525 4 876526580 663 524\n",
+ "1 49 1 2 888068651 48 0\n",
+ "2 352 273 2 884290328 351 272\n",
+ "3 618 96 3 891307749 617 95\n",
+ "4 560 24 2 879976772 559 23"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "train_and_test[:5]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "train_df=pd.merge(train_read, train_and_test, on=list(train_read.columns))\n",
+ "test_df=pd.merge(test_read, train_and_test, on=list(train_read.columns))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Take number of users and items\n",
+ "(U,I)=(train_and_test['user_code'].max()+1, train_and_test['item_code'].max()+1)\n",
+ "\n",
+ "# Create sparse csr matrices\n",
+ "train_ui = sparse.csr_matrix((train_df['rating'], (train_df['user_code'], train_df['item_code'])), shape=(U, I))\n",
+ "test_ui = sparse.csr_matrix((test_df['rating'], (test_df['user_code'], test_df['item_code'])), shape=(U, I))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Above steps are the same for many algorithms, so I put the code in separate file:\n",
+ "import helpers\n",
+ "train_read=pd.read_csv('./Datasets/ml-100k/train.csv', sep='\\t', header=None)\n",
+ "test_read=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n",
+ "train_ui, test_ui, user_code_id, user_id_code, item_code_id, item_id_code = helpers.data_to_csr(train_read, test_read)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### CSR matrices - what is it?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "<3x4 sparse matrix of type ''\n",
+ "\twith 8 stored elements in Compressed Sparse Row format>"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "row = np.array([0, 0, 0, 1, 1, 2, 2, 2])\n",
+ "col = np.array([0, 1, 2, 1, 3, 2, 0, 3])\n",
+ "data = np.array([4, 1, 3, 2,1, 5, 2, 4])\n",
+ "sample_csr=sparse.csr_matrix((data, (row, col)))\n",
+ "sample_csr"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Ratings matrix with missing entries replaced by zeros:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "matrix([[4, 1, 3, 0],\n",
+ " [0, 2, 0, 1],\n",
+ " [2, 0, 5, 4]], dtype=int32)"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Number of ratings: 8\n",
+ "Number of users: 3\n",
+ "Number of items: 4\n"
+ ]
+ }
+ ],
+ "source": [
+ "print('Ratings matrix with missing entries replaced by zeros:')\n",
+ "display(sample_csr.todense())\n",
+ "\n",
+ "print(f'Number of ratings: {sample_csr.nnz}')\n",
+ "print(f'Number of users: {sample_csr.shape[0]}')\n",
+ "print(f'Number of items: {sample_csr.shape[1]}')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Ratings data: [4 1 3 2 1 2 5 4]\n",
+ "Regarding items: [0 1 2 1 3 0 2 3]\n",
+ "Where ratings from 0 to 2 belongs to user 0.\n",
+ "Where ratings from 3 to 4 belongs to user 1.\n",
+ "Where ratings from 5 to 7 belongs to user 2.\n"
+ ]
+ }
+ ],
+ "source": [
+ "print('Ratings data:', sample_csr.data)\n",
+ "\n",
+ "print('Regarding items:', sample_csr.indices)\n",
+ "\n",
+ "for i in range(sample_csr.shape[0]):\n",
+ " print(f'Where ratings from {sample_csr.indptr[i]} to {sample_csr.indptr[i+1]-1} belongs to user {i}.')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Efficient way to access items rated by user:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "array([ 0, 6, 10, 27, 49, 78, 95, 97, 116, 143, 153, 156, 167,\n",
+ " 171, 172, 173, 194, 208, 225, 473, 495, 549, 615], dtype=int32)"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "471 ns ± 15.3 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n",
+ "Inefficient way to access items rated by user:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "array([ 0, 6, 10, 27, 49, 78, 95, 97, 116, 143, 153, 156, 167,\n",
+ " 171, 172, 173, 194, 208, 225, 473, 495, 549, 615])"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "48.3 µs ± 1.51 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
+ ]
+ }
+ ],
+ "source": [
+ "user=123\n",
+ "\n",
+ "print('Efficient way to access items rated by user:')\n",
+ "display(train_ui.indices[train_ui.indptr[user]:train_ui.indptr[user+1]])\n",
+ "%timeit train_ui.indices[train_ui.indptr[user]:train_ui.indptr[user+1]]\n",
+ "\n",
+ "print('Inefficient way to access items rated by user:')\n",
+ "display(train_ui[user].indices)\n",
+ "%timeit train_ui[user].indices"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "###### Example: subtracting row means"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Our matrix:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "matrix([[4, 1, 3, 0],\n",
+ " [0, 2, 0, 1],\n",
+ " [2, 0, 5, 4]], dtype=int32)"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "List of row sums:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "matrix([[ 8, 3, 11]])"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "print('Our matrix:')\n",
+ "display(sample_csr.todense())\n",
+ "print('List of row sums:')\n",
+ "sample_csr.sum(axis=1).ravel()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Array with row means:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "array([2.66666667, 1.5 , 3.66666667])"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Diagonal csr matrix with inverse of row sums on diagonal:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "matrix([[2.66666667, 0. , 0. ],\n",
+ " [0. , 1.5 , 0. ],\n",
+ " [0. , 0. , 3.66666667]])"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Let's apply them in nonzero entries:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "matrix([[2.66666667, 2.66666667, 2.66666667, 0. ],\n",
+ " [0. , 1.5 , 0. , 1.5 ],\n",
+ " [3.66666667, 0. , 3.66666667, 3.66666667]])"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Finally after subtraction:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "matrix([[ 1.33333333, -1.66666667, 0.33333333, 0. ],\n",
+ " [ 0. , 0.5 , 0. , -0.5 ],\n",
+ " [-1.66666667, 0. , 1.33333333, 0.33333333]])"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "print('Array with row means:')\n",
+ "row_means=np.asarray(sample_csr.sum(axis=1).ravel())[0]/np.diff(sample_csr.indptr)\n",
+ "display(row_means)\n",
+ "\n",
+ "print('Diagonal csr matrix with inverse of row sums on diagonal:')\n",
+ "display(sparse.diags(row_means).todense())\n",
+ "\n",
+ "print(\"\"\"Let's apply them in nonzero entries:\"\"\")\n",
+ "to_subtract=sparse.diags(row_means)*(sample_csr>0)\n",
+ "display(to_subtract.todense())\n",
+ "\n",
+ "print(\"Finally after subtraction:\")\n",
+ "sample_csr-to_subtract.todense()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "###### Transposing"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Sample matrix: \n",
+ " [[4 1 3 0]\n",
+ " [0 2 0 1]\n",
+ " [2 0 5 4]]\n",
+ "\n",
+ "Indices: \n",
+ " [0 1 2 1 3 0 2 3]\n",
+ "\n",
+ "Transposed matrix: \n",
+ " [[4 0 2]\n",
+ " [1 2 0]\n",
+ " [3 0 5]\n",
+ " [0 1 4]]\n",
+ "\n",
+ "Indices of transposed matrix: \n",
+ " [0 1 2 1 3 0 2 3]\n",
+ "\n",
+ "Reason: \n",
+ "\n",
+ "After converting to csr: \n",
+ " [0 2 0 1 0 2 1 2]\n"
+ ]
+ }
+ ],
+ "source": [
+ "import numpy as np\n",
+ "from scipy import sparse\n",
+ "row = np.array([0, 0, 0, 1, 1, 2, 2, 2])\n",
+ "col = np.array([0, 1, 2, 1, 3, 2, 0, 3])\n",
+ "data = np.array([4, 1, 3, 2,1, 5, 2, 4])\n",
+ "sample=sparse.csr_matrix((data, (row, col)))\n",
+ "print('Sample matrix: \\n', sample.A)\n",
+ "print('\\nIndices: \\n', sample.indices)\n",
+ "transposed=sample.transpose()\n",
+ "print('\\nTransposed matrix: \\n', transposed.A)\n",
+ "print('\\nIndices of transposed matrix: \\n', transposed.indices)\n",
+ "\n",
+ "print('\\nReason: ', type(transposed))\n",
+ "\n",
+ "print('\\nAfter converting to csr: \\n', transposed.tocsr().indices)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Self made top popular"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "if not os.path.exists('./Recommendations generated/'):\n",
+ " os.mkdir('./Recommendations generated/')\n",
+ " os.mkdir('./Recommendations generated/ml-100k/')\n",
+ " os.mkdir('./Recommendations generated/toy-example/')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "top_pop = []\n",
+ "train_iu = train_ui.transpose().tocsr()\n",
+ "scaling_factor = train_ui.max()/max(np.diff(train_iu.indptr))\n",
+ "\n",
+ "for i in range(train_iu.shape[0]):\n",
+ " top_pop.append((i, (train_iu.indptr[i+1]-train_iu.indptr[i])*scaling_factor))\n",
+ " \n",
+ "top_pop.sort(key=lambda x: x[1], reverse=True)\n",
+ "#top_pop is an array of pairs (item, rescaled_popularity) sorted descending from the most popular\n",
+ "\n",
+ "k = 10\n",
+ "result = []\n",
+ "\n",
+ "for u in range(train_ui.shape[0]):\n",
+ " user_rated = train_ui.indices[train_ui.indptr[u]:train_ui.indptr[u+1]]\n",
+ " rec_user = []\n",
+ " item_pos = 0\n",
+ " while len(rec_user)<10:\n",
+ " if top_pop[item_pos][0] not in user_rated:\n",
+ " rec_user.append((item_code_id[top_pop[item_pos][0]], top_pop[item_pos][1]))\n",
+ " item_pos+=1\n",
+ " result.append([user_code_id[u]]+list(chain(*rec_user)))\n",
+ "\n",
+ "(pd.DataFrame(result)).to_csv('Recommendations generated/ml-100k/Self_TopPop_reco.csv', index=False, header=False)\n",
+ "\n",
+ "\n",
+ "# estimations - score is a bit artificial since that method is not designed for scoring, but for ranking\n",
+ "\n",
+ "estimations=[]\n",
+ "\n",
+ "for user, item in zip(*test_ui.nonzero()):\n",
+ " estimations.append([user_code_id[user], item_code_id[item],\n",
+ " (train_iu.indptr[item+1]-train_iu.indptr[item])*scaling_factor])\n",
+ "(pd.DataFrame(estimations)).to_csv('Recommendations generated/ml-100k/Self_TopPop_estimations.csv', index=False, header=False)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Self made top rated"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "top_rated = []\n",
+ "global_avg = sum(train_iu.data)/train_ui.nnz\n",
+ "\n",
+ "for i in range(train_iu.shape[0]):\n",
+ " ratings = train_iu.data[train_iu.indptr[i]: train_iu.indptr[i+1]]\n",
+ " avg = np.mean(ratings) if len(ratings)>0 else global_avg\n",
+ " top_rated.append((i, avg))\n",
+ " \n",
+ "top_rated.sort(key=lambda x: x[1], reverse=True)\n",
+ " \n",
+ "k=10\n",
+ "result=[]\n",
+ "\n",
+ "for u in range(train_ui.shape[0]):\n",
+ " user_rated=train_ui.indices[train_ui.indptr[u]:train_ui.indptr[u+1]]\n",
+ " rec_user=[]\n",
+ " item_pos=0\n",
+ " while len(rec_user)<10:\n",
+ " if top_rated[item_pos][0] not in user_rated:\n",
+ " rec_user.append((item_code_id[top_rated[item_pos][0]], top_rated[item_pos][1]))\n",
+ " item_pos+=1\n",
+ " result.append([user_code_id[u]]+list(chain(*rec_user)))\n",
+ "\n",
+ "(pd.DataFrame(result)).to_csv('Recommendations generated/ml-100k/Self_TopRated_reco.csv', index=False, header=False)\n",
+ "\n",
+ "\n",
+ "\n",
+ "estimations=[]\n",
+ "d = dict(top_rated)\n",
+ "\n",
+ "for user, item in zip(*test_ui.nonzero()):\n",
+ " estimations.append([user_code_id[user], item_code_id[item], d[item]])\n",
+ "(pd.DataFrame(estimations)).to_csv('Recommendations generated/ml-100k/Self_TopRated_estimations.csv', index=False, header=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 6 | \n",
+ " 7 | \n",
+ " 8 | \n",
+ " 9 | \n",
+ " ... | \n",
+ " 11 | \n",
+ " 12 | \n",
+ " 13 | \n",
+ " 14 | \n",
+ " 15 | \n",
+ " 16 | \n",
+ " 17 | \n",
+ " 18 | \n",
+ " 19 | \n",
+ " 20 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 814 | \n",
+ " 5.0 | \n",
+ " 1122 | \n",
+ " 5.0 | \n",
+ " 1189 | \n",
+ " 5.0 | \n",
+ " 1201 | \n",
+ " 5.0 | \n",
+ " 1293 | \n",
+ " ... | \n",
+ " 1306 | \n",
+ " 5.0 | \n",
+ " 1467 | \n",
+ " 5.0 | \n",
+ " 1491 | \n",
+ " 5.0 | \n",
+ " 1500 | \n",
+ " 5.0 | \n",
+ " 1536 | \n",
+ " 5.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 119 | \n",
+ " 5.0 | \n",
+ " 814 | \n",
+ " 5.0 | \n",
+ " 1122 | \n",
+ " 5.0 | \n",
+ " 1189 | \n",
+ " 5.0 | \n",
+ " 1201 | \n",
+ " ... | \n",
+ " 1293 | \n",
+ " 5.0 | \n",
+ " 1306 | \n",
+ " 5.0 | \n",
+ " 1467 | \n",
+ " 5.0 | \n",
+ " 1491 | \n",
+ " 5.0 | \n",
+ " 1500 | \n",
+ " 5.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
2 rows × 21 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " 0 1 2 3 4 5 6 7 8 9 ... 11 12 13 \\\n",
+ "0 1 814 5.0 1122 5.0 1189 5.0 1201 5.0 1293 ... 1306 5.0 1467 \n",
+ "1 2 119 5.0 814 5.0 1122 5.0 1189 5.0 1201 ... 1293 5.0 1306 \n",
+ "\n",
+ " 14 15 16 17 18 19 20 \n",
+ "0 5.0 1491 5.0 1500 5.0 1536 5.0 \n",
+ "1 5.0 1467 5.0 1491 5.0 1500 5.0 \n",
+ "\n",
+ "[2 rows x 21 columns]"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pd.DataFrame(result)[:2]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Self-made baseline"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class selfBaselineUI():\n",
+ " \n",
+ " def fit(self, train_ui):\n",
+ " self.train_ui=train_ui.copy()\n",
+ " self.train_iu=train_ui.transpose().tocsr()\n",
+ " \n",
+ " result=self.train_ui.copy()\n",
+ " \n",
+ " self.row_means=np.asarray(result.sum(axis=1).ravel())[0]/np.diff(result.indptr)\n",
+ " \n",
+ " # in csr format after addition or multiplication 0 entries \"disappear\" - so some workaraunds are needed \n",
+ " # (other option is to define addition/multiplication in a desired way)\n",
+ " row_means=self.row_means.copy()\n",
+ " \n",
+ " max_row_mean=np.max(row_means)\n",
+ " row_means[row_means==0]=max_row_mean+1\n",
+ " to_subtract_rows=sparse.diags(row_means)*(result.power(0))\n",
+ " to_subtract_rows.sort_indices() # needed to have valid .data\n",
+ " \n",
+ " subtract=to_subtract_rows.data\n",
+ " subtract[subtract==max_row_mean+1]=0\n",
+ " \n",
+ " result.data=result.data-subtract\n",
+ "# we can't do result=train_ui-to_subtract_rows since then 0 entries will \"disappear\" in csr format\n",
+ " self.col_means=np.divide(np.asarray(result.sum(axis=0).ravel())[0], np.diff(self.train_iu.indptr),\\\n",
+ " out=np.zeros(self.train_iu.shape[0]), where=np.diff(self.train_iu.indptr)!=0) # handling items without ratings\n",
+ " \n",
+ " # again - it is possible that some mean will be zero, so let's use the same workaround\n",
+ " col_means=self.col_means.copy()\n",
+ " \n",
+ " max_col_mean=np.max(col_means)\n",
+ " col_means[col_means==0]=max_col_mean+1\n",
+ " to_subtract_cols=result.power(0)*sparse.diags(col_means)\n",
+ " to_subtract_cols.sort_indices() # needed to have valid .data\n",
+ " \n",
+ " subtract=to_subtract_cols.data\n",
+ " subtract[subtract==max_col_mean+1]=0\n",
+ " \n",
+ " result.data=result.data-subtract\n",
+ "\n",
+ " return result\n",
+ " \n",
+ " \n",
+ " def recommend(self, user_code_id, item_code_id, topK=10):\n",
+ " estimations=np.tile(self.row_means[:,None], [1, self.train_ui.shape[1]]) +np.tile(self.col_means, [self.train_ui.shape[0], 1])\n",
+ " \n",
+ " top_k = defaultdict(list)\n",
+ " for nb_user, user in enumerate(estimations):\n",
+ " \n",
+ " user_rated=self.train_ui.indices[self.train_ui.indptr[nb_user]:self.train_ui.indptr[nb_user+1]]\n",
+ " for item, score in enumerate(user):\n",
+ " if item not in user_rated:\n",
+ " top_k[user_code_id[nb_user]].append((item_code_id[item], score))\n",
+ " result=[]\n",
+ " # Let's choose k best items in the format: (user, item1, score1, item2, score2, ...)\n",
+ " for uid, item_scores in top_k.items():\n",
+ " item_scores.sort(key=lambda x: x[1], reverse=True)\n",
+ " result.append([uid]+list(chain(*item_scores[:topK])))\n",
+ " return result\n",
+ " \n",
+ " def estimate(self, user_code_id, item_code_id, test_ui):\n",
+ " result=[]\n",
+ " for user, item in zip(*test_ui.nonzero()):\n",
+ " result.append([user_code_id[user], item_code_id[item], self.row_means[user]+self.col_means[item]])\n",
+ " return result"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Training data:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "matrix([[3, 4, 0, 0, 5, 0, 0, 4],\n",
+ " [0, 1, 2, 3, 0, 0, 0, 0],\n",
+ " [0, 0, 0, 5, 0, 3, 4, 0]], dtype=int64)"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "After subtracting rows and columns:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "matrix([[ 0. , 0.5, 0. , 0. , 0. , 0. , 0. , 0. ],\n",
+ " [ 0. , -0.5, 0. , 0. , 0. , 0. , 0. , 0. ],\n",
+ " [ 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ]])"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Recommend best unseen item:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[[0, 30, 5.0], [10, 40, 3.0], [20, 40, 5.0]]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Print estimations on unseen items:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " user | \n",
+ " item | \n",
+ " est_score | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 60 | \n",
+ " 4.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 10 | \n",
+ " 40 | \n",
+ " 3.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 20 | \n",
+ " 0 | \n",
+ " 3.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 20 | \n",
+ " 20 | \n",
+ " 4.0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 20 | \n",
+ " 70 | \n",
+ " 4.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " user item est_score\n",
+ "0 0 60 4.0\n",
+ "1 10 40 3.0\n",
+ "2 20 0 3.0\n",
+ "3 20 20 4.0\n",
+ "4 20 70 4.0"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "toy_train_read=pd.read_csv('./Datasets/toy-example/train.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n",
+ "toy_test_read=pd.read_csv('./Datasets/toy-example/test.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n",
+ "\n",
+ "toy_train_ui, toy_test_ui, toy_user_code_id, toy_user_id_code, \\\n",
+ "toy_item_code_id, toy_item_id_code = helpers.data_to_csr(toy_train_read, toy_test_read)\n",
+ "\n",
+ "print('Training data:')\n",
+ "display(toy_train_ui.todense())\n",
+ "\n",
+ "model=selfBaselineUI()\n",
+ "print('After subtracting rows and columns:')\n",
+ "display(model.fit(toy_train_ui).todense())\n",
+ "\n",
+ "print('Recommend best unseen item:')\n",
+ "display(model.recommend(toy_user_code_id, toy_item_code_id, topK=1))\n",
+ "\n",
+ "print('Print estimations on unseen items:')\n",
+ "estimations=pd.DataFrame(model.estimate(toy_user_code_id, toy_item_code_id, toy_test_ui))\n",
+ "estimations.columns=['user', 'item', 'est_score']\n",
+ "display(estimations)\n",
+ "\n",
+ "top_n=pd.DataFrame(model.recommend(toy_user_code_id, toy_item_code_id, topK=3))\n",
+ "\n",
+ "top_n.to_csv('Recommendations generated/toy-example/Self_BaselineUI_reco.csv', index=False, header=False)\n",
+ "\n",
+ "estimations=pd.DataFrame(model.estimate(toy_user_code_id, toy_item_code_id, toy_test_ui))\n",
+ "estimations.to_csv('Recommendations generated/toy-example/Self_BaselineUI_estimations.csv', index=False, header=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "model=selfBaselineUI()\n",
+ "model.fit(train_ui)\n",
+ "\n",
+ "top_n=pd.DataFrame(model.recommend(user_code_id, item_code_id, topK=10))\n",
+ "\n",
+ "top_n.to_csv('Recommendations generated/ml-100k/Self_BaselineUI_reco.csv', index=False, header=False)\n",
+ "\n",
+ "estimations=pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))\n",
+ "estimations.to_csv('Recommendations generated/ml-100k/Self_BaselineUI_estimations.csv', index=False, header=False)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# project task 1: implement self-made BaselineIU"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Implement recommender system which will recommend movies (which user hasn't seen) which is similar to BaselineUI but first subtract column means then row means.\n",
+ "\n",
+ "The output should be saved in 'Recommendations generated/ml-100k/Self_BaselineIU_reco.csv' and 'Recommendations generated/ml-100k/Self_BaselineIU_estimations.csv'.\n",
+ "\n",
+ "
\n",
+ "Additional clarification: \n",
+ "\n",
+ "Summarizing, the prediction of the rating of the user u regarding the item i should be equal to b_u + b_i.\n",
+ "The procedure to get b_u and b_i is the following:\n",
+ "- We have the original user-item ratings matrix M.\n",
+ "- For each column representing the item i, we compute the mean of ratings and denote by b_i. From each rating in matrix M we subtract the corresponding column mean (b_i) to receive new matrix M'.\n",
+ "- For each row of matrix M' representing the user u, we compute the mean of ratings and denote by b_u."
+ ]
+ },
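+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# A minimal sketch of the procedure above on a tiny dense matrix (illustration only, not the CSR-based solution below).\n",
+ "# The matrix M is made up for this example; 0 denotes a missing rating.\n",
+ "import numpy as np\n",
+ "\n",
+ "M = np.array([[3., 4., 0.], [0., 1., 2.], [5., 0., 4.]])\n",
+ "rated = M > 0\n",
+ "\n",
+ "# b_i: mean rating of each column (item), computed over rated entries only\n",
+ "b_i = M.sum(axis=0) / np.maximum(rated.sum(axis=0), 1)\n",
+ "\n",
+ "# M': subtract the corresponding b_i from every existing rating\n",
+ "M_prime = np.where(rated, M - b_i, 0)\n",
+ "\n",
+ "# b_u: mean of each row of M', again over rated entries only\n",
+ "b_u = M_prime.sum(axis=1) / np.maximum(rated.sum(axis=1), 1)\n",
+ "\n",
+ "print('b_i:', b_i)\n",
+ "print('b_u:', b_u)\n",
+ "print('prediction for user 0 and item 2:', b_u[0] + b_i[2])"
+ ]
+ },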
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class selfBaselineIU():\n",
+ " \n",
+ " def fit(self, train_ui):\n",
+ " self.train_ui=train_ui.copy()\n",
+ " self.train_iu=train_ui.transpose().tocsr()\n",
+ " \n",
+ " result=self.train_ui.copy()\n",
+ " \n",
+ " #we can't do result=train_ui-to_subtract_rows since then 0 entries will \"disappear\" in csr format\n",
+ " self.col_means=np.divide(np.asarray(result.sum(axis=0).ravel())[0], np.diff(self.train_iu.indptr),\\\n",
+ " out=np.zeros(self.train_iu.shape[0]), where=np.diff(self.train_iu.indptr)!=0) # handling items without ratings\n",
+ " \n",
+ " # again - it is possible that some mean will be zero, so let's use the same workaround\n",
+ " col_means=self.col_means.copy()\n",
+ " \n",
+ " max_col_mean=np.max(col_means)\n",
+ " col_means[col_means==0]=max_col_mean+1\n",
+ " to_subtract_cols=result.power(0)*sparse.diags(col_means)\n",
+ " to_subtract_cols.sort_indices() # needed to have valid .data\n",
+ " \n",
+ " subtract=to_subtract_cols.data\n",
+ " subtract[subtract==max_col_mean+1]=0\n",
+ " \n",
+ " result.data=result.data-subtract\n",
+ "\n",
+ "\n",
+ " self.row_means=np.asarray(result.sum(axis=1).ravel())[0]/np.diff(result.indptr)\n",
+ " \n",
+ " # in csr format after addition or multiplication 0 entries \"disappear\" - so some workaraunds are needed \n",
+ " # (other option is to define addition/multiplication in a desired way)\n",
+ " row_means=self.row_means.copy()\n",
+ " \n",
+ " max_row_mean=np.max(row_means)\n",
+ " row_means[row_means==0]=max_row_mean+1\n",
+ " to_subtract_rows=sparse.diags(row_means)*(result.power(0))\n",
+ " to_subtract_rows.sort_indices() # needed to have valid .data\n",
+ " \n",
+ " subtract=to_subtract_rows.data\n",
+ " subtract[subtract==max_row_mean+1]=0\n",
+ " \n",
+ " result.data=result.data-subtract\n",
+ "\n",
+ " return result\n",
+ " \n",
+ " \n",
+ " def recommend(self, user_code_id, item_code_id, topK=10):\n",
+ " estimations=np.tile(self.row_means[:,None], [1, self.train_ui.shape[1]]) +np.tile(self.col_means, [self.train_ui.shape[0], 1])\n",
+ " \n",
+ " top_k = defaultdict(list)\n",
+ " for nb_user, user in enumerate(estimations):\n",
+ " \n",
+ " user_rated=self.train_ui.indices[self.train_ui.indptr[nb_user]:self.train_ui.indptr[nb_user+1]]\n",
+ " for item, score in enumerate(user):\n",
+ " if item not in user_rated:\n",
+ " top_k[user_code_id[nb_user]].append((item_code_id[item], score))\n",
+ " result=[]\n",
+ " # Let's choose k best items in the format: (user, item1, score1, item2, score2, ...)\n",
+ " for uid, item_scores in top_k.items():\n",
+ " item_scores.sort(key=lambda x: x[1], reverse=True)\n",
+ " result.append([uid]+list(chain(*item_scores[:topK])))\n",
+ " return result\n",
+ " \n",
+ " def estimate(self, user_code_id, item_code_id, test_ui):\n",
+ " result=[]\n",
+ " for user, item in zip(*test_ui.nonzero()):\n",
+ " result.append([user_code_id[user], item_code_id[item], self.row_means[user]+self.col_means[item]])\n",
+ " return result"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Training data:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "matrix([[3, 4, 0, 0, 5, 0, 0, 4],\n",
+ " [0, 1, 2, 3, 0, 0, 0, 0],\n",
+ " [0, 0, 0, 5, 0, 3, 4, 0]], dtype=int64)"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "After subtracting columns and rows:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "matrix([[-0.375 , 1.125 , 0. , 0. , -0.375 ,\n",
+ " 0. , 0. , -0.375 ],\n",
+ " [ 0. , -0.66666667, 0.83333333, -0.16666667, 0. ,\n",
+ " 0. , 0. , 0. ],\n",
+ " [ 0. , 0. , 0. , 0.66666667, 0. ,\n",
+ " -0.33333333, -0.33333333, 0. ]])"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Recommend best unseen item:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[[0, 30, 4.375], [10, 40, 4.166666666666667], [20, 40, 5.333333333333333]]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Print estimations on unseen items:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " user | \n",
+ " item | \n",
+ " est_score | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 60 | \n",
+ " 4.375000 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 10 | \n",
+ " 40 | \n",
+ " 4.166667 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 20 | \n",
+ " 0 | \n",
+ " 3.333333 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 20 | \n",
+ " 20 | \n",
+ " 2.333333 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 20 | \n",
+ " 70 | \n",
+ " 4.333333 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " user item est_score\n",
+ "0 0 60 4.375000\n",
+ "1 10 40 4.166667\n",
+ "2 20 0 3.333333\n",
+ "3 20 20 2.333333\n",
+ "4 20 70 4.333333"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "toy_train_read=pd.read_csv('./Datasets/toy-example/train.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n",
+ "toy_test_read=pd.read_csv('./Datasets/toy-example/test.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n",
+ "\n",
+ "toy_train_iu, toy_test_iu, toy_user_code_id, toy_user_id_code, \\\n",
+ "toy_item_code_id, toy_item_id_code = helpers.data_to_csr(toy_train_read, toy_test_read)\n",
+ "\n",
+ "print('Training data:')\n",
+ "display(toy_train_iu.todense())\n",
+ "\n",
+ "model=selfBaselineIU()\n",
+ "print('After subtracting columns and rows:')\n",
+ "display(model.fit(toy_train_iu).todense())\n",
+ "\n",
+ "print('Recommend best unseen item:')\n",
+ "display(model.recommend(toy_user_code_id, toy_item_code_id, topK=1))\n",
+ "\n",
+ "print('Print estimations on unseen items:')\n",
+ "estimations=pd.DataFrame(model.estimate(toy_user_code_id, toy_item_code_id, toy_test_iu))\n",
+ "estimations.columns=['user', 'item', 'est_score']\n",
+ "display(estimations)\n",
+ "\n",
+ "top_n=pd.DataFrame(model.recommend(toy_user_code_id, toy_item_code_id, topK=3))\n",
+ "\n",
+ "top_n.to_csv('Recommendations generated/toy-example/Self_BaselineIU_reco.csv', index=False, header=False)\n",
+ "\n",
+ "estimations=pd.DataFrame(model.estimate(toy_user_code_id, toy_item_code_id, toy_test_iu))\n",
+ "estimations.to_csv('Recommendations generated/toy-example/Self_BaselineIU_estimations.csv', index=False, header=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "model=selfBaselineIU()\n",
+ "model.fit(train_ui)\n",
+ "\n",
+ "top_n=pd.DataFrame(model.recommend(user_code_id, item_code_id, topK=10))\n",
+ "\n",
+ "top_n.to_csv('Recommendations generated/Projects/Project1_Self_BaselineIU_reco.csv', index=False, header=False)\n",
+ "\n",
+ "estimations=pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))\n",
+ "estimations.to_csv('Recommendations generated/Projects/Project1_Self_BaselineIU_estimations.csv', index=False, header=False)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Ready-made baseline - Surprise implementation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Estimating biases using als...\n"
+ ]
+ }
+ ],
+ "source": [
+ "import surprise as sp\n",
+ "import time\n",
+ "\n",
+ "# Based on surprise.readthedocs.io\n",
+ "def get_top_n(predictions, n=10):\n",
+ " \n",
+ " # Here we create a dictionary which items are lists of pairs (item, score)\n",
+ " top_n = defaultdict(list)\n",
+ " for uid, iid, true_r, est, _ in predictions:\n",
+ " top_n[uid].append((iid, est))\n",
+ " \n",
+ " result=[]\n",
+ " # Let's choose k best items in the format: (user, item1, score1, item2, score2, ...)\n",
+ " for uid, user_ratings in top_n.items():\n",
+ " user_ratings.sort(key=lambda x: x[1], reverse=True)\n",
+ " result.append([uid]+list(chain(*user_ratings[:n]))) \n",
+ " return result\n",
+ "\n",
+ "\n",
+ "reader = sp.Reader(line_format='user item rating timestamp', sep='\\t')\n",
+ "trainset = sp.Dataset.load_from_file('./Datasets/ml-100k/train.csv', reader=reader)\n",
+ "trainset = trainset.build_full_trainset() # -> it is needed for using Surprise package\n",
+ "\n",
+ "testset = sp.Dataset.load_from_file('./Datasets/ml-100k/test.csv', reader=reader)\n",
+ "testset = sp.Trainset.build_testset(testset.build_full_trainset())\n",
+ "\n",
+ "algo = sp.BaselineOnly()\n",
+ "# algo = sp.BaselineOnly(bsl_options={'method':'sgd', 'reg':0, 'n_epochs':2000})\n",
+ "# observe how bad results gives above algorithm\n",
+ "# more details http://courses.ischool.berkeley.edu/i290-dm/s11/SECURE/a1-koren.pdf - chapter 2.1\n",
+ "\n",
+ "algo.fit(trainset)\n",
+ "\n",
+ "antitrainset = trainset.build_anti_testset() # We want to predict ratings of pairs (user, item) which are not in train set\n",
+ "predictions = algo.test(antitrainset)\n",
+ "\n",
+ "top_n = get_top_n(predictions, n=10)\n",
+ "\n",
+ "top_n=pd.DataFrame(top_n)\n",
+ "\n",
+ "top_n.to_csv('Recommendations generated/ml-100k/Ready_Baseline_reco.csv', index=False, header=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "RMSE: 0.9495\n",
+ "MAE: 0.7525\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "0.7524871012820799"
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Compute RMSE on testset using buildin functions\n",
+ "predictions = algo.test(testset)\n",
+ "sp.accuracy.rmse(predictions, verbose=True)\n",
+ "\n",
+ "# Let's also save the results in file\n",
+ "predictions_df=[]\n",
+ "for uid, iid, true_r, est, _ in predictions:\n",
+ " predictions_df.append([uid, iid, est])\n",
+ " \n",
+ "predictions_df=pd.DataFrame(predictions_df)\n",
+ "predictions_df.to_csv('Recommendations generated/ml-100k/Ready_Baseline_estimations.csv', index=False, header=False)\n",
+ "\n",
+ "sp.accuracy.mae(predictions, verbose=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "##### Let's compare with random"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "RMSE: 1.5165\n",
+ "MAE: 1.2172\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "1.2172144988785374"
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# in surprise random is an algorithm predicting random value regarding to normal distribution estimated from train set\n",
+ "algo = sp.NormalPredictor()\n",
+ "algo.fit(trainset)\n",
+ "\n",
+ "antitrainset = trainset.build_anti_testset() # We want to predict ratings of pairs (user, item) which are not in train set\n",
+ "predictions = algo.test(antitrainset)\n",
+ "\n",
+ "top_n = get_top_n(predictions, n=10)\n",
+ "\n",
+ "top_n=pd.DataFrame(top_n)\n",
+ "\n",
+ "top_n.to_csv('Recommendations generated/ml-100k/Ready_Random_reco.csv', index=False, header=False)\n",
+ "\n",
+ "# Compute RMSE on testset using buildin functions\n",
+ "predictions = algo.test(testset)\n",
+ "sp.accuracy.rmse(predictions, verbose=True)\n",
+ "\n",
+ "# Let's also save the results in file\n",
+ "predictions_df=[]\n",
+ "for uid, iid, true_r, est, _ in predictions:\n",
+ " predictions_df.append([uid, iid, est])\n",
+ " \n",
+ "predictions_df=pd.DataFrame(predictions_df)\n",
+ "predictions_df.to_csv('Recommendations generated/ml-100k/Ready_Random_estimations.csv', index=False, header=False)\n",
+ "\n",
+ "sp.accuracy.mae(predictions, verbose=True)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.8"
+ },
+ "metadata": {
+ "interpreter": {
+ "hash": "2a3a95f8b675c5b7dd6a35e1675edaf697539b1f0a71c4603e9520a8bbd07d82"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/.ipynb_checkpoints/P2. Evaluation-checkpoint.ipynb b/.ipynb_checkpoints/P2. Evaluation-checkpoint.ipynb
new file mode 100644
index 0000000..d4cadb5
--- /dev/null
+++ b/.ipynb_checkpoints/P2. Evaluation-checkpoint.ipynb
@@ -0,0 +1,1678 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Prepare test set"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "-"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import scipy.sparse as sparse\n",
+ "from collections import defaultdict\n",
+ "from itertools import chain\n",
+ "import random\n",
+ "from tqdm import tqdm\n",
+ "\n",
+ "# In evaluation we do not load train set - it is not needed\n",
+ "test = pd.read_csv(\"./Datasets/ml-100k/test.csv\", sep=\"\\t\", header=None)\n",
+ "test.columns = [\"user\", \"item\", \"rating\", \"timestamp\"]\n",
+ "\n",
+ "test[\"user_code\"] = test[\"user\"].astype(\"category\").cat.codes\n",
+ "test[\"item_code\"] = test[\"item\"].astype(\"category\").cat.codes\n",
+ "\n",
+ "user_code_id = dict(enumerate(test[\"user\"].astype(\"category\").cat.categories))\n",
+ "user_id_code = dict((v, k) for k, v in user_code_id.items())\n",
+ "item_code_id = dict(enumerate(test[\"item\"].astype(\"category\").cat.categories))\n",
+ "item_id_code = dict((v, k) for k, v in item_code_id.items())\n",
+ "\n",
+ "test_ui = sparse.csr_matrix((test[\"rating\"], (test[\"user_code\"], test[\"item_code\"])))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Estimations metrics"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "FileNotFoundError",
+ "evalue": "[Errno 2] No such file or directory: 'Recommendations generated/ml-100k/Ready_Baseline_estimations.csv'",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
+ "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m estimations_df = pd.read_csv(\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[1;34m\"Recommendations generated/ml-100k/Ready_Baseline_estimations.csv\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mheader\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m )\n\u001b[0;32m 4\u001b[0m \u001b[0mestimations_df\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolumns\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;34m\"user\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"item\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"score\"\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
+ "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36mread_csv\u001b[1;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)\u001b[0m\n\u001b[0;32m 608\u001b[0m \u001b[0mkwds\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkwds_defaults\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 609\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 610\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0m_read\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 611\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 612\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
+ "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36m_read\u001b[1;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[0;32m 460\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 461\u001b[0m \u001b[1;31m# Create the parser.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 462\u001b[1;33m \u001b[0mparser\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mTextFileReader\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 463\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 464\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mchunksize\u001b[0m \u001b[1;32mor\u001b[0m \u001b[0miterator\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+ "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[0;32m 817\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"has_index_names\"\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mkwds\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"has_index_names\"\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 818\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 819\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_engine\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_make_engine\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mengine\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 820\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 821\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mclose\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+ "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36m_make_engine\u001b[1;34m(self, engine)\u001b[0m\n\u001b[0;32m 1048\u001b[0m )\n\u001b[0;32m 1049\u001b[0m \u001b[1;31m# error: Too many arguments for \"ParserBase\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1050\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mmapping\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mengine\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# type: ignore[call-arg]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1051\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1052\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_failover_to_python\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+ "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, src, **kwds)\u001b[0m\n\u001b[0;32m 1865\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1866\u001b[0m \u001b[1;31m# open handles\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1867\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_open_handles\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msrc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1868\u001b[0m \u001b[1;32massert\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mhandles\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1869\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mkey\u001b[0m \u001b[1;32min\u001b[0m \u001b[1;33m(\u001b[0m\u001b[1;34m\"storage_options\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"encoding\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"memory_map\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"compression\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+ "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36m_open_handles\u001b[1;34m(self, src, kwds)\u001b[0m\n\u001b[0;32m 1360\u001b[0m \u001b[0mLet\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mreaders\u001b[0m \u001b[0mopen\u001b[0m \u001b[0mIOHanldes\u001b[0m \u001b[0mafter\u001b[0m \u001b[0mthey\u001b[0m \u001b[0mare\u001b[0m \u001b[0mdone\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mtheir\u001b[0m \u001b[0mpotential\u001b[0m \u001b[0mraises\u001b[0m\u001b[1;33m.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1361\u001b[0m \"\"\"\n\u001b[1;32m-> 1362\u001b[1;33m self.handles = get_handle(\n\u001b[0m\u001b[0;32m 1363\u001b[0m \u001b[0msrc\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1364\u001b[0m \u001b[1;34m\"r\"\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+ "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\io\\common.py\u001b[0m in \u001b[0;36mget_handle\u001b[1;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[0;32m 640\u001b[0m \u001b[0merrors\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m\"replace\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 641\u001b[0m \u001b[1;31m# Encoding\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 642\u001b[1;33m handle = open(\n\u001b[0m\u001b[0;32m 643\u001b[0m \u001b[0mhandle\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 644\u001b[0m \u001b[0mioargs\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmode\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+ "\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'Recommendations generated/ml-100k/Ready_Baseline_estimations.csv'"
+ ]
+ }
+ ],
+ "source": [
+ "estimations_df = pd.read_csv(\n",
+ " \"Recommendations generated/ml-100k/Ready_Baseline_estimations.csv\", header=None\n",
+ ")\n",
+ "estimations_df.columns = [\"user\", \"item\", \"score\"]\n",
+ "\n",
+ "estimations_df[\"user_code\"] = [user_id_code[user] for user in estimations_df[\"user\"]]\n",
+ "estimations_df[\"item_code\"] = [item_id_code[item] for item in estimations_df[\"item\"]]\n",
+ "estimations = sparse.csr_matrix(\n",
+ " (\n",
+ " estimations_df[\"score\"],\n",
+ " (estimations_df[\"user_code\"], estimations_df[\"item_code\"]),\n",
+ " ),\n",
+ " shape=test_ui.shape,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def estimations_metrics(test_ui, estimations):\n",
+ " result = []\n",
+ "\n",
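+ "    # note: the formulas below subtract the raw .data arrays, so they are only valid when\n",
+ "    # estimations and test_ui share the same sparsity pattern (same nonzero cells, same order)\n",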
+ " RMSE = (np.sum((estimations.data - test_ui.data) ** 2) / estimations.nnz) ** (1 / 2)\n",
+ " result.append([\"RMSE\", RMSE])\n",
+ "\n",
+ " MAE = np.sum(abs(estimations.data - test_ui.data)) / estimations.nnz\n",
+ " result.append([\"MAE\", MAE])\n",
+ "\n",
+ " df_result = (pd.DataFrame(list(zip(*result))[1])).T\n",
+ " df_result.columns = list(zip(*result))[0]\n",
+ " return df_result"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "NameError",
+ "evalue": "name 'estimations' is not defined",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
+ "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;31m# try !pip3 install pandas=='1.0.3' (or pip if you use python 2) and restart the kernel\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m \u001b[0mestimations_metrics\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtest_ui\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mestimations\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
+ "\u001b[1;31mNameError\u001b[0m: name 'estimations' is not defined"
+ ]
+ }
+ ],
+ "source": [
+ "# in case of an error (in the laboratories) you might have to switch to another version of pandas\n",
+ "# try !pip3 install pandas=='1.0.3' (or pip if you use python 2) and restart the kernel\n",
+ "\n",
+ "estimations_metrics(test_ui, estimations)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Ranking metrics"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[663, 475, 62, ..., 472, 269, 503],\n",
+ " [ 48, 313, 475, ..., 591, 175, 466],\n",
+ " [351, 313, 475, ..., 591, 175, 466],\n",
+ " ...,\n",
+ " [259, 313, 475, ..., 11, 591, 175],\n",
+ " [ 33, 313, 475, ..., 11, 591, 175],\n",
+ " [ 77, 313, 475, ..., 11, 591, 175]])"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import numpy as np\n",
+ "\n",
+ "reco = np.loadtxt(\n",
+ " \"Recommendations generated/ml-100k/Ready_Baseline_reco.csv\", delimiter=\",\"\n",
+ ")\n",
+ "# Let's ignore scores - they are not used in evaluation:\n",
+ "users = reco[:, :1]\n",
+ "items = reco[:, 1::2]\n",
+ "# Let's use inner ids instead of real ones\n",
+ "users = np.vectorize(lambda x: user_id_code.setdefault(x, -1))(users)\n",
+ "items = np.vectorize(lambda x: item_id_code.setdefault(x, -1))(items)\n",
+ "reco = np.concatenate((users, items), axis=1)\n",
+ "reco"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def ranking_metrics(test_ui, reco, super_reactions=[], topK=10):\n",
+ "\n",
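+ "    # top-K ranking metrics averaged over users with at least one item in the test set;\n",
+ "    # ratings listed in super_reactions (e.g. 4 and 5) mark the 'super relevant' items\n",
+ "    # used by precision_super and recall_super\n",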
+ " nb_items = test_ui.shape[1]\n",
+ " (\n",
+ " relevant_users,\n",
+ " super_relevant_users,\n",
+ " prec,\n",
+ " rec,\n",
+ " F_1,\n",
+ " F_05,\n",
+ " prec_super,\n",
+ " rec_super,\n",
+ " ndcg,\n",
+ " mAP,\n",
+ " MRR,\n",
+ " LAUC,\n",
+ " HR,\n",
+ " ) = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)\n",
+ "\n",
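+ "    # DCG discounts 1/log2(rank+1) and their cumulative sums (the ideal DCG values\n",
+ "    # used later to normalize NDCG)\n",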
+ " cg = 1.0 / np.log2(np.arange(2, topK + 2))\n",
+ " cg_sum = np.cumsum(cg)\n",
+ "\n",
+ " for (nb_user, user) in tqdm(enumerate(reco[:, 0])):\n",
+ " u_rated_items = test_ui.indices[test_ui.indptr[user] : test_ui.indptr[user + 1]]\n",
+ " nb_u_rated_items = len(u_rated_items)\n",
+ " if (\n",
+ " nb_u_rated_items > 0\n",
+ " ): # skip users with no items in test set (still possible that there will be no super items)\n",
+ " relevant_users += 1\n",
+ "\n",
+ " u_super_items = u_rated_items[\n",
+ " np.vectorize(lambda x: x in super_reactions)(\n",
+ " test_ui.data[test_ui.indptr[user] : test_ui.indptr[user + 1]]\n",
+ " )\n",
+ " ]\n",
+ "            # a more natural way would be u_super_items=[item for item in u_rated_items if test_ui[user,item] in super_reactions]\n",
+ "            # but accessing test_ui[user,item] is expensive - we should avoid doing it\n",
+ " if len(u_super_items) > 0:\n",
+ " super_relevant_users += 1\n",
+ "\n",
+ " user_successes = np.zeros(topK)\n",
+ " nb_user_successes = 0\n",
+ " user_super_successes = np.zeros(topK)\n",
+ " nb_user_super_successes = 0\n",
+ "\n",
+ " # evaluation\n",
+ " for (item_position, item) in enumerate(reco[nb_user, 1 : topK + 1]):\n",
+ " if item in u_rated_items:\n",
+ " user_successes[item_position] = 1\n",
+ " nb_user_successes += 1\n",
+ " if item in u_super_items:\n",
+ " user_super_successes[item_position] = 1\n",
+ " nb_user_super_successes += 1\n",
+ "\n",
+ " prec_u = nb_user_successes / topK\n",
+ " prec += prec_u\n",
+ "\n",
+ " rec_u = nb_user_successes / nb_u_rated_items\n",
+ " rec += rec_u\n",
+ "\n",
+ " F_1 += 2 * (prec_u * rec_u) / (prec_u + rec_u) if prec_u + rec_u > 0 else 0\n",
+ " F_05 += (\n",
+ " (0.5 ** 2 + 1) * (prec_u * rec_u) / (0.5 ** 2 * prec_u + rec_u)\n",
+ " if prec_u + rec_u > 0\n",
+ " else 0\n",
+ " )\n",
+ "\n",
+ " prec_super += nb_user_super_successes / topK\n",
+ " rec_super += nb_user_super_successes / max(\n",
+ " len(u_super_items), 1\n",
+ "            )  # keeps the contribution at 0 when there are no super items (avoids division by zero)\n",
+ " ndcg += np.dot(user_successes, cg) / cg_sum[min(topK, nb_u_rated_items) - 1]\n",
+ "\n",
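+ "            # average precision at topK: precision@i summed over hit positions i,\n",
+ "            # normalized by the number of relevant items (capped at topK)\n",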
+ " cumsum_successes = np.cumsum(user_successes)\n",
+ " mAP += np.dot(\n",
+ " cumsum_successes / np.arange(1, topK + 1), user_successes\n",
+ " ) / min(topK, nb_u_rated_items)\n",
+ " MRR += (\n",
+ " 1 / (user_successes.nonzero()[0][0] + 1)\n",
+ " if user_successes.nonzero()[0].size > 0\n",
+ " else 0\n",
+ " )\n",
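+ "            # LAUC (limited AUC): AUC estimated from the top-K list only - items beyond\n",
+ "            # the cut-off are assumed to be ordered randomly, hence the /2 term\n",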
+ " LAUC += (\n",
+ " np.dot(cumsum_successes, 1 - user_successes)\n",
+ " + (nb_user_successes + nb_u_rated_items)\n",
+ " / 2\n",
+ " * ((nb_items - nb_u_rated_items) - (topK - nb_user_successes))\n",
+ " ) / ((nb_items - nb_u_rated_items) * nb_u_rated_items)\n",
+ "\n",
+ " HR += nb_user_successes > 0\n",
+ "\n",
+ " result = []\n",
+ " result.append((\"precision\", prec / relevant_users))\n",
+ " result.append((\"recall\", rec / relevant_users))\n",
+ " result.append((\"F_1\", F_1 / relevant_users))\n",
+ " result.append((\"F_05\", F_05 / relevant_users))\n",
+ " result.append((\"precision_super\", prec_super / super_relevant_users))\n",
+ " result.append((\"recall_super\", rec_super / super_relevant_users))\n",
+ " result.append((\"NDCG\", ndcg / relevant_users))\n",
+ " result.append((\"mAP\", mAP / relevant_users))\n",
+ " result.append((\"MRR\", MRR / relevant_users))\n",
+ " result.append((\"LAUC\", LAUC / relevant_users))\n",
+ " result.append((\"HR\", HR / relevant_users))\n",
+ "\n",
+ " df_result = (pd.DataFrame(list(zip(*result))[1])).T\n",
+ " df_result.columns = list(zip(*result))[0]\n",
+ " return df_result"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "943it [00:00, 9434.06it/s]\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " precision | \n",
+ " recall | \n",
+ " F_1 | \n",
+ " F_05 | \n",
+ " precision_super | \n",
+ " recall_super | \n",
+ " NDCG | \n",
+ " mAP | \n",
+ " MRR | \n",
+ " LAUC | \n",
+ " HR | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0.09141 | \n",
+ " 0.037652 | \n",
+ " 0.04603 | \n",
+ " 0.061286 | \n",
+ " 0.079614 | \n",
+ " 0.056463 | \n",
+ " 0.095957 | \n",
+ " 0.043178 | \n",
+ " 0.198193 | \n",
+ " 0.515501 | \n",
+ " 0.437964 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " precision recall F_1 F_05 precision_super recall_super \\\n",
+ "0 0.09141 0.037652 0.04603 0.061286 0.079614 0.056463 \n",
+ "\n",
+ " NDCG mAP MRR LAUC HR \n",
+ "0 0.095957 0.043178 0.198193 0.515501 0.437964 "
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ranking_metrics(test_ui, reco, super_reactions=[4, 5], topK=10)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Diversity metrics"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def diversity_metrics(test_ui, reco, topK=10):\n",
+ "\n",
+ " frequencies = defaultdict(int)\n",
+ "\n",
+ " # let's assign 0 to all items in test set\n",
+ " for item in list(set(test_ui.indices)):\n",
+ " frequencies[item] = 0\n",
+ "\n",
+ " # counting frequencies\n",
+ " for item in reco[:, 1:].flat:\n",
+ " frequencies[item] += 1\n",
+ "\n",
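+ "    # items that could not be mapped to an inner code were assigned -1 when loading reco;\n",
+ "    # frequencies[-1] therefore counts recommendations falling outside the known item set\n",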
+ " nb_reco_outside_test = frequencies[-1]\n",
+ " del frequencies[-1]\n",
+ "\n",
+ " frequencies = np.array(list(frequencies.values()))\n",
+ "\n",
+ " nb_rec_items = len(frequencies[frequencies > 0])\n",
+ " nb_reco_inside_test = np.sum(frequencies)\n",
+ "\n",
+ " frequencies = frequencies / np.sum(frequencies)\n",
+ " frequencies = np.sort(frequencies)\n",
+ "\n",
+ " with np.errstate(\n",
+ " divide=\"ignore\"\n",
+ "    ):  # let's put zeros for items with 0 frequency and ignore the division warning\n",
+ " log_frequencies = np.nan_to_num(np.log(frequencies), posinf=0, neginf=0)\n",
+ "\n",
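+ "    # Reco in test: share of recommendations that can be matched against known items;\n",
+ "    # Test coverage: fraction of catalogue items recommended at least once;\n",
+ "    # Shannon: entropy of the recommendation frequency distribution (higher = more diverse);\n",
+ "    # Gini: inequality of that distribution (0 = uniform exposure, close to 1 = concentrated)\n",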
+ " result = []\n",
+ " result.append(\n",
+ " (\n",
+ " \"Reco in test\",\n",
+ " nb_reco_inside_test / (nb_reco_inside_test + nb_reco_outside_test),\n",
+ " )\n",
+ " )\n",
+ " result.append((\"Test coverage\", nb_rec_items / test_ui.shape[1]))\n",
+ " result.append((\"Shannon\", -np.dot(frequencies, log_frequencies)))\n",
+ " result.append(\n",
+ " (\n",
+ " \"Gini\",\n",
+ " np.dot(frequencies, np.arange(1 - len(frequencies), len(frequencies), 2))\n",
+ " / (len(frequencies) - 1),\n",
+ " )\n",
+ " )\n",
+ "\n",
+ " df_result = (pd.DataFrame(list(zip(*result))[1])).T\n",
+ " df_result.columns = list(zip(*result))[0]\n",
+ " return df_result"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Reco in test | \n",
+ " Test coverage | \n",
+ " Shannon | \n",
+ " Gini | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1.0 | \n",
+ " 0.033911 | \n",
+ " 2.836513 | \n",
+ " 0.991139 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Reco in test Test coverage Shannon Gini\n",
+ "0 1.0 0.033911 2.836513 0.991139"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# in case of errors try !pip3 install numpy==1.18.4 (or pip if you use python 2) and restart the kernel\n",
+ "\n",
+ "x = diversity_metrics(test_ui, reco, topK=10)\n",
+ "x"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# To be used in other notebooks"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "943it [00:00, 11012.47it/s]\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " RMSE | \n",
+ " MAE | \n",
+ " precision | \n",
+ " recall | \n",
+ " F_1 | \n",
+ " F_05 | \n",
+ " precision_super | \n",
+ " recall_super | \n",
+ " NDCG | \n",
+ " mAP | \n",
+ " MRR | \n",
+ " LAUC | \n",
+ " HR | \n",
+ " Reco in test | \n",
+ " Test coverage | \n",
+ " Shannon | \n",
+ " Gini | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0.949459 | \n",
+ " 0.752487 | \n",
+ " 0.09141 | \n",
+ " 0.037652 | \n",
+ " 0.04603 | \n",
+ " 0.061286 | \n",
+ " 0.079614 | \n",
+ " 0.056463 | \n",
+ " 0.095957 | \n",
+ " 0.043178 | \n",
+ " 0.198193 | \n",
+ " 0.515501 | \n",
+ " 0.437964 | \n",
+ " 1.0 | \n",
+ " 0.033911 | \n",
+ " 2.836513 | \n",
+ " 0.991139 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " RMSE MAE precision recall F_1 F_05 \\\n",
+ "0 0.949459 0.752487 0.09141 0.037652 0.04603 0.061286 \n",
+ "\n",
+ " precision_super recall_super NDCG mAP MRR LAUC \\\n",
+ "0 0.079614 0.056463 0.095957 0.043178 0.198193 0.515501 \n",
+ "\n",
+ " HR Reco in test Test coverage Shannon Gini \n",
+ "0 0.437964 1.0 0.033911 2.836513 0.991139 "
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import evaluation_measures as ev\n",
+ "\n",
+ "estimations_df = pd.read_csv(\n",
+ " \"Recommendations generated/ml-100k/Ready_Baseline_estimations.csv\", header=None\n",
+ ")\n",
+ "reco = np.loadtxt(\n",
+ " \"Recommendations generated/ml-100k/Ready_Baseline_reco.csv\", delimiter=\",\"\n",
+ ")\n",
+ "\n",
+ "ev.evaluate(\n",
+ " test=pd.read_csv(\"./Datasets/ml-100k/test.csv\", sep=\"\\t\", header=None),\n",
+ " estimations_df=estimations_df,\n",
+ " reco=reco,\n",
+ " super_reactions=[4, 5],\n",
+ ")\n",
+ "# you can also just call ev.evaluate_all(estimations_df, reco) - the values above are the defaults"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "943it [00:00, 10346.82it/s]\n",
+ "943it [00:00, 11772.32it/s]\n",
+ "943it [00:00, 10636.62it/s]\n",
+ "943it [00:00, 10767.92it/s]\n",
+ "943it [00:00, 12019.93it/s]\n"
+ ]
+ }
+ ],
+ "source": [
+ "dir_path = \"Recommendations generated/ml-100k/\"\n",
+ "super_reactions = [4, 5]\n",
+ "test = pd.read_csv(\"./Datasets/ml-100k/test.csv\", sep=\"\\t\", header=None)\n",
+ "\n",
+ "df = ev.evaluate_all(test, dir_path, super_reactions)\n",
+ "# you can also just call ev.evaluate_all() - the values above are the defaults"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Model | \n",
+ " RMSE | \n",
+ " MAE | \n",
+ " precision | \n",
+ " recall | \n",
+ " F_1 | \n",
+ " F_05 | \n",
+ " precision_super | \n",
+ " recall_super | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Self_TopPop | \n",
+ " 2.508258 | \n",
+ " 2.217909 | \n",
+ " 0.188865 | \n",
+ " 0.116919 | \n",
+ " 0.118732 | \n",
+ " 0.141584 | \n",
+ " 0.130472 | \n",
+ " 0.137473 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_Baseline | \n",
+ " 0.949459 | \n",
+ " 0.752487 | \n",
+ " 0.091410 | \n",
+ " 0.037652 | \n",
+ " 0.046030 | \n",
+ " 0.061286 | \n",
+ " 0.079614 | \n",
+ " 0.056463 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_Random | \n",
+ " 1.521845 | \n",
+ " 1.225949 | \n",
+ " 0.047190 | \n",
+ " 0.020753 | \n",
+ " 0.024810 | \n",
+ " 0.032269 | \n",
+ " 0.029506 | \n",
+ " 0.023707 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Self_TopRated | \n",
+ " 1.030712 | \n",
+ " 0.820904 | \n",
+ " 0.000954 | \n",
+ " 0.000188 | \n",
+ " 0.000298 | \n",
+ " 0.000481 | \n",
+ " 0.000644 | \n",
+ " 0.000223 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Self_BaselineUI | \n",
+ " 0.967585 | \n",
+ " 0.762740 | \n",
+ " 0.000954 | \n",
+ " 0.000170 | \n",
+ " 0.000278 | \n",
+ " 0.000463 | \n",
+ " 0.000644 | \n",
+ " 0.000189 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Model RMSE MAE precision recall F_1 \\\n",
+ "0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 0.118732 \n",
+ "0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 0.046030 \n",
+ "0 Ready_Random 1.521845 1.225949 0.047190 0.020753 0.024810 \n",
+ "0 Self_TopRated 1.030712 0.820904 0.000954 0.000188 0.000298 \n",
+ "0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 0.000278 \n",
+ "\n",
+ " F_05 precision_super recall_super \n",
+ "0 0.141584 0.130472 0.137473 \n",
+ "0 0.061286 0.079614 0.056463 \n",
+ "0 0.032269 0.029506 0.023707 \n",
+ "0 0.000481 0.000644 0.000223 \n",
+ "0 0.000463 0.000644 0.000189 "
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.iloc[:, :9]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Model | \n",
+ " NDCG | \n",
+ " mAP | \n",
+ " MRR | \n",
+ " LAUC | \n",
+ " HR | \n",
+ " Reco in test | \n",
+ " Test coverage | \n",
+ " Shannon | \n",
+ " Gini | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Self_TopPop | \n",
+ " 0.214651 | \n",
+ " 0.111707 | \n",
+ " 0.400939 | \n",
+ " 0.555546 | \n",
+ " 0.765642 | \n",
+ " 1.000000 | \n",
+ " 0.038961 | \n",
+ " 3.159079 | \n",
+ " 0.987317 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_Baseline | \n",
+ " 0.095957 | \n",
+ " 0.043178 | \n",
+ " 0.198193 | \n",
+ " 0.515501 | \n",
+ " 0.437964 | \n",
+ " 1.000000 | \n",
+ " 0.033911 | \n",
+ " 2.836513 | \n",
+ " 0.991139 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_Random | \n",
+ " 0.050075 | \n",
+ " 0.018728 | \n",
+ " 0.121957 | \n",
+ " 0.506893 | \n",
+ " 0.329799 | \n",
+ " 0.986532 | \n",
+ " 0.184704 | \n",
+ " 5.099706 | \n",
+ " 0.907217 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Self_TopRated | \n",
+ " 0.001043 | \n",
+ " 0.000335 | \n",
+ " 0.003348 | \n",
+ " 0.496433 | \n",
+ " 0.009544 | \n",
+ " 0.699046 | \n",
+ " 0.005051 | \n",
+ " 1.945910 | \n",
+ " 0.995669 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Self_BaselineUI | \n",
+ " 0.000752 | \n",
+ " 0.000168 | \n",
+ " 0.001677 | \n",
+ " 0.496424 | \n",
+ " 0.009544 | \n",
+ " 0.600530 | \n",
+ " 0.005051 | \n",
+ " 1.803126 | \n",
+ " 0.996380 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Model NDCG mAP MRR LAUC HR \\\n",
+ "0 Self_TopPop 0.214651 0.111707 0.400939 0.555546 0.765642 \n",
+ "0 Ready_Baseline 0.095957 0.043178 0.198193 0.515501 0.437964 \n",
+ "0 Ready_Random 0.050075 0.018728 0.121957 0.506893 0.329799 \n",
+ "0 Self_TopRated 0.001043 0.000335 0.003348 0.496433 0.009544 \n",
+ "0 Self_BaselineUI 0.000752 0.000168 0.001677 0.496424 0.009544 \n",
+ "\n",
+ " Reco in test Test coverage Shannon Gini \n",
+ "0 1.000000 0.038961 3.159079 0.987317 \n",
+ "0 1.000000 0.033911 2.836513 0.991139 \n",
+ "0 0.986532 0.184704 5.099706 0.907217 \n",
+ "0 0.699046 0.005051 1.945910 0.995669 \n",
+ "0 0.600530 0.005051 1.803126 0.996380 "
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.iloc[:, np.append(0, np.arange(9, df.shape[1]))]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Check metrics on toy dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "3it [00:00, 5771.98it/s]\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Model | \n",
+ " RMSE | \n",
+ " MAE | \n",
+ " precision | \n",
+ " recall | \n",
+ " F_1 | \n",
+ " F_05 | \n",
+ " precision_super | \n",
+ " recall_super | \n",
+ " NDCG | \n",
+ " mAP | \n",
+ " MRR | \n",
+ " LAUC | \n",
+ " HR | \n",
+ " Reco in test | \n",
+ " Test coverage | \n",
+ " Shannon | \n",
+ " Gini | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Self_BaselineUI | \n",
+ " 1.612452 | \n",
+ " 1.4 | \n",
+ " 0.444444 | \n",
+ " 0.888889 | \n",
+ " 0.555556 | \n",
+ " 0.478632 | \n",
+ " 0.333333 | \n",
+ " 0.75 | \n",
+ " 0.676907 | \n",
+ " 0.574074 | \n",
+ " 0.611111 | \n",
+ " 0.638889 | \n",
+ " 1.0 | \n",
+ " 0.888889 | \n",
+ " 0.8 | \n",
+ " 1.386294 | \n",
+ " 0.25 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Model RMSE MAE precision recall F_1 F_05 \\\n",
+ "0 Self_BaselineUI 1.612452 1.4 0.444444 0.888889 0.555556 0.478632 \n",
+ "\n",
+ " precision_super recall_super NDCG mAP MRR LAUC HR \\\n",
+ "0 0.333333 0.75 0.676907 0.574074 0.611111 0.638889 1.0 \n",
+ "\n",
+ " Reco in test Test coverage Shannon Gini \n",
+ "0 0.888889 0.8 1.386294 0.25 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Training data:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "matrix([[3, 4, 0, 0, 5, 0, 0, 4],\n",
+ " [0, 1, 2, 3, 0, 0, 0, 0],\n",
+ " [0, 0, 0, 5, 0, 3, 4, 0]])"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Test data:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "matrix([[0, 0, 0, 0, 0, 0, 3, 0],\n",
+ " [0, 0, 0, 0, 5, 0, 0, 0],\n",
+ " [5, 0, 4, 0, 0, 0, 0, 2]])"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Recommendations:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 6 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 30 | \n",
+ " 5.0 | \n",
+ " 20 | \n",
+ " 4.0 | \n",
+ " 60 | \n",
+ " 4.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 10 | \n",
+ " 40 | \n",
+ " 3.0 | \n",
+ " 60 | \n",
+ " 2.0 | \n",
+ " 70 | \n",
+ " 2.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 20 | \n",
+ " 40 | \n",
+ " 5.0 | \n",
+ " 20 | \n",
+ " 4.0 | \n",
+ " 70 | \n",
+ " 4.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " 0 1 2 3 4 5 6\n",
+ "0 0 30 5.0 20 4.0 60 4.0\n",
+ "1 10 40 3.0 60 2.0 70 2.0\n",
+ "2 20 40 5.0 20 4.0 70 4.0"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Estimations:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " user | \n",
+ " item | \n",
+ " est_score | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 60 | \n",
+ " 4.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 10 | \n",
+ " 40 | \n",
+ " 3.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 20 | \n",
+ " 0 | \n",
+ " 3.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 20 | \n",
+ " 20 | \n",
+ " 4.0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 20 | \n",
+ " 70 | \n",
+ " 4.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " user item est_score\n",
+ "0 0 60 4.0\n",
+ "1 10 40 3.0\n",
+ "2 20 0 3.0\n",
+ "3 20 20 4.0\n",
+ "4 20 70 4.0"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "import helpers\n",
+ "\n",
+ "dir_path = \"Recommendations generated/toy-example/\"\n",
+ "super_reactions = [4, 5]\n",
+ "test = pd.read_csv(\"./Datasets/toy-example/test.csv\", sep=\"\\t\", header=None)\n",
+ "\n",
+ "display(ev.evaluate_all(test, dir_path, super_reactions, topK=3))\n",
+ "# you can also just call ev.evaluate_all() - the values above are the defaults\n",
+ "\n",
+ "toy_train_read = pd.read_csv(\n",
+ " \"./Datasets/toy-example/train.csv\",\n",
+ " sep=\"\\t\",\n",
+ " header=None,\n",
+ " names=[\"user\", \"item\", \"rating\", \"timestamp\"],\n",
+ ")\n",
+ "toy_test_read = pd.read_csv(\n",
+ " \"./Datasets/toy-example/test.csv\",\n",
+ " sep=\"\\t\",\n",
+ " header=None,\n",
+ " names=[\"user\", \"item\", \"rating\", \"timestamp\"],\n",
+ ")\n",
+ "reco = pd.read_csv(\n",
+ " \"Recommendations generated/toy-example/Self_BaselineUI_reco.csv\", header=None\n",
+ ")\n",
+ "estimations = pd.read_csv(\n",
+ " \"Recommendations generated/toy-example/Self_BaselineUI_estimations.csv\",\n",
+ " names=[\"user\", \"item\", \"est_score\"],\n",
+ ")\n",
+ "(\n",
+ " toy_train_ui,\n",
+ " toy_test_ui,\n",
+ " toy_user_code_id,\n",
+ " toy_user_id_code,\n",
+ " toy_item_code_id,\n",
+ " toy_item_id_code,\n",
+ ") = helpers.data_to_csr(toy_train_read, toy_test_read)\n",
+ "\n",
+ "print(\"Training data:\")\n",
+ "display(toy_train_ui.todense())\n",
+ "\n",
+ "print(\"Test data:\")\n",
+ "display(toy_test_ui.todense())\n",
+ "\n",
+ "print(\"Recommendations:\")\n",
+ "display(reco)\n",
+ "\n",
+ "print(\"Estimations:\")\n",
+ "display(estimations)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Sample recommendations"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Here is what the user rated highly:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " user | \n",
+ " rating | \n",
+ " title | \n",
+ " genres | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 57482 | \n",
+ " 2 | \n",
+ " 5 | \n",
+ " Emma (1996) | \n",
+ " Drama, Romance | \n",
+ "
\n",
+ " \n",
+ " 54506 | \n",
+ " 2 | \n",
+ " 5 | \n",
+ " Sense and Sensibility (1995) | \n",
+ " Drama, Romance | \n",
+ "
\n",
+ " \n",
+ " 40581 | \n",
+ " 2 | \n",
+ " 5 | \n",
+ " Titanic (1997) | \n",
+ " Action, Drama, Romance | \n",
+ "
\n",
+ " \n",
+ " 2949 | \n",
+ " 2 | \n",
+ " 5 | \n",
+ " Star Wars (1977) | \n",
+ " Action, Adventure, Romance, Sci-Fi, War | \n",
+ "
\n",
+ " \n",
+ " 69653 | \n",
+ " 2 | \n",
+ " 5 | \n",
+ " Wings of the Dove, The (1997) | \n",
+ " Drama, Romance, Thriller | \n",
+ "
\n",
+ " \n",
+ " 7906 | \n",
+ " 2 | \n",
+ " 5 | \n",
+ " As Good As It Gets (1997) | \n",
+ " Comedy, Drama | \n",
+ "
\n",
+ " \n",
+ " 69400 | \n",
+ " 2 | \n",
+ " 5 | \n",
+ " Shall We Dance? (1996) | \n",
+ " Comedy | \n",
+ "
\n",
+ " \n",
+ " 14469 | \n",
+ " 2 | \n",
+ " 5 | \n",
+ " Fargo (1996) | \n",
+ " Crime, Drama, Thriller | \n",
+ "
\n",
+ " \n",
+ " 46151 | \n",
+ " 2 | \n",
+ " 5 | \n",
+ " L.A. Confidential (1997) | \n",
+ " Crime, Film-Noir, Mystery, Thriller | \n",
+ "
\n",
+ " \n",
+ " 67293 | \n",
+ " 2 | \n",
+ " 5 | \n",
+ " Good Will Hunting (1997) | \n",
+ " Drama | \n",
+ "
\n",
+ " \n",
+ " 20923 | \n",
+ " 2 | \n",
+ " 5 | \n",
+ " Secrets & Lies (1996) | \n",
+ " Drama | \n",
+ "
\n",
+ " \n",
+ " 52921 | \n",
+ " 2 | \n",
+ " 5 | \n",
+ " Kolya (1996) | \n",
+ " Comedy | \n",
+ "
\n",
+ " \n",
+ " 50103 | \n",
+ " 2 | \n",
+ " 4 | \n",
+ " Mrs. Brown (Her Majesty, Mrs. Brown) (1997) | \n",
+ " Drama, Romance | \n",
+ "
\n",
+ " \n",
+ " 51972 | \n",
+ " 2 | \n",
+ " 4 | \n",
+ " Mighty Aphrodite (1995) | \n",
+ " Comedy | \n",
+ "
\n",
+ " \n",
+ " 515 | \n",
+ " 2 | \n",
+ " 4 | \n",
+ " Heat (1995) | \n",
+ " Action, Crime, Thriller | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " user rating title \\\n",
+ "57482 2 5 Emma (1996) \n",
+ "54506 2 5 Sense and Sensibility (1995) \n",
+ "40581 2 5 Titanic (1997) \n",
+ "2949 2 5 Star Wars (1977) \n",
+ "69653 2 5 Wings of the Dove, The (1997) \n",
+ "7906 2 5 As Good As It Gets (1997) \n",
+ "69400 2 5 Shall We Dance? (1996) \n",
+ "14469 2 5 Fargo (1996) \n",
+ "46151 2 5 L.A. Confidential (1997) \n",
+ "67293 2 5 Good Will Hunting (1997) \n",
+ "20923 2 5 Secrets & Lies (1996) \n",
+ "52921 2 5 Kolya (1996) \n",
+ "50103 2 4 Mrs. Brown (Her Majesty, Mrs. Brown) (1997) \n",
+ "51972 2 4 Mighty Aphrodite (1995) \n",
+ "515 2 4 Heat (1995) \n",
+ "\n",
+ " genres \n",
+ "57482 Drama, Romance \n",
+ "54506 Drama, Romance \n",
+ "40581 Action, Drama, Romance \n",
+ "2949 Action, Adventure, Romance, Sci-Fi, War \n",
+ "69653 Drama, Romance, Thriller \n",
+ "7906 Comedy, Drama \n",
+ "69400 Comedy \n",
+ "14469 Crime, Drama, Thriller \n",
+ "46151 Crime, Film-Noir, Mystery, Thriller \n",
+ "67293 Drama \n",
+ "20923 Drama \n",
+ "52921 Comedy \n",
+ "50103 Drama, Romance \n",
+ "51972 Comedy \n",
+ "515 Action, Crime, Thriller "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Here is what we recommend:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " user | \n",
+ " rec_nb | \n",
+ " title | \n",
+ " genres | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 1 | \n",
+ " 2.0 | \n",
+ " 1 | \n",
+ " Great Day in Harlem, A (1994) | \n",
+ " Documentary | \n",
+ "
\n",
+ " \n",
+ " 943 | \n",
+ " 2.0 | \n",
+ " 2 | \n",
+ " Tough and Deadly (1995) | \n",
+ " Action, Drama, Thriller | \n",
+ "
\n",
+ " \n",
+ " 1885 | \n",
+ " 2.0 | \n",
+ " 3 | \n",
+ " Aiqing wansui (1994) | \n",
+ " Drama | \n",
+ "
\n",
+ " \n",
+ " 2827 | \n",
+ " 2.0 | \n",
+ " 4 | \n",
+ " Delta of Venus (1994) | \n",
+ " Drama | \n",
+ "
\n",
+ " \n",
+ " 3769 | \n",
+ " 2.0 | \n",
+ " 5 | \n",
+ " Someone Else's America (1995) | \n",
+ " Drama | \n",
+ "
\n",
+ " \n",
+ " 4711 | \n",
+ " 2.0 | \n",
+ " 6 | \n",
+ " Saint of Fort Washington, The (1993) | \n",
+ " Drama | \n",
+ "
\n",
+ " \n",
+ " 5653 | \n",
+ " 2.0 | \n",
+ " 7 | \n",
+ " Celestial Clockwork (1994) | \n",
+ " Comedy | \n",
+ "
\n",
+ " \n",
+ " 6595 | \n",
+ " 2.0 | \n",
+ " 8 | \n",
+ " Some Mother's Son (1996) | \n",
+ " Drama | \n",
+ "
\n",
+ " \n",
+ " 8489 | \n",
+ " 2.0 | \n",
+ " 9 | \n",
+ " Maya Lin: A Strong Clear Vision (1994) | \n",
+ " Documentary | \n",
+ "
\n",
+ " \n",
+ " 7536 | \n",
+ " 2.0 | \n",
+ " 10 | \n",
+ " Prefontaine (1997) | \n",
+ " Drama | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " user rec_nb title \\\n",
+ "1 2.0 1 Great Day in Harlem, A (1994) \n",
+ "943 2.0 2 Tough and Deadly (1995) \n",
+ "1885 2.0 3 Aiqing wansui (1994) \n",
+ "2827 2.0 4 Delta of Venus (1994) \n",
+ "3769 2.0 5 Someone Else's America (1995) \n",
+ "4711 2.0 6 Saint of Fort Washington, The (1993) \n",
+ "5653 2.0 7 Celestial Clockwork (1994) \n",
+ "6595 2.0 8 Some Mother's Son (1996) \n",
+ "8489 2.0 9 Maya Lin: A Strong Clear Vision (1994) \n",
+ "7536 2.0 10 Prefontaine (1997) \n",
+ "\n",
+ " genres \n",
+ "1 Documentary \n",
+ "943 Action, Drama, Thriller \n",
+ "1885 Drama \n",
+ "2827 Drama \n",
+ "3769 Drama \n",
+ "4711 Drama \n",
+ "5653 Comedy \n",
+ "6595 Drama \n",
+ "8489 Documentary \n",
+ "7536 Drama "
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "train = pd.read_csv(\n",
+ " \"./Datasets/ml-100k/train.csv\",\n",
+ " sep=\"\\t\",\n",
+ " header=None,\n",
+ " names=[\"user\", \"item\", \"rating\", \"timestamp\"],\n",
+ ")\n",
+ "items = pd.read_csv(\"./Datasets/ml-100k/movies.csv\")\n",
+ "\n",
+ "user = random.choice(list(set(train[\"user\"])))\n",
+ "\n",
+ "train_content = pd.merge(train, items, left_on=\"item\", right_on=\"id\")\n",
+ "\n",
+ "print(\"Here is what the user rated highly:\")\n",
+ "display(\n",
+ " train_content[train_content[\"user\"] == user][\n",
+ " [\"user\", \"rating\", \"title\", \"genres\"]\n",
+ " ].sort_values(by=\"rating\", ascending=False)[:15]\n",
+ ")\n",
+ "\n",
+ "reco = np.loadtxt(\n",
+ " \"Recommendations generated/ml-100k/Self_BaselineUI_reco.csv\", delimiter=\",\"\n",
+ ")\n",
+ "items = pd.read_csv(\"./Datasets/ml-100k/movies.csv\")\n",
+ "\n",
+ "# Let's ignore scores - they are not used in evaluation:\n",
+ "reco_users = reco[:, :1]\n",
+ "reco_items = reco[:, 1::2]\n",
+ "# Let's put them into one array\n",
+ "reco = np.concatenate((reco_users, reco_items), axis=1)\n",
+ "\n",
+ "# Let's rebuild the user-item dataframe\n",
+ "recommended = []\n",
+ "for row in reco:\n",
+ " for rec_nb, entry in enumerate(row[1:]):\n",
+ " recommended.append((row[0], rec_nb + 1, entry))\n",
+ "recommended = pd.DataFrame(recommended, columns=[\"user\", \"rec_nb\", \"item\"])\n",
+ "\n",
+ "recommended_content = pd.merge(recommended, items, left_on=\"item\", right_on=\"id\")\n",
+ "\n",
+ "print(\"Here is what we recommend:\")\n",
+ "recommended_content[recommended_content[\"user\"] == user][\n",
+ " [\"user\", \"rec_nb\", \"title\", \"genres\"]\n",
+ "].sort_values(by=\"rec_nb\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# project task 2: implement some other evaluation measure"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# it may be your idea, modification of what we have already implemented\n",
+ "# (for example a Hit2 rate which would count as a success users who received at least 2 relevant recommendations)\n",
+ "# or something well-known\n",
+ "# expected output: modification of evaluation_measures.py such that evaluate_all will also display your measure\n",
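+ "\n",
+ "# a minimal sketch (an illustration only - the task itself expects a modification of\n",
+ "# evaluation_measures.py): a Hit2 rate computed directly from test_ui and a reco array\n",
+ "# whose item columns already hold inner item codes, as in the Ranking metrics section\n",
+ "def hit2_rate(test_ui, reco, topK=10):\n",
+ "    hits2, relevant_users = 0, 0\n",
+ "    for nb_user, user in enumerate(reco[:, 0].astype(int)):\n",
+ "        u_rated = set(test_ui.indices[test_ui.indptr[user] : test_ui.indptr[user + 1]])\n",
+ "        if len(u_rated) == 0:\n",
+ "            continue  # as in ranking_metrics, skip users with no items in the test set\n",
+ "        relevant_users += 1\n",
+ "        nb_hits = sum(item in u_rated for item in reco[nb_user, 1 : topK + 1].astype(int))\n",
+ "        hits2 += nb_hits >= 2\n",
+ "    return hits2 / relevant_users"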
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/.ipynb_checkpoints/P3. k-nearest neighbours-checkpoint.ipynb b/.ipynb_checkpoints/P3. k-nearest neighbours-checkpoint.ipynb
new file mode 100644
index 0000000..a15592c
--- /dev/null
+++ b/.ipynb_checkpoints/P3. k-nearest neighbours-checkpoint.ipynb
@@ -0,0 +1,1057 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Self made simplified I-KNN"
+ "# Self-made simplified I-KNN"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import helpers\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import scipy.sparse as sparse\n",
+ "from collections import defaultdict\n",
+ "from itertools import chain\n",
+ "import random\n",
+ "\n",
+ "train_read = pd.read_csv(\"./Datasets/ml-100k/train.csv\", sep=\"\\t\", header=None)\n",
+ "test_read = pd.read_csv(\"./Datasets/ml-100k/test.csv\", sep=\"\\t\", header=None)\n",
+ "(\n",
+ " train_ui,\n",
+ " test_ui,\n",
+ " user_code_id,\n",
+ " user_id_code,\n",
+ " item_code_id,\n",
+ " item_id_code,\n",
+ ") = helpers.data_to_csr(train_read, test_read)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class IKNN:\n",
+ " def fit(self, train_ui):\n",
+ " self.train_ui = train_ui\n",
+ "\n",
+ " train_iu = train_ui.transpose()\n",
+ " norms = np.linalg.norm(\n",
+ " train_iu.A, axis=1\n",
+ "        )  # here we compute the length of each item's ratings vector\n",
+ " norms = np.vectorize(lambda x: max(x, 1))(\n",
+ " norms[:, None]\n",
+ " ) # to avoid dividing by zero\n",
+ "\n",
+ " normalized_train_iu = sparse.csr_matrix(train_iu / norms)\n",
+ "\n",
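+ "        # cosine similarity between items: dot products of the L2-normalized item vectors\n",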
+ " self.similarity_matrix_ii = (\n",
+ " normalized_train_iu * normalized_train_iu.transpose()\n",
+ " )\n",
+ "\n",
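+ "        # predicted rating for (user, item): similarity-weighted average of the user's ratings,\n",
+ "        # i.e. sum_j r_uj * sim(j, i) divided by the summed similarities of the user's rated items\n",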
+ " self.estimations = np.array(\n",
+ " train_ui\n",
+ " * self.similarity_matrix_ii\n",
+ " / ((train_ui > 0) * self.similarity_matrix_ii)\n",
+ " )\n",
+ "\n",
+ " def recommend(self, user_code_id, item_code_id, topK=10):\n",
+ "\n",
+ " top_k = defaultdict(list)\n",
+ " for nb_user, user in enumerate(self.estimations):\n",
+ "\n",
+ " user_rated = self.train_ui.indices[\n",
+ " self.train_ui.indptr[nb_user] : self.train_ui.indptr[nb_user + 1]\n",
+ " ]\n",
+ " for item, score in enumerate(user):\n",
+ " if item not in user_rated and not np.isnan(score):\n",
+ " top_k[user_code_id[nb_user]].append((item_code_id[item], score))\n",
+ " result = []\n",
+ " # Let's choose k best items in the format: (user, item1, score1, item2, score2, ...)\n",
+ " for uid, item_scores in top_k.items():\n",
+ " item_scores.sort(key=lambda x: x[1], reverse=True)\n",
+ " result.append([uid] + list(chain(*item_scores[:topK])))\n",
+ " return result\n",
+ "\n",
+ " def estimate(self, user_code_id, item_code_id, test_ui):\n",
+ " result = []\n",
+ " for user, item in zip(*test_ui.nonzero()):\n",
+ " result.append(\n",
+ " [\n",
+ " user_code_id[user],\n",
+ " item_code_id[item],\n",
+ " self.estimations[user, item]\n",
+ " if not np.isnan(self.estimations[user, item])\n",
+ " else 1,\n",
+ " ]\n",
+ " )\n",
+ " return result"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "toy train ui:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "array([[3, 4, 0, 0, 5, 0, 0, 4],\n",
+ " [0, 1, 2, 3, 0, 0, 0, 0],\n",
+ " [0, 0, 0, 5, 0, 3, 4, 0]])"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "similarity matrix:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "array([[1. , 0.9701425 , 0. , 0. , 1. ,\n",
+ " 0. , 0. , 1. ],\n",
+ " [0.9701425 , 1. , 0.24253563, 0.12478355, 0.9701425 ,\n",
+ " 0. , 0. , 0.9701425 ],\n",
+ " [0. , 0.24253563, 1. , 0.51449576, 0. ,\n",
+ " 0. , 0. , 0. ],\n",
+ " [0. , 0.12478355, 0.51449576, 1. , 0. ,\n",
+ " 0.85749293, 0.85749293, 0. ],\n",
+ " [1. , 0.9701425 , 0. , 0. , 1. ,\n",
+ " 0. , 0. , 1. ],\n",
+ " [0. , 0. , 0. , 0.85749293, 0. ,\n",
+ " 1. , 1. , 0. ],\n",
+ " [0. , 0. , 0. , 0.85749293, 0. ,\n",
+ " 1. , 1. , 0. ],\n",
+ " [1. , 0.9701425 , 0. , 0. , 1. ,\n",
+ " 0. , 0. , 1. ]])"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "estimations matrix:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "array([[4. , 4. , 4. , 4. , 4. ,\n",
+ " nan, nan, 4. ],\n",
+ " [1. , 1.35990333, 2.15478388, 2.53390319, 1. ,\n",
+ " 3. , 3. , 1. ],\n",
+ " [ nan, 5. , 5. , 4.05248907, nan,\n",
+ " 3.95012863, 3.95012863, nan]])"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[[0, 20, 4.0, 30, 4.0],\n",
+ " [10, 50, 3.0, 60, 3.0, 0, 1.0, 40, 1.0, 70, 1.0],\n",
+ " [20, 10, 5.0, 20, 5.0]]"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# toy example\n",
+ "toy_train_read = pd.read_csv(\n",
+ " \"./Datasets/toy-example/train.csv\",\n",
+ " sep=\"\\t\",\n",
+ " header=None,\n",
+ " names=[\"user\", \"item\", \"rating\", \"timestamp\"],\n",
+ ")\n",
+ "toy_test_read = pd.read_csv(\n",
+ " \"./Datasets/toy-example/test.csv\",\n",
+ " sep=\"\\t\",\n",
+ " header=None,\n",
+ " names=[\"user\", \"item\", \"rating\", \"timestamp\"],\n",
+ ")\n",
+ "\n",
+ "(\n",
+ " toy_train_ui,\n",
+ " toy_test_ui,\n",
+ " toy_user_code_id,\n",
+ " toy_user_id_code,\n",
+ " toy_item_code_id,\n",
+ " toy_item_id_code,\n",
+ ") = helpers.data_to_csr(toy_train_read, toy_test_read)\n",
+ "\n",
+ "\n",
+ "model = IKNN()\n",
+ "model.fit(toy_train_ui)\n",
+ "\n",
+ "print(\"toy train ui:\")\n",
+ "display(toy_train_ui.A)\n",
+ "\n",
+ "print(\"similarity matrix:\")\n",
+ "display(model.similarity_matrix_ii.A)\n",
+ "\n",
+ "print(\"estimations matrix:\")\n",
+ "display(model.estimations)\n",
+ "\n",
+ "model.recommend(toy_user_code_id, toy_item_code_id)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "model = IKNN()\n",
+ "model.fit(train_ui)\n",
+ "\n",
+ "top_n = pd.DataFrame(model.recommend(user_code_id, item_code_id, topK=10))\n",
+ "\n",
+ "top_n.to_csv(\n",
+ " \"Recommendations generated/ml-100k/Self_IKNN_reco.csv\", index=False, header=False\n",
+ ")\n",
+ "\n",
+ "estimations = pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))\n",
+ "estimations.to_csv(\n",
+ " \"Recommendations generated/ml-100k/Self_IKNN_estimations.csv\",\n",
+ " index=False,\n",
+ " header=False,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "943it [00:00, 9004.71it/s]\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " RMSE | \n",
+ " MAE | \n",
+ " precision | \n",
+ " recall | \n",
+ " F_1 | \n",
+ " F_05 | \n",
+ " precision_super | \n",
+ " recall_super | \n",
+ " NDCG | \n",
+ " mAP | \n",
+ " MRR | \n",
+ " LAUC | \n",
+ " HR | \n",
+ " Reco in test | \n",
+ " Test coverage | \n",
+ " Shannon | \n",
+ " Gini | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1.018363 | \n",
+ " 0.808793 | \n",
+ " 0.000318 | \n",
+ " 0.000108 | \n",
+ " 0.00014 | \n",
+ " 0.000189 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.000214 | \n",
+ " 0.000037 | \n",
+ " 0.000368 | \n",
+ " 0.496391 | \n",
+ " 0.003181 | \n",
+ " 0.392153 | \n",
+ " 0.11544 | \n",
+ " 4.174741 | \n",
+ " 0.965327 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " RMSE MAE precision recall F_1 F_05 \\\n",
+ "0 1.018363 0.808793 0.000318 0.000108 0.00014 0.000189 \n",
+ "\n",
+ " precision_super recall_super NDCG mAP MRR LAUC \\\n",
+ "0 0.0 0.0 0.000214 0.000037 0.000368 0.496391 \n",
+ "\n",
+ " HR Reco in test Test coverage Shannon Gini \n",
+ "0 0.003181 0.392153 0.11544 4.174741 0.965327 "
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import evaluation_measures as ev\n",
+ "\n",
+ "estimations_df = pd.read_csv(\n",
+ " \"Recommendations generated/ml-100k/Self_IKNN_estimations.csv\", header=None\n",
+ ")\n",
+ "reco = np.loadtxt(\"Recommendations generated/ml-100k/Self_IKNN_reco.csv\", delimiter=\",\")\n",
+ "\n",
+ "ev.evaluate(\n",
+ " test=pd.read_csv(\"./Datasets/ml-100k/test.csv\", sep=\"\\t\", header=None),\n",
+ " estimations_df=estimations_df,\n",
+ " reco=reco,\n",
+ " super_reactions=[4, 5],\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "943it [00:00, 8517.83it/s]\n",
+ "943it [00:00, 11438.64it/s]\n",
+ "943it [00:00, 11933.36it/s]\n",
+ "943it [00:00, 10307.81it/s]\n",
+ "943it [00:00, 12250.41it/s]\n",
+ "943it [00:00, 12064.07it/s]\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Model | \n",
+ " RMSE | \n",
+ " MAE | \n",
+ " precision | \n",
+ " recall | \n",
+ " F_1 | \n",
+ " F_05 | \n",
+ " precision_super | \n",
+ " recall_super | \n",
+ " NDCG | \n",
+ " mAP | \n",
+ " MRR | \n",
+ " LAUC | \n",
+ " HR | \n",
+ " Reco in test | \n",
+ " Test coverage | \n",
+ " Shannon | \n",
+ " Gini | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Self_TopPop | \n",
+ " 2.508258 | \n",
+ " 2.217909 | \n",
+ " 0.188865 | \n",
+ " 0.116919 | \n",
+ " 0.118732 | \n",
+ " 0.141584 | \n",
+ " 0.130472 | \n",
+ " 0.137473 | \n",
+ " 0.214651 | \n",
+ " 0.111707 | \n",
+ " 0.400939 | \n",
+ " 0.555546 | \n",
+ " 0.765642 | \n",
+ " 1.000000 | \n",
+ " 0.038961 | \n",
+ " 3.159079 | \n",
+ " 0.987317 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_Baseline | \n",
+ " 0.949459 | \n",
+ " 0.752487 | \n",
+ " 0.091410 | \n",
+ " 0.037652 | \n",
+ " 0.046030 | \n",
+ " 0.061286 | \n",
+ " 0.079614 | \n",
+ " 0.056463 | \n",
+ " 0.095957 | \n",
+ " 0.043178 | \n",
+ " 0.198193 | \n",
+ " 0.515501 | \n",
+ " 0.437964 | \n",
+ " 1.000000 | \n",
+ " 0.033911 | \n",
+ " 2.836513 | \n",
+ " 0.991139 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_Random | \n",
+ " 1.521845 | \n",
+ " 1.225949 | \n",
+ " 0.047190 | \n",
+ " 0.020753 | \n",
+ " 0.024810 | \n",
+ " 0.032269 | \n",
+ " 0.029506 | \n",
+ " 0.023707 | \n",
+ " 0.050075 | \n",
+ " 0.018728 | \n",
+ " 0.121957 | \n",
+ " 0.506893 | \n",
+ " 0.329799 | \n",
+ " 0.986532 | \n",
+ " 0.184704 | \n",
+ " 5.099706 | \n",
+ " 0.907217 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Self_TopRated | \n",
+ " 1.030712 | \n",
+ " 0.820904 | \n",
+ " 0.000954 | \n",
+ " 0.000188 | \n",
+ " 0.000298 | \n",
+ " 0.000481 | \n",
+ " 0.000644 | \n",
+ " 0.000223 | \n",
+ " 0.001043 | \n",
+ " 0.000335 | \n",
+ " 0.003348 | \n",
+ " 0.496433 | \n",
+ " 0.009544 | \n",
+ " 0.699046 | \n",
+ " 0.005051 | \n",
+ " 1.945910 | \n",
+ " 0.995669 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Self_BaselineUI | \n",
+ " 0.967585 | \n",
+ " 0.762740 | \n",
+ " 0.000954 | \n",
+ " 0.000170 | \n",
+ " 0.000278 | \n",
+ " 0.000463 | \n",
+ " 0.000644 | \n",
+ " 0.000189 | \n",
+ " 0.000752 | \n",
+ " 0.000168 | \n",
+ " 0.001677 | \n",
+ " 0.496424 | \n",
+ " 0.009544 | \n",
+ " 0.600530 | \n",
+ " 0.005051 | \n",
+ " 1.803126 | \n",
+ " 0.996380 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Self_IKNN | \n",
+ " 1.018363 | \n",
+ " 0.808793 | \n",
+ " 0.000318 | \n",
+ " 0.000108 | \n",
+ " 0.000140 | \n",
+ " 0.000189 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000214 | \n",
+ " 0.000037 | \n",
+ " 0.000368 | \n",
+ " 0.496391 | \n",
+ " 0.003181 | \n",
+ " 0.392153 | \n",
+ " 0.115440 | \n",
+ " 4.174741 | \n",
+ " 0.965327 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Model RMSE MAE precision recall F_1 \\\n",
+ "0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 0.118732 \n",
+ "0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 0.046030 \n",
+ "0 Ready_Random 1.521845 1.225949 0.047190 0.020753 0.024810 \n",
+ "0 Self_TopRated 1.030712 0.820904 0.000954 0.000188 0.000298 \n",
+ "0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 0.000278 \n",
+ "0 Self_IKNN 1.018363 0.808793 0.000318 0.000108 0.000140 \n",
+ "\n",
+ " F_05 precision_super recall_super NDCG mAP MRR \\\n",
+ "0 0.141584 0.130472 0.137473 0.214651 0.111707 0.400939 \n",
+ "0 0.061286 0.079614 0.056463 0.095957 0.043178 0.198193 \n",
+ "0 0.032269 0.029506 0.023707 0.050075 0.018728 0.121957 \n",
+ "0 0.000481 0.000644 0.000223 0.001043 0.000335 0.003348 \n",
+ "0 0.000463 0.000644 0.000189 0.000752 0.000168 0.001677 \n",
+ "0 0.000189 0.000000 0.000000 0.000214 0.000037 0.000368 \n",
+ "\n",
+ " LAUC HR Reco in test Test coverage Shannon Gini \n",
+ "0 0.555546 0.765642 1.000000 0.038961 3.159079 0.987317 \n",
+ "0 0.515501 0.437964 1.000000 0.033911 2.836513 0.991139 \n",
+ "0 0.506893 0.329799 0.986532 0.184704 5.099706 0.907217 \n",
+ "0 0.496433 0.009544 0.699046 0.005051 1.945910 0.995669 \n",
+ "0 0.496424 0.009544 0.600530 0.005051 1.803126 0.996380 \n",
+ "0 0.496391 0.003181 0.392153 0.115440 4.174741 0.965327 "
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dir_path = \"Recommendations generated/ml-100k/\"\n",
+ "super_reactions = [4, 5]\n",
+ "test = pd.read_csv(\"./Datasets/ml-100k/test.csv\", sep=\"\\t\", header=None)\n",
+ "\n",
+ "ev.evaluate_all(test, dir_path, super_reactions)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Ready-made KNNs - Surprise implementation"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### I-KNN - basic"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Computing the cosine similarity matrix...\n",
+ "Done computing similarity matrix.\n",
+ "Generating predictions...\n",
+ "Generating top N recommendations...\n",
+ "Generating predictions...\n"
+ ]
+ }
+ ],
+ "source": [
+ "import helpers\n",
+ "import surprise as sp\n",
+ "\n",
+ "sim_options = {\n",
+ " \"name\": \"cosine\",\n",
+ " \"user_based\": False,\n",
+ "} # compute similarities between items\n",
+ "algo = sp.KNNBasic(sim_options=sim_options)\n",
+ "\n",
+ "helpers.ready_made(\n",
+ " algo,\n",
+ " reco_path=\"Recommendations generated/ml-100k/Ready_I-KNN_reco.csv\",\n",
+ " estimations_path=\"Recommendations generated/ml-100k/Ready_I-KNN_estimations.csv\",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### U-KNN - basic"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Computing the cosine similarity matrix...\n",
+ "Done computing similarity matrix.\n",
+ "Generating predictions...\n",
+ "Generating top N recommendations...\n",
+ "Generating predictions...\n"
+ ]
+ }
+ ],
+ "source": [
+ "sim_options = {\n",
+ " \"name\": \"cosine\",\n",
+ " \"user_based\": True,\n",
+ "} # compute similarities between users\n",
+ "algo = sp.KNNBasic(sim_options=sim_options)\n",
+ "\n",
+ "helpers.ready_made(\n",
+ " algo,\n",
+ " reco_path=\"Recommendations generated/ml-100k/Ready_U-KNN_reco.csv\",\n",
+ " estimations_path=\"Recommendations generated/ml-100k/Ready_U-KNN_estimations.csv\",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### I-KNN - on top of a baseline"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Estimating biases using als...\n",
+ "Computing the msd similarity matrix...\n",
+ "Done computing similarity matrix.\n",
+ "Generating predictions...\n",
+ "Generating top N recommendations...\n",
+ "Generating predictions...\n"
+ ]
+ }
+ ],
+ "source": [
+ "sim_options = {\n",
+ " \"name\": \"cosine\",\n",
+ " \"user_based\": False,\n",
+ "} # compute similarities between items\n",
+ "algo = sp.KNNBaseline()  # note: sim_options above is not passed in, so the Surprise defaults (msd similarity) are used, as the output below shows\n",
+ "\n",
+ "helpers.ready_made(\n",
+ " algo,\n",
+ " reco_path=\"Recommendations generated/ml-100k/Ready_I-KNNBaseline_reco.csv\",\n",
+ " estimations_path=\"Recommendations generated/ml-100k/Ready_I-KNNBaseline_estimations.csv\",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "943it [00:00, 11286.27it/s]\n",
+ "943it [00:00, 10874.86it/s]\n",
+ "943it [00:00, 11509.97it/s]\n",
+ "943it [00:00, 11855.81it/s]\n",
+ "943it [00:00, 11574.00it/s]\n",
+ "943it [00:00, 11080.19it/s]\n",
+ "943it [00:00, 11550.84it/s]\n",
+ "943it [00:00, 12148.14it/s]\n",
+ "943it [00:00, 10779.39it/s]\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Model | \n",
+ " RMSE | \n",
+ " MAE | \n",
+ " precision | \n",
+ " recall | \n",
+ " F_1 | \n",
+ " F_05 | \n",
+ " precision_super | \n",
+ " recall_super | \n",
+ " NDCG | \n",
+ " mAP | \n",
+ " MRR | \n",
+ " LAUC | \n",
+ " HR | \n",
+ " Reco in test | \n",
+ " Test coverage | \n",
+ " Shannon | \n",
+ " Gini | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Self_TopPop | \n",
+ " 2.508258 | \n",
+ " 2.217909 | \n",
+ " 0.188865 | \n",
+ " 0.116919 | \n",
+ " 0.118732 | \n",
+ " 0.141584 | \n",
+ " 0.130472 | \n",
+ " 0.137473 | \n",
+ " 0.214651 | \n",
+ " 0.111707 | \n",
+ " 0.400939 | \n",
+ " 0.555546 | \n",
+ " 0.765642 | \n",
+ " 1.000000 | \n",
+ " 0.038961 | \n",
+ " 3.159079 | \n",
+ " 0.987317 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_Baseline | \n",
+ " 0.949459 | \n",
+ " 0.752487 | \n",
+ " 0.091410 | \n",
+ " 0.037652 | \n",
+ " 0.046030 | \n",
+ " 0.061286 | \n",
+ " 0.079614 | \n",
+ " 0.056463 | \n",
+ " 0.095957 | \n",
+ " 0.043178 | \n",
+ " 0.198193 | \n",
+ " 0.515501 | \n",
+ " 0.437964 | \n",
+ " 1.000000 | \n",
+ " 0.033911 | \n",
+ " 2.836513 | \n",
+ " 0.991139 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_Random | \n",
+ " 1.521845 | \n",
+ " 1.225949 | \n",
+ " 0.047190 | \n",
+ " 0.020753 | \n",
+ " 0.024810 | \n",
+ " 0.032269 | \n",
+ " 0.029506 | \n",
+ " 0.023707 | \n",
+ " 0.050075 | \n",
+ " 0.018728 | \n",
+ " 0.121957 | \n",
+ " 0.506893 | \n",
+ " 0.329799 | \n",
+ " 0.986532 | \n",
+ " 0.184704 | \n",
+ " 5.099706 | \n",
+ " 0.907217 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_I-KNN | \n",
+ " 1.030386 | \n",
+ " 0.813067 | \n",
+ " 0.026087 | \n",
+ " 0.006908 | \n",
+ " 0.010593 | \n",
+ " 0.016046 | \n",
+ " 0.021137 | \n",
+ " 0.009522 | \n",
+ " 0.024214 | \n",
+ " 0.008958 | \n",
+ " 0.048068 | \n",
+ " 0.499885 | \n",
+ " 0.154825 | \n",
+ " 0.402333 | \n",
+ " 0.434343 | \n",
+ " 5.133650 | \n",
+ " 0.877999 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_I-KNNBaseline | \n",
+ " 0.935327 | \n",
+ " 0.737424 | \n",
+ " 0.002545 | \n",
+ " 0.000755 | \n",
+ " 0.001105 | \n",
+ " 0.001602 | \n",
+ " 0.002253 | \n",
+ " 0.000930 | \n",
+ " 0.003444 | \n",
+ " 0.001362 | \n",
+ " 0.011760 | \n",
+ " 0.496724 | \n",
+ " 0.021209 | \n",
+ " 0.482821 | \n",
+ " 0.059885 | \n",
+ " 2.232578 | \n",
+ " 0.994487 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_U-KNN | \n",
+ " 1.023495 | \n",
+ " 0.807913 | \n",
+ " 0.000742 | \n",
+ " 0.000205 | \n",
+ " 0.000305 | \n",
+ " 0.000449 | \n",
+ " 0.000536 | \n",
+ " 0.000198 | \n",
+ " 0.000845 | \n",
+ " 0.000274 | \n",
+ " 0.002744 | \n",
+ " 0.496441 | \n",
+ " 0.007423 | \n",
+ " 0.602121 | \n",
+ " 0.010823 | \n",
+ " 2.089186 | \n",
+ " 0.995706 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Self_TopRated | \n",
+ " 1.030712 | \n",
+ " 0.820904 | \n",
+ " 0.000954 | \n",
+ " 0.000188 | \n",
+ " 0.000298 | \n",
+ " 0.000481 | \n",
+ " 0.000644 | \n",
+ " 0.000223 | \n",
+ " 0.001043 | \n",
+ " 0.000335 | \n",
+ " 0.003348 | \n",
+ " 0.496433 | \n",
+ " 0.009544 | \n",
+ " 0.699046 | \n",
+ " 0.005051 | \n",
+ " 1.945910 | \n",
+ " 0.995669 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Self_BaselineUI | \n",
+ " 0.967585 | \n",
+ " 0.762740 | \n",
+ " 0.000954 | \n",
+ " 0.000170 | \n",
+ " 0.000278 | \n",
+ " 0.000463 | \n",
+ " 0.000644 | \n",
+ " 0.000189 | \n",
+ " 0.000752 | \n",
+ " 0.000168 | \n",
+ " 0.001677 | \n",
+ " 0.496424 | \n",
+ " 0.009544 | \n",
+ " 0.600530 | \n",
+ " 0.005051 | \n",
+ " 1.803126 | \n",
+ " 0.996380 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Self_IKNN | \n",
+ " 1.018363 | \n",
+ " 0.808793 | \n",
+ " 0.000318 | \n",
+ " 0.000108 | \n",
+ " 0.000140 | \n",
+ " 0.000189 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000214 | \n",
+ " 0.000037 | \n",
+ " 0.000368 | \n",
+ " 0.496391 | \n",
+ " 0.003181 | \n",
+ " 0.392153 | \n",
+ " 0.115440 | \n",
+ " 4.174741 | \n",
+ " 0.965327 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Model RMSE MAE precision recall F_1 \\\n",
+ "0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 0.118732 \n",
+ "0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 0.046030 \n",
+ "0 Ready_Random 1.521845 1.225949 0.047190 0.020753 0.024810 \n",
+ "0 Ready_I-KNN 1.030386 0.813067 0.026087 0.006908 0.010593 \n",
+ "0 Ready_I-KNNBaseline 0.935327 0.737424 0.002545 0.000755 0.001105 \n",
+ "0 Ready_U-KNN 1.023495 0.807913 0.000742 0.000205 0.000305 \n",
+ "0 Self_TopRated 1.030712 0.820904 0.000954 0.000188 0.000298 \n",
+ "0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 0.000278 \n",
+ "0 Self_IKNN 1.018363 0.808793 0.000318 0.000108 0.000140 \n",
+ "\n",
+ " F_05 precision_super recall_super NDCG mAP MRR \\\n",
+ "0 0.141584 0.130472 0.137473 0.214651 0.111707 0.400939 \n",
+ "0 0.061286 0.079614 0.056463 0.095957 0.043178 0.198193 \n",
+ "0 0.032269 0.029506 0.023707 0.050075 0.018728 0.121957 \n",
+ "0 0.016046 0.021137 0.009522 0.024214 0.008958 0.048068 \n",
+ "0 0.001602 0.002253 0.000930 0.003444 0.001362 0.011760 \n",
+ "0 0.000449 0.000536 0.000198 0.000845 0.000274 0.002744 \n",
+ "0 0.000481 0.000644 0.000223 0.001043 0.000335 0.003348 \n",
+ "0 0.000463 0.000644 0.000189 0.000752 0.000168 0.001677 \n",
+ "0 0.000189 0.000000 0.000000 0.000214 0.000037 0.000368 \n",
+ "\n",
+ " LAUC HR Reco in test Test coverage Shannon Gini \n",
+ "0 0.555546 0.765642 1.000000 0.038961 3.159079 0.987317 \n",
+ "0 0.515501 0.437964 1.000000 0.033911 2.836513 0.991139 \n",
+ "0 0.506893 0.329799 0.986532 0.184704 5.099706 0.907217 \n",
+ "0 0.499885 0.154825 0.402333 0.434343 5.133650 0.877999 \n",
+ "0 0.496724 0.021209 0.482821 0.059885 2.232578 0.994487 \n",
+ "0 0.496441 0.007423 0.602121 0.010823 2.089186 0.995706 \n",
+ "0 0.496433 0.009544 0.699046 0.005051 1.945910 0.995669 \n",
+ "0 0.496424 0.009544 0.600530 0.005051 1.803126 0.996380 \n",
+ "0 0.496391 0.003181 0.392153 0.115440 4.174741 0.965327 "
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dir_path = \"Recommendations generated/ml-100k/\"\n",
+ "super_reactions = [4, 5]\n",
+ "test = pd.read_csv(\"./Datasets/ml-100k/test.csv\", sep=\"\\t\", header=None)\n",
+ "\n",
+ "ev.evaluate_all(test, dir_path, super_reactions)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# project task 3: use a Surprise KNN algorithm of your choice"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# read the docs and try to find the best parameter configuration (let's say in terms of RMSE)\n",
+ "# https://surprise.readthedocs.io/en/stable/knn_inspired.html##surprise.prediction_algorithms.knns.KNNBaseline\n",
+ "# the solution here can be similar to examples above\n",
+ "# please save the output in 'Recommendations generated/ml-100k/Self_KNNSurprisetask_reco.csv' and\n",
+ "# 'Recommendations generated/ml-100k/Self_KNNSurprisetask_estimations.csv'"
+ ]
+ ]
+ },
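+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Below is a minimal, unofficial sketch of one way to approach task 3 with Surprise's `KNNBaseline`: a small `GridSearchCV` over `k` and `sim_options` scored on RMSE, a refit on the full training set, and an export of estimations and top-10 recommendations. Only the public Surprise API is used; the grid values and the CSV layout written at the end are assumptions and should be adapted to the format of the `*_reco.csv` / `*_estimations.csv` files generated earlier."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Sketch only: grid values and the output layout below are assumptions, not the official solution\n",
+ "import pandas as pd\n",
+ "import surprise as sp\n",
+ "from surprise.model_selection import GridSearchCV\n",
+ "\n",
+ "reader = sp.Reader(line_format='user item rating timestamp', sep='\\t')\n",
+ "train_data = sp.Dataset.load_from_file('./Datasets/ml-100k/train.csv', reader=reader)\n",
+ "\n",
+ "# small grid search scored on RMSE; widen the grid if runtime allows\n",
+ "param_grid = {'k': [20, 40, 60],\n",
+ "              'sim_options': {'name': ['pearson_baseline', 'cosine'], 'user_based': [False]}}\n",
+ "gs = GridSearchCV(sp.KNNBaseline, param_grid, measures=['rmse'], cv=3)\n",
+ "gs.fit(train_data)\n",
+ "print(gs.best_score['rmse'], gs.best_params['rmse'])\n",
+ "\n",
+ "# refit the best configuration on the full training set\n",
+ "algo = sp.KNNBaseline(**gs.best_params['rmse'])\n",
+ "trainset = train_data.build_full_trainset()\n",
+ "algo.fit(trainset)\n",
+ "\n",
+ "# rating estimations for the (user, item) pairs in the test set\n",
+ "test_df = pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None,\n",
+ "                      names=['user', 'item', 'rating', 'timestamp'])\n",
+ "est_rows = [(u, i, algo.predict(str(u), str(i)).est) for u, i in zip(test_df['user'], test_df['item'])]\n",
+ "estimations = pd.DataFrame(est_rows, columns=['user', 'item', 'est_score'])\n",
+ "\n",
+ "# top-10 recommendations per user among items the user has not rated in the training set\n",
+ "# (build_anti_testset() enumerates all unrated pairs, so this step can take a while)\n",
+ "preds = pd.DataFrame([(p.uid, p.iid, p.est) for p in algo.test(trainset.build_anti_testset())],\n",
+ "                     columns=['user', 'item', 'est_score'])\n",
+ "top10 = preds.sort_values('est_score', ascending=False).groupby('user').head(10)\n",
+ "\n",
+ "# assumed layout: match the separator/columns of the recommendation files generated earlier\n",
+ "top10.to_csv('Recommendations generated/ml-100k/Self_KNNSurprisetask_reco.csv', index=False, header=False)\n",
+ "estimations.to_csv('Recommendations generated/ml-100k/Self_KNNSurprisetask_estimations.csv', index=False, header=False)"
+ ]
+ }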
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/P0. Data preparation.ipynb b/P0. Data preparation.ipynb
index e905e56..c40508c 100644
--- a/P0. Data preparation.ipynb
+++ b/P0. Data preparation.ipynb
@@ -9,7 +9,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
@@ -58,7 +58,7 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 2,
"metadata": {},
"outputs": [
{
@@ -137,7 +137,7 @@
"4 166 346 1 886397596"
]
},
- "execution_count": 17,
+ "execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
@@ -155,7 +155,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 3,
"metadata": {},
"outputs": [
{
@@ -184,7 +184,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 4,
"metadata": {},
"outputs": [
{
@@ -226,7 +226,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 5,
"metadata": {},
"outputs": [
{
@@ -268,7 +268,7 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 6,
"metadata": {},
"outputs": [
{
@@ -283,7 +283,7 @@
"Name: user, dtype: float64"
]
},
- "execution_count": 21,
+ "execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -301,7 +301,7 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@@ -312,7 +312,7 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 8,
"metadata": {},
"outputs": [
{
@@ -339,7 +339,7 @@
" 18: 'Western'}"
]
},
- "execution_count": 23,
+ "execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@@ -350,7 +350,7 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
@@ -359,7 +359,7 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": 10,
"metadata": {},
"outputs": [
{
@@ -503,7 +503,7 @@
"[3 rows x 24 columns]"
]
},
- "execution_count": 25,
+ "execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
@@ -514,7 +514,7 @@
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
@@ -524,7 +524,7 @@
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
@@ -533,7 +533,7 @@
},
{
"cell_type": "code",
- "execution_count": 28,
+ "execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
@@ -543,7 +543,7 @@
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": 14,
"metadata": {},
"outputs": [
{
@@ -616,7 +616,7 @@
"4 5 Copycat (1995) Crime, Drama, Thriller"
]
},
- "execution_count": 29,
+ "execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
@@ -635,7 +635,7 @@
},
{
"cell_type": "code",
- "execution_count": 30,
+ "execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
@@ -644,7 +644,7 @@
},
{
"cell_type": "code",
- "execution_count": 31,
+ "execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
diff --git a/P1. Baseline.ipynb b/P1. Baseline.ipynb
index 3dbaf3a..85b9494 100644
--- a/P1. Baseline.ipynb
+++ b/P1. Baseline.ipynb
@@ -306,7 +306,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "658 ns ± 16.9 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n",
+ "471 ns ± 15.3 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n",
"Inefficient way to access items rated by user:\n"
]
},
@@ -324,7 +324,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "67.8 µs ± 1.68 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
+ "48.3 µs ± 1.51 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
]
}
],
@@ -1318,7 +1318,7 @@
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
@@ -1342,7 +1342,7 @@
},
{
"cell_type": "code",
- "execution_count": 28,
+ "execution_count": 23,
"metadata": {},
"outputs": [
{
@@ -1446,24 +1446,24 @@
},
{
"cell_type": "code",
- "execution_count": 30,
+ "execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "RMSE: 1.5230\n",
- "MAE: 1.2226\n"
+ "RMSE: 1.5165\n",
+ "MAE: 1.2172\n"
]
},
{
"data": {
"text/plain": [
- "1.2226271020019277"
+ "1.2172144988785374"
]
},
- "execution_count": 30,
+ "execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
@@ -1496,34 +1496,6 @@
"\n",
"sp.accuracy.mae(predictions, verbose=True)"
]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
}
],
"metadata": {
diff --git a/P2. Evaluation.ipynb b/P2. Evaluation.ipynb
index fdea66d..e89d78d 100644
--- a/P2. Evaluation.ipynb
+++ b/P2. Evaluation.ipynb
@@ -1684,7 +1684,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.8.5"
+ "version": "3.8.8"
}
},
"nbformat": 4,
diff --git a/P3. k-nearest neighbours.ipynb b/P3. k-nearest neighbours.ipynb
index 17eecae..a15592c 100644
--- a/P3. k-nearest neighbours.ipynb
+++ b/P3. k-nearest neighbours.ipynb
@@ -1049,7 +1049,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.8.5"
+ "version": "3.8.8"
}
},
"nbformat": 4,