diff --git a/P0. Data preparation.ipynb b/P0. Data preparation.ipynb
new file mode 100644
index 0000000..63b0137
--- /dev/null
+++ b/P0. Data preparation.ipynb
@@ -0,0 +1,682 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Building train and test sets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import scipy.sparse as sparse\n",
+ "import time\n",
+ "import random\n",
+ "import evaluation_measures as ev\n",
+ "import matplotlib\n",
+ "import matplotlib.pyplot as plt\n",
+ "import os\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "\n",
+ "import helpers\n",
+ "\n",
+ "os.makedirs('./Datasets/', exist_ok = True)\n",
+ "\n",
+ "helpers.download_movielens_100k_dataset()\n",
+ "\n",
+ "df=pd.read_csv('./Datasets/ml-100k/u.data',delimiter='\\t', header=None)\n",
+ "df.columns=['user', 'item', 'rating', 'timestamp']\n",
+ "\n",
+ "train, test = train_test_split(df, test_size=0.2, random_state=30)\n",
+ "\n",
+ "train.to_csv('./Datasets/ml-100k/train.csv', sep='\\t', header=None, index=False)\n",
+ "test.to_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None, index=False)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Interaction properties"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### What does the data look like?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
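+ "<div>\n",
+ "<style scoped>\n",
+ "    .dataframe tbody tr th:only-of-type {\n",
+ "        vertical-align: middle;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe tbody tr th {\n",
+ "        vertical-align: top;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe thead th {\n",
+ "        text-align: right;\n",
+ "    }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: right;\">\n",
+ "      <th></th>\n",
+ "      <th>user</th>\n",
+ "      <th>item</th>\n",
+ "      <th>rating</th>\n",
+ "      <th>timestamp</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "    <tr>\n",
+ "      <th>0</th>\n",
+ "      <td>196</td>\n",
+ "      <td>242</td>\n",
+ "      <td>3</td>\n",
+ "      <td>881250949</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>1</th>\n",
+ "      <td>186</td>\n",
+ "      <td>302</td>\n",
+ "      <td>3</td>\n",
+ "      <td>891717742</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>2</th>\n",
+ "      <td>22</td>\n",
+ "      <td>377</td>\n",
+ "      <td>1</td>\n",
+ "      <td>878887116</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>3</th>\n",
+ "      <td>244</td>\n",
+ "      <td>51</td>\n",
+ "      <td>2</td>\n",
+ "      <td>880606923</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>4</th>\n",
+ "      <td>166</td>\n",
+ "      <td>346</td>\n",
+ "      <td>1</td>\n",
+ "      <td>886397596</td>\n",
+ "    </tr>\n",
+ "  </tbody>\n",
+ "</table>\n",
+ "</div>"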
+ ],
+ "text/plain": [
+ " user item rating timestamp\n",
+ "0 196 242 3 881250949\n",
+ "1 186 302 3 891717742\n",
+ "2 22 377 1 878887116\n",
+ "3 244 51 2 880606923\n",
+ "4 166 346 1 886397596"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df[:5]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Sample properties"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "We have 943 users, 1682 items and 100000 ratings.\n",
+ "\n",
+ "Average number of ratings per user is 106.04. \n",
+ "\n",
+ "Average number of ratings per item is 59.453.\n",
+ "\n",
+ "Data density (% of observed entries) is 6.3047%.\n"
+ ]
+ }
+ ],
+ "source": [
+ "users, items, ratings=len(set(df['user'])), len(set(df['item'])), len(df)\n",
+ "\n",
+ "print('We have {} users, {} items and {} ratings.\\n'.format(users, items, ratings))\n",
+ "\n",
+ "print('Average number of ratings per user is {}. \\n'.format(round(ratings/users,2)))\n",
+ "print('Average number of ratings per item is {}.\\n'.format(round(ratings/items,4)))\n",
+ "print('Data density (% of observed entries) is {}%.'.format(round(100*ratings/(users*items),4)))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAA6UAAAHvCAYAAACsfXllAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nOzdeZgcZbn+8fvJAoKgLIaYBGQQWUJImCSNBBGMC8vJJohL+ImAeBL0wEEE0ShCAqIGgSPimkQQUFlUDpgFkUWQXZgJCQdDFIID2QghrNmAwPP7o2omXVM9PZ3MTL01qe/nuvqa7qrq6rufqUn66ap6y9xdAAAAAACE0CN0AAAAAABAcdGUAgAAAACCoSkFAAAAAARDUwoAAAAACIamFAAAAAAQDE0pAAAAACAYmlIAyIiZTTEzj28jQ+fpbsxsJzP7npk9amavmdnbcS1fDp2tq5jZVWXbTF3oPAAAdAWaUgCdruxDdPPtoBqeM75s+SkZxEQ3YmZ9JTVK+rakeknbSbKgoTaDmY2Mv5yYQpMJAECkV+gAAArh+5I+HjoEurVzJNXF9++X9FtJKyS5pDcDZdocIyVNju/fLakpVBAAAPKCphRAFj5mZp9w9ztCB0G3NSr++ZKkI9x9bcgwWXH3kySdFDgGAABdisN3AXSl8sbh+8FSYEuwW/zzn0VpSAEAKAqaUgBdabGkm+L7B5rZp0KGQbe2Vfzz9aApAABAp6MpBdDVviPp7fj+hWbWc3NXVDYQ0t0dXdbM7m5eJn7cw8xOjqc/b2ZrzOxxM/uOmW3f6rnvNbPvmtljZvaqmb1iZveY2Wc34z2NMrM/mdkSM3s9/nmdmR28Cet4j5mdY2b3mtlzZvaGma2MH3/DzLZr5/lNcS2a4sfvMLPTzew+M1sRj3J796a+t7L1b2tmXzOzu+J8r8c1vs/MvmVm727jeS2jFZdN/kiFgbRGbmKek8qee1I8rWRmvzKzp+LffWK9Fjk0Hv33r2a2LH4fa8zs32Z2vZmNrfKaU+L3Mbls8l0V3svdrZ5XdfTdeOCkxABhZvY+M7vUzBbG+V42swfM7L/MrKbTdszsGDObE//+18fbyG8tHrSsUg3bWM8nzez3Zva0ma2N17XUzOab2R/iTDvXkqmN9SfqZmZ9zOwCM/u/+G/zVTNrNLNJZrbNJqz3qLj2T1o00vNaM1sUT/twO8/d5O2rxkw1j8Rcy7Lx3/l/mdntZrY83p5Xx7/rR8zsCjP7jJltVen5Zes52Mx+YWYL4m1tvZk9a2Y3mNnodp5bafvdx8wuM7Mn4t9f1W0MwBbC3blx48atU2+KBp9xSQvjx1eVTTupjeeML1tmSjvrvXsTMlRcVtEgM83LbCfpjrLHrW9zJe0YP+9gSc9XWfaSKpmmlC03UtLPqqznLUmTa3ifJ0l6tcp6XNJzkg6uso6meLkmSXtIerzCOtqteRvrHiFpaTv5XlB0nmi1elW7jdzETCeVb4+SJknaUG29kn5dY5Y/S3pXB97L3a2ed1XZvLoK6x1ZNn+KpKMUnXfb1vpvk7R1ldr0lvT7Ks/fIOms1jWssJ5tJM2u8T2f0Qn/1twtaaikJVVe50lJe7Szvj6S7qwh868k9e6s7avG91p1W9jE7WbPuB61/H7q23iNd0q6vobnz5a0fRvraL39nqDotI/W60htY9y4cduybgx0BCALUyQdp+gQzClmdq27vxE2UsKvFY0OfL+iD+TPSdpd0qnxz6GSLjOzyZL+ouh9/ErSfZLekHSopAmKBo87y8xu9fYHdfqqpKMVNWS/kvSYpG0VNRXHKjqSZYqZrXL3n1ZagZl9VdJl8cO1kv4o6QFJqyS9J17XOEl9Jd1hZge6+4IqmbaW9L+SBsXv7UZJyxR9UO/bzvuplG+opL8qalAk6VFJ10p6VtJ7JX1W0iGSdpY028yOcPe7y1ZxvaR58f3mw8D/
oWjve7nHNzVbmc8pqtMrkq5WdNmZtyQdEE9rto2iQ4f/JulhSYskrVFUm70lfUHSTvG6rlH0uy3X/F7Gx68pSedWyP5CB95LvaSzFV0qZ5qkB+PMJUlfVtREHK5oJOPz2ljHdEmfie+vV9TcPKioJiVJX5J0iaJtrZrvS2reS7Zc0WjJ/5C0WtGXQB9Q9AXPYbW/varerWjbHaCo8b5Z0ouS9okzvy9+zTvNrN7dX229AjPbSdF73TOeND9e51OKjvbYX1GTOSBeZy+1PwhVrdtXZszMJP1BUT2kaLv8o6SnFY1kvaOkgZI+qmibqrSOrRV9kTcinrRI0g2SnojX8QFFDebeiraDm83scHd/u8Lqmh2iaNt8S9IViv49Xq/od/jcZrxVAN1J6K6YGzduW95NG7/dXlg27fKy6adXeE7IPaUu6dsVlumjjXv5Nij68Pa8pCEVlv1C2bpuaeM1p7R6zccl7VJhuaMVfbBzRY3P+yosUypb5tFKy8TLjVHUOLukh9pYpqlVrq91wjbQQ8k9rpdJ6lFhuXPLlnlW0js6+ruvIdtJrd7vE5L6t/OcQyXtUGX+O5Xcw/iRGraBkTVkvaps+boK80e2ei/PSNqrwnIfLNteXlSFvaWKvphpXs9KSftXWKauwvZyUqtlekp6OZ7XVGkbb/U3tm8Hfpet96idWmGZ7STdVbbMT9pY103x/LclfbWNZbZT9MVU87qO6oztq8b3WnVbqHVZRf92NM+bJalnlfXsJ2nnCtN/VLaOH0rqVWGZ3ooa8eblvlzD9rtc0n4drRU3bty6341zSgFk5XuKGixJOsfaOc8xY39x99TowO6+UlLzXsqeivZunObuj1VY9jeKDoeTokvgtHckygZJn3P35yus62ZJl8YPt5X0lQrPP0/RnprXJI1x92crvYi7z5Y0NX54kJl9qJ1cN7n7j9pZphZjFO1xlaSHFDW6qb0k7v5dSXPih7tJOr4TXntTuKTx7r6s6kLu97r7y1Xmr1G096x5G/9C50XcJMe7+5OtJ7r7w4r2ZEnRnrAPVnju18run+buqT3Q7t6k9vcO9lG051KS/lRpGy9b30p3X9jO+mp1vbv/rMJrrFb0pVfz3tEvmdkO5cuY2TBt3Lv9I3f/cRt5m9fVvJfzzHYy1bR9ZewDZfevdPe32lrQ3Re4+6ryaWbWT9J/xQ//192/4e4bKjz3TUn/qWgPrNR+rSTpFK9+NAeALRRNKYBMuPsKbTzUdBdJZwSM01rFw2Nj95fdX6Hqhy3eF//cWhsPAWzLX9z9H1XmX6boMDZJOqZ8hpntqI2HRl7n7kvbea3flt0/op1lf9LO/FqVj7R8sbt7lWWnlt3PeoTme919fmesyN1fk/R/8cODOmOdm+hRd7+3yvy/lt3fr3yGmb1DG7eNZYoO76zIo0OsU1/MlFnX1ut0sUvbmhH/+9P8d7CNokNqyzV/ieDV1hOv6yVJt8QPD4sPZW1Lp21fnaj8kkqD2lyqbZ/VxtGwL6m2YNyYNn8Zslc7AzQ9o2jPLYAC4pxSAFm6WNFev50kfd3Mfu7uLwbOJEl/rzJvRdn9xkp7+9pYdsd2XvPOajPd/Tkze0LReWx7m9m73b1578wh2vil4ltm1vr8xdZ6l90fWGW5txSdU9cZmvfEuaTb21n2AW081zDrZq5aE5cQNx+flfRJRXvN+yrKbBUW37VT0m2ah9qZX/7lRevt8wBt3E7uaWc7l6LD34dUmuHur5jZw4q2gU+Y2U2Kvuy4N25SusIris7XrOav2riH70BF5/k2OzT++bKkD0anXVa1ddnP9ys6RLeSmrevDN2n6IuDbSRNjr/kurrSESBtOLTs/q41/PtTvq0NVHRId8Vc7Xx5BWALRlMKIDPxh9WLJF2k6PC+SZK+ETaVpGhgoLaUXxez2nKtl31HO8s+1c785mX2V9T0vFcbDxmsK1vmK6p8eG9bqjXLq9x9/Sasq5p+8c/n4j2IbXL3
t81skaLGaCcz28qzGwirvb3MkiQzG6xo4Ke9alzvuzY70eZrb5Ckattn/7L7T6t97S1zqqIvXt6l6LDYoyWtMbO/K2qK7pB0fw3Nb60W1dDQlP/N9W81ry7+uaM2DqpVq2p/UzVtX1ly9xfN7GuSfqHoc+CZks40s+cVfUF0r6Q/u3tbjXZd2f3fb+LLd6taAcgOh+8CyNpPFB0eKEmnmVnrD4eZ24QPxp31AVpKHkLXljVl98vPwa14Xc8aVbvm4Loq8zZV87Vd11RdaqPVFZ6bhXbfczwq6x3a2JAuVvSB/quS/p+iQ46PiW/Nh2SH+P+1I9vnO8vub+q2meLuDYpGbr1GG2v8TkkfU3Q+9D2SFpnZ5zc9akUd+XuSusffVKdx92mKRte9Uxu3m10UfXlwqaQFZna/mVU697hQtQKQDfaUAsiUu68zs+8q+lC/jaIPqF/uzNcws+7whdu2NSxT3iisbuP+ye7+686J1Klek7SDku+hmvImoeqe1QBOU/SBXYpGE/3PSgO7SJKZnZNZqs5V3rBt6rZZkbv/W9KJZnaKosu/fEjShyV9RNHffp2k35rZ7pUGGttEHfl7an68g6Rn3X33DmbJg3b/DXT3v0n6m5ntrOiQ3IMV/W4OjJ//IUn3VbhUU3PtXNGou535ZR2AguoOH9wAbHmuUHRdOykaCfMD1RYu03xIZ7Vv26XoGp15V8t7bl7GlbxOX/lhbiHOXazF8vjne9sbaTm+bmLzwFCrMjx0t1afiH9ukHRGWw1prLs2NOWjw76/huVrWUaS5O7r3f0ud/+eu/+Hogb/m4q2a0k6L26MOmJPa/9E0PK/udaj4Tb/Te1iZr2VT+WHX3fav4Huvsrdb3b3b7r7CEXXdL02nt1b6cGMmmtliq7ZCgAdRlMKIHPxYCfnxQ97Sbqgxqc2X5KjvUN+Q4x8uqk+Vm2mmb1XGwcl+lfZIEdSdOhj8wf69kbTDeXh+KdpY1PXlg9p457Sh6stGEjf+OeqapeFMbOhii6HUk35XqV2R9PJ0HxF1zGVohFl2/t8MHJzX8jdV7v7DxWdoytFgwUduLnri71b0rB2lvlo2f1HWs37W/zzHZIO62CWrlK+7bX5b6CZ9VR0LdLNEo/mfaI2fhE23My2KVvkb2X38/rvD4BuhqYUQCjXaeNlJcYrGuSmPc3Xr9vdzKrtqTm9I8EycpSZVRsJ93RF10aVpP8tnxFf9/HW+OGHzSyPHwxvLLv/9Xb2Yn2zjeflRfP5iruYWbXzXc+rMq9Z+WGjtR7a3OXiAa5uix/2l/SZtpY1s5FqY+TdTdRUdr8zTidq8zqYZtZHG6+Bu1Yb/36aXVN2f3Lc2OVN+fU7q32pNV7tfzlSVXw0wJKySeW/n+u18aiVb5pZbrZjAN0XTSmAIOKRMpvPvzNJ/13D08o/SF5UqdExswvU/p65POgl6Yb4w3KCmY2V9PX44VpF59+29h1t3LN1vZm1vu5i63XubmaXmNku1ZbrRHO0cdCfQyRdXGnvm5l9W9LY+OFiSb/LJt4mad6rZpIubD3TIt9VNEhMe/5ddr+9PXtZ+1HZ/Z+a2f6tF4ivM3lVtZWY2VAzO9fM+lZZ5j3a2Pi6ql/3tFb/z8xS56fHTdN12jhAz5Wt93i7+9+18QuRQyX9zszaHEHZzHqZ2afM7NROyF2r27Xx2sWnmlnqUHEzK6mdaw2b2efN7Iut9n62XmaEpKHxw6fLR9B298Vlr7GXpFnxkR1trauHmX3CzL5TLReAYmOgIwDBuPtsM3tA0eGbtXzbfqWiS8jsJOnTku41s98puhTG+xTtISgp+iZ/fJeE7jw3K2pi/mFmMyT9n6LBWo5U9GG9ueH+ZvwhMMHd55rZVyTNUHSZhT+b2f2S/qyo8XlTUZ32VTS4TPPhfJd12TtK5nvbzI5XdImJbSSdJemj8e9riaJDYj8bZ1Oc94ROvCRNZ/q5pJMV7bk+3czqFe29fk7SbopG4B2q
aE/WOknDq6zrXkXvtbeks82suSFrPl/wRXcPcgizu99pZldJOknROYmPxI8fUHTYcUlRHd4l6Y+K/gal9Ki/71Z0SP7keJt8QNK/FA1gtZOkwYpqtlO8/O/c/dkOxp+naKCiX8TXzbxJ0eGue0v6kjae6/tvbfwyrLWT4+UHS/qcpCPN7PeSGiS9pGg7HqDod324or+7KzqYu2buvszMrpX0BUW1e8TMfq5ou9tO0SHVx0l6UdE1Wdvam7qXpMmSfmJmtyv60mWxom1wF0VN+dHaeKRGpUGovqVodOWPKzos+mkzu1HRdY5XKjrn9b2KjoA5PL5/pyp8qQMAEk0pgPC+peQ5Sm1y95Vm9gVFDcHWivbAHdJqsdmKPlzmvSn9saIBQ06V9O0K813SBe7+07ZW4O5XxNcWnKGoyatUj3KrJGXW9Ln7PDP7uKI9UP0U7RmstHfwRUn/r9UIn7kRv4//lvRTRUcYHab0eYdPSPqkpF+1s64XzOwSRdv9dkqfT/03deB8zU4wUVGuTys6v/LLSo6O/baivfivaGNT2nq05ObznXuqcq3K3RC/Zke9IumLiv7+j4xvrS2SdIS7v1ppBe7+qpl9WNJ0RU3pDnG2avlaD5jU1c5Q1DTXKzpEd3Kr+csVXZqo2rWLm38/79TGa8hW8qakc9091Xi7+5tmNkrR5WO+oqhhP14bD5GuhOuQAmgTh+8CCMrd71H6/K5qy9+i6Nv3X0t6VtG5TSsl3aVoD8I4d+8W17tz99MkjZY0S9GH2zfinzdIOsTdp9SwjlmS9lDUOMxUtMdjnTbW5UFFh9qNldTf3V/o9DdSPd+DivbMnKmo4Vqp6MPuqjjbOZL2dPe/ZJlrU7n7LxQ1/H9QtIf0TUnPK9oLeKakkrs/VeO6vq1oj9at8bpyM9qwu7/p7p+RdKyifCsV7UF7VtGh1Ye4+6WSykfLfbHVOv6mqHH6uqIvJBYqOpf27fjnAkVHPXzE3cd31t+ru89TtBfzQkmPK2qWV0t6VNEXP0Pc/el21vGqu49X9OXJZfFzVykaeXm1pCcVHeVwpqLttpbziDuNu7+o6MiSSXG21You57NA0vckHRAfilzN9xR98fFdSX9RdG7vOkXv8SVFg41dJGk/d7+oSpY33P2/FR2NMVXS3xVtLxsUnXbwb0m3aGPtT9zkNwygMCw6rQsAAKA28aGan4of7hw3SyFyNH+I+Zu7jwyRAQDQcewpBQAANYsHOxoTP5wfqiEFAGw5aEoBAIAkycz2NLNdq8wfoGgQoa3iSdMyCQYA2KIx0BEAAGh2sKRfm9k9ikYKXqTofMOdJY1QNGLytvGyDykaFAgAgA6hKQUAAOV6KbqcSFuXFJGkuyUd6+5vVVkGAICa0JQCAIBmsxRdAuVwSQMVXa90J0UjBK9QNMLq9fGozwAAdIpcjL77nve8x+vq6kLH6DIrV65Unz59Qsdokbc8RUHdAQAAUFSNjY0vuHvFD8O52FNaV1enhoaG0DEAAAAAAF3AzJ5pax6j72ZgypQpoSMk5C1PUVB3AAAAIC0Xh++WSiXfkveUmpnyUOdmectTFNQdAAAARWVmje5eqjSPPaUAAAAAgGBoSgEAAAAAwdCUZiBvhybnLU9RUHcAAAAgjaYUAAAAABAMAx1lIG8D3OQtT1FQdwAAABQVAx0BAAAAAHKp3abUzK40s+fN7PGyaTeY2bz41mRm8+LpdWa2rmzeL7syPAAAAACge+tVwzJXSfqppGuaJ7j755rvm9mlkl4pW36Ru9d3VsAtweTJk0NHSMhbnqKg7gAAAEBaTeeUmlmdpNnuvn+r6SbpWUkfc/cn21quPVv6OaUAAAAAUGRdeU7poZJWuPuTZdP2MLNHzexvZnZolVATzazBzBpWrlzZwRj51r9//9AREvKWpyioOwAAAJDW0ab0OEnXlT1eLul97j5U0pmSrjWzd1V6ortPd/eSu5f69OnTwRj5tnz58pb7ixcv1kc/
+lHtt99+GjRokH784x+3zJsyZYoGDBig+vp61dfX65ZbbpEk3X///RoyZIhKpZKefDLq/19++WUdccQRevvttzuUpyNuvvlmLViwoOXxeeedpzvuuEOSNHLkyM2+Luef/vQnDRkyRPX19SqVSrrvvvskSc8884yGDRum+vp6DRo0SL/8Zfc6Zbm9uv/zn/9s+d3X19frXe96ly677DJJ2W0bnaWrto2FCxfq4IMP1tZbb61LLrkkNf+tt97S0KFDNWbMmJZpd955Z8t28+EPf1hPPfXUZr02AAAAuoi7t3uTVCfp8VbTeklaIWnXKs+7W1KpvfUPHz7ct2RRmSPLli3zxsZGd3d/9dVXfa+99vJ//OMf7u4+efJkv/jii1PPP+aYY3zx4sV+7733+plnnunu7meddZbfddddHc7TESeeeKL/4Q9/qDjvIx/5iD/yyCObtd7XXnvN3377bXd3nz9/vu+zzz7u7v7666/7+vXrW5bZfffdfenSpZv1GiFsSt03bNjgffv29aamJnfPbtvoLF21baxYscIffvhh//a3v12xHpdeeqkfd9xxPnr06JZpe+21ly9YsMDd3X/2s5/5iSeeuFmvDQAAgM0nqcHb6Ac7sqf0E5IWuvuS5glm1sfMesb33y9pL0lPd+A1tgjDhg1rud+vX7+Wx9tvv70GDhyopUuXVn1+7969tXbtWq1du1a9e/fWokWLtHjxYo0cObLN59x6663ad999NWzYMJ1++ukte46a97g123///dXU1CRJOvroozV8+HANGjRI06dPb1lmu+220znnnKMDDjhAI0aM0IoVK/TAAw9o5syZOvvss1VfX69FixbppJNO0h//+MdUlttuu00HH3ywhg0bps985jNavXp11fe73XbbKTpdWVqzZk3L/a222kpbb721JOn1118Puidwc5RvB+258847teeee2r33Xevulxnbxvlex/zuG3ssssuOvDAA9W7d+/UvCVLlmjOnDn6z//8z8R0M9Orr74qSXrllVc4jBoAACBnarkkzHWSHpS0j5ktMbMvxbPGK3noriQdJumx+BIxf5T0ZXd/sTMDd0eNjY0Vpzc1NenRRx/VQQcd1DLtpz/9qYYMGaKTTz5ZL730kiTpW9/6lk444QT94Ac/0GmnnaZzzjlHF154YZuvt379ek2YMEGzZs1SY2OjnnvuucT8M844o+LzrrzySjU2NqqhoUGXX365Vq1aJSlqDEeMGKH58+frsMMO04wZM/ShD31I48aN08UXX6x58+Zpzz33rLjOF154QRdeeKHuuOMOzZ07V6VSSf/zP/8jKTqkc+bMmRWfd9NNN2nffffV6NGjdeWVV7ZMX7x4sYYMGaLddttN3/zmN7tVg9HWdlDJ9ddfr+OOOy4xLYttoy152jbacsYZZ+iHP/yhevRI/rP2q1/9SqNGjdKuu+6q3/zmN5o0adImrRcAAABdq92m1N2Pc/d+7t7b3Xd19yvi6Se5+y9bLXujuw9y93p3H+bus7oqeHcyceLE1LTVq1fr2GOP1WWXXaZ3vSs67fYrX/mKFi1apHnz5qlfv34666yzJEn19fV66KGHdNddd+npp59Wv3795O763Oc+p+OPP14rVqxIrHvhwoXaY489tNdee8nMdPzxxyfmV9pjJUmXX355yx6vxYsXt5yjuNVWW7XsTRs+fHjL3rNaPPTQQ1qwYIEOOeQQ1dfX6+qrr9YzzzwjSbrgggs0bty4is875phjtHDhQt18880699xzW6bvtttueuyxx/TUU0/p6quvTr33PKu0HVTyxhtvaObMmfrMZz7TMi2rbaMtedo2Kpk9e7Z22WUXDR8+PDXvRz/6kW655RYtWbJEX/ziF3XmmWfWvF4AAAB0vY4OdIQazJgxI/H4zTff1LHHHqvPf/7z+tSnPtUyvW/fvurZs6d69OihCRMm6OGHH048z9114YUX6txzz9X555+vH/7wh5owYYIuv/zymrP0
6tVLf//731ser1+/XpJ0991364477tCDDz6o+fPna+jQoS3zevfu3XIIbc+ePbVhw4aaX8/ddfjhh2vevHmaN2+eFixYoCuuuKLm5x922GF6+umn9cILLySm9+/fX/vvv7/uvffemtcVWuvtoC1//vOfNWzYMPXt27dlWlbbRvkh0XnfNsrdf//9mjlzpurq6jR+/Hj99a9/1fHHH6+VK1dq/vz5LUcjfO5zn9MDDzywWa8BAACArkFTmjF315e+9CUNHDgwtcemfHTWm266Sfvvn7zc6zXXXKNRo0Zpp5120tq1a9WjRw/16NFDa9euTSy37777qqmpSYsWLZIkXXfdxqOs6+rqWu7PnTtX//73vyVF59rtuOOO2nbbbbVw4UI99NBD7b6X7bffXq+99lrVZUaMGKH777+/ZcTTNWvW6F//+lfV5zz11FPNA2Vp7ty5ev3117XzzjtryZIlWrdunSTppZde0n333ad99tmn3ZzdzXXXXZc6dDerbWPu3LmS8rtttOUHP/iBlixZoqamJl1//fX62Mc+pt/+9rfacccd9corr7Ss9/bbb9fAgQM36zUAAADQNXqFDtBd1E2aU3V+09TRNa3n/vvv129+8xsNHjxY9fX1kqTvf//7GjVqlL7xjW9o3rx5MjPV1dVp2rRpLc9bu3atrrrqKt12222SpDPPPFOjRo3SVlttpWuvvTbxGu94xzs0ffp0jR49Wttuu60OPfTQlgbh2GOP1Re+8AUNGjRIBx10kPbee29J0lFHHaVf/vKXGjhwoPbZZx+NGDGi3fcyfvz4lr1xbR0S3KdPH1111VU67rjj9Prrr0uSLrzwQu29994677zzVCqVUodp3njjjbrmmmvUu3dvbbPNNrrhhhtkZnriiSd01llnyczk7vr617+uwYMH11L2bmPNmjW6/fbbE797SZltG9dcc02ut43nnntOpVJJr776qnr06KHLLrtMCxYsaDkEvrVevXppxowZOvbYY9WjRw/tuOOOiXOUAQAAEJ4175EKqVQq+eZetzArHWlKly1bFnRAnrvvvluXXHKJZs+enYs8RZXHurfeNgAAAICuYGaN7l6qNI/DdzOwKaOuZiFveYqCugMAAABp7CmtUWwg/EQAACAASURBVEf2lDYfbpoXectTFNQdAAAARcWeUgAAAABALtGUAgAAAACCoSnNQOuRVEPLW56ioO4AAABAGueU1qizLgkDAAAAAEXDOaWBmVnoCAl5y1MU1B0AAABIoykFAAAAAARDUwoAAAAACIamNANjxowJHSEhb3mKgroDAAAAaTSlGZg1a1boCAl5y1MU1B0AAABIoynNwNixY0NHSMhbnqKg7gAAAEAaTWkGZs+eHTpCQt7yFAV1BwAAANJoSpFiZjr++ONbHm/YsEF9+vTZ5HMiR44cqebrz44aNUovv/xyp+aUpGeffVZHHHGEBg4cqP32209NTU2SpJNOOkl77LGH6uvrVV9fr3nz5nX6awMAAADouF6hAyB/3vnOd+rxxx/XunXrtM022+j222/XgAEDOrTOW265pZPSJZ1wwgk655xzdPjhh2v16tXq0WPj9ywXX3yxPv3pT3fJ6wIAAADoHOwpzYC7h46QUEueUaNGac6cOZKk6667Tscdd1zLvDVr1ujkk0/WBz/4QQ0dOlR/+tOfJEnr1q3T+PHjNXDgQB1zzDFat25dy3Pq6ur0wgsvSJKOPvpoDR8+XIMGDdL06dNbltluu+10zjnn6IADDtCIESO0YsWKqhkXLFigDRs26PDDD295/rbbbltjFbKXt+0AAAAAyAOa0gyUN155UEue8ePH6/rrr9f69ev12GOP6aCDDmqZ973vfU8f+9jH9PDDD+uuu+7S2WefrTVr1ugXv/iFtt12Wz3xxBM6//zz1djYWHHdV155pRobG9XQ0KDLL79cq1atkhQ1uyNGjND8+fN12GGHacaMGZKkmTNn6rzzzkut51//+pd22GEH
fepTn9LQoUN19tln66233mqZf84552jIkCH62te+ptdff32TatQV8rYdAAAAAHlAU5qBU045JXSEhFryDBkyRE1NTbruuus0atSoxLzbbrtNU6dOVX19vUaOHKn169fr2Wef1T333NNyLuqQIUM0ZMiQiuu+/PLLW/aGLl68WE8++aQkaauttmo5b3X48OEt54eOGzdOF1xwQWo9GzZs0L333qtLLrlEjzzyiJ5++mldddVVkqQf/OAHWrhwoR555BG9+OKLuuiii2qqTVfK23YAAAAA5AFNKdo0btw4ff3rX08cuitFh6HeeOONmjdvnubNm6dnn31WAwcOrGmdd999t+644w49+OCDmj9/voYOHar169dLknr37i0zkyT17NlTGzZsqLquXXfdVfX19Xr/+9+vXr166eijj9bcuXMlSf369ZOZaeutt9YXv/hFPfzww5v69gEAAABkgKYUbTr55JM1efJkDR48ODH9yCOP1E9+8pOWcyQfffRRSdJhhx2ma6+9VpL0+OOP67HHHkut85VXXtGOO+6obbfdVgsXLtRDDz202fkOPPBAvfzyy1q5cqUk6a9//av2228/SdLy5cslRQ30zTffrP3333+zXwcAAABA16EpzcDMmTNDR0ioNc+uu+6q008/PTX93HPP1ZtvvqkhQ4Zo0KBBOvfccyVJX/nKV7R69WoNHDhQ5513noYPH5567lFHHaUNGzZo4MCBmjRpkkaMGFFT3krnlPbs2VOXXHKJPv7xj2vw4MFyd02YMEGS9PnPf16DBw/W4MGD9cILL+g73/lOTe+5K+VtOwAAAADywPIwImipVPLm61nmVd2kOVXnN00d3ea8ZcuWqX///p0dabPlLU9RUHcAAAAUlZk1unup0jz2lGago9f47Gx5y1MU1B0AAABIoykFAAAAAARDUwoAAAAACIamNAPNg+/kRd7yFAV1BwAAANIY6KhGHRnoCAAAAACKjIGOAqt0aZSQ8panKKg7AAAAkEZTmoG5c+eGjpCQtzxFQd0BAACANJpSAAAAAEAwNKUZ6NevX+gICXnLUxTUHQAAAEijKc3AsmXLQkdIyFueoqDuAAAAQBpNaQamTJkSOkJC3vIUBXUHAAAA0rgkTI06ckkYM1Me6twsb3mKgroDAACgqLgkDAAAAAAgl2hKAQAAAADB0JRmIG+HJuctT1FQdwAAACCNphQAAAAAEAxNaQZKpYrn8waTtzxFQd0BAACANJpSAAAAAEAwNKUAAAAAgGBoSjMwefLk0BES8panKKg7AAAAkGbuHjqDSqWS531k0rpJc6rOb5o6OqMkAAAAANC9mFmju1ccZIU9pRno379/6AgJectTFNQdAAAASKMpzcDy5ctDR0jIW56ioO4AAABAGk0pAAAAACAYmtIMDBs2LHSEhLzlKQrqDgAAAKS125Sa2ZVm9ryZPV42bYqZLTWzefFtVNm8b5nZU2b2TzM7squCdyeNjY2hIyTkLU9RUHcAAAAgrZY9pVdJOqrC9B+5e318u0WSzGw/SeMlDYqf83Mz69lZYburiRMnho6QkLc8RUHdAQAAgLR2m1J3v0fSizWu75OSrnf3193935KekvTBDuTbIsyYMSN0hIS85SkK6g4AAACkdeSc0tPM7LH48N4d42kDJC0uW2ZJPA0AAAAAgJTNbUp/IWlPSfWSlku6dFNXYGYTzazBzBpWrly5mTEAAAAAAN3ZZjWl7r7C3d9y97clzdDGQ3SXStqtbNFd42mV1jHd3UvuXurTp8/mxOg2li6tWIJg8panKKg7AAAAkLZZTamZ9St7eIyk5pF5Z0oab2Zbm9kekvaS9HDHInZ/eRt1NW95ioK6AwAAAGm92lvAzK6TNFLSe8xsiaTJkkaaWb0kl9Qk6RRJcvd/mNnvJS2QtEHSqe7+VtdE7z7GjRsndw8do0Xe8hQFdQcAAADS2m1K3f24CpOvqLL89yR9ryOhAAAAAADF0JHRdwEAAAAA
6BCa0gxMmzYtdISEvOUpCuoOAAAApFkeznErlUre0NAQOkZVdZPmVJ3fNHV0RkkAAAAAoHsxs0Z3L1Wax57SDJhZ6AgJectTFNQdAAAASKMpBQAAAAAEQ1MKAAAAAAiGpjQDY8aMCR0hIW95ioK6AwAAAGk0pRmYNWtW6AgJectTFNQdAAAASKMpzcDYsWNDR0jIW56ioO4AAABAGk1pBmbPnh06QkLe8hQFdQcAAADSaEoBAAAAAMHQlAIAAAAAgqEpzYC7h46QkLc8RUHdAQAAgDSa0gxMnz49dISEvOUpCuoOAAAApFke9t6USiVvaGgIHaOquklzqs5vmjq6zXlmlqu9ZHnLUxTUHQAAAEVlZo3uXqo0jz2lAAAAAIBgaEoBAAAAAMHQlGZg5syZoSMk5C1PUVB3AAAAII2mNAPDhw8PHSEhb3mKgroDAAAAaTSlGRgwYEDoCAl5y1MU1B0AAABIoykFAAAAAARDUwoAAAAACIamNAMTJkwIHSEhb3mKgroDAAAAaebuoTOoVCp5Q0ND6BhV1U2aU3V+09TRGSUBAAAAgO7FzBrdvVRpHntKM5C3UVfzlqcoqDsAAACQRlOagblz54aOkJC3PEVB3QEAAIA0mlIAAAAAQDA0pRno169f6AgJectTFNQdAAAASKMpzcCyZctCR0jIW56ioO4AAABAGk1pBqZMmRI6QkLe8hQFdQcAAADSuCRMjTpySRgzUx7q3CxveYqCugMAAKCouCQMAAAAACCXaEoBAAAAAMHQlGYgb4cm5y1PUVB3AAAAII2mFAAAAAAQDE1pBkqliufzBpO3PEVB3QEAAIA0mlIAAAAAQDA0pQAAAACAYGhKMzB58uTQERLylqcoqDsAAACQZu4eOoNKpZLnfWTSuklzqs5vmjo6oyQAAAAA0L2YWaO7VxxkhT2lGejfv3/oCAl5y1MU1B0AAABIoynNwPLly0NHSMhbnqKg7gAAAEAaTSkAAAAAIBia0gwMGzYsdISEvOUpCuoOAAAApNGUZqCxsTF0hIS85SkK6g4AAACk0ZRmYOLEiaEjJOQtT1FQdwAAACCNS8LUqCOXhDEz5aHOzfKWpyioOwAAAIqKS8IAAAAAAHKJphQAAAAAEAxNaQaWLl0aOkJC3vIUBXUHAAAA0mhKM5C3UVfzlqcoqDsAAACQ1m5TamZXmtnzZvZ42bSLzWyhmT1mZjeZ2Q7x9DozW2dm8+LbL7syfHcxbty40BES8panKKg7AAAAkFbLntKrJB3VatrtkvZ39yGS/iXpW2XzFrl7fXz7cufEBAAAAABsidptSt39Hkkvtpp2m7tviB8+JGnXLsgGAAAAANjCdcY5pSdL+nPZ4z3M7FEz+5uZHdoJ6+/2pk2bFjpCQt7yFAV1BwAAANLM3dtfyKxO0mx337/V9HMklSR9yt3dzLaWtJ27rzKz4ZJuljTI3V+tsM6JkiZK0vve977hzzzzTEffS5eqmzSn6vymqaMzSgIAAAAA3YuZNbp7qdK8zd5TamYnSRoj6fMed7bu/rq7r4rvN0paJGnvSs939+nuXnL3Up8+fTY3RrdgZqEjJOQtT1FQdwAAACBts5pSMztK0jckjXP3tWXT+5hZz/j++yXtJenpzggKAAAAANjy9GpvATO7TtJISe8xsyWSJisabXdrSbfHe38eikfaPUzSBWb2pqS3JX3Z3V+suGIAAAAAQOG125S6+3EVJl/RxrI3Srqxo6G2NGPGjAkdISFveYqCugMAAABpnTH6Ltoxa9as0BES8panKKg7AAAAkEZTmoGxY8eGjpCQtzxFQd0BAACANJrSDMyePTt0hIS85SkK6g4AAACk0ZQCAAAAAIKhKQUAAAAABENTmgF3Dx0hIW95ioK6AwAAAGk0pRmYPn166AgJectTFNQdAAAASLM87L0plUre0NAQOkZVdZPmVJ3fNHV0m/PMLFd7yfKWpyioOwAAAIrKzBrdvVRpHntKAQAAAADB
0JQCAAAAAIKhKc3AzJkzQ0dIyFueoqDuAAAAQBpNaQaGDx8eOkJC3vIUBXUHAAAA0mhKMzBgwIDQERLylqcoqDsAAACQRlMKAAAAAAiGphQAAAAAEAxNaQYmTJgQOkJC3vIUBXUHAAAA0szdQ2dQqVTyhoaG0DGqqps0p+r8pqmjM0oCAAAAAN2LmTW6e6nSPPaUZiBvo67mLU9RUHcAAAAgjaY0A3Pnzg0dISFveYqCugMAAABpNKUAAAAAgGBoSjPQr1+/0BES8panKKg7AAAAkEZTmoFly5aFjpCQtzxFQd0BAACANJrSDEyZMiV0hIS85SkK6g4AAACkcUmYGnXkkjBmpjzUuVne8hQFdQcAAEBRcUkYAAAAAEAu0ZQCAAAAAIKhKc1A3g5NzlueoqDuAAAAQBpNKQAAAAAgGJrSDJRKFc/nDSZveYqCugMAAABpNKUAAAAAgGBoSgEAAAAAwdCUZmDy5MmhIyTkLU9RUHcAAAAgzdw9dAaVSiXP+8ikdZPmVJ3fNHV0RkkAAAAAoHsxs0Z3rzjICntKM9C/f//QERLylqcoqDsAAACQRlOageXLl4eOkJC3PEVB3QEAAIA0mlIAAAAAQDA0pRkYNmxY6AgJectTFNQdAAAASKMpzUBjY2PoCAl5y1MU1B0AAABIoynNwMSJE0NHSMhbnqKg7gAAAEAal4SpUUcuCWNmykOdm+UtT1FQdwAAABQVl4QBAAAAAOQSTSkAAAAAIBia0gwsXbo0dISEvOUpCuoOAAAApNGUZiBvo67mLU9RUHcAAAAgjaY0A+PGjQsdISFveYqCugMAAABpNKUAAAAAgGBoSgEAAAAAwdCUZmDatGmhIyTkLU9RUHcAAAAgzdw9dAaVSiVvaGgIHaOquklzqs5vmjo6oyQAAAAA0L2YWaO7lyrNY09pBswsdISEvOUpCuoOAAAApNGUAgAAAACCqakpNbMrzex5M3u8bNpOZna7mT0Z/9wxnm5mdrmZPWVmj5nZsK4KDwAAAADo3mrdU3qVpKNaTZsk6U5330vSnfFjSfoPSXvFt4mSftHxmN3bmDFjQkdIyFueoqDuAAAAQFpNTam73yPpxVaTPynp6vj+1ZKOLpt+jUcekrSDmfXrjLDd1axZs0JHSMhbnqKg7gAAAEBaR84p7evuy+P7z0nqG98fIGlx2XJL4mmFNXbs2NAREvKWpyioOwAAAJDWKQMdeXRdmU26toyZTTSzBjNrWLlyZWfEyK3Zs2eHjpCQtzxFQd0BAACAtI40pSuaD8uNfz4fT18qabey5XaNpyW4+3R3L7l7qU+fPh2IAQAAAADorjrSlM6UdGJ8/0RJfyqbfkI8Cu8ISa+UHeYLAAAAAECLXrUsZGbXSRop6T1mtkTSZElTJf3ezL4k6RlJn40Xv0XSKElPSVor6YudnLnbiY5uzo+85SkK6g4AAACk1Tr67nHu3s/de7v7ru5+hbuvcvePu/te7v4Jd38xXtbd/VR339PdB7t7Q9e+hfybPn166AgJectTFNQdAAAASLM87L0plUre0JDv3rVu0pyq85umjm5znpnlai9Z3vIUBXUHAABAUZlZo7uXKs3rlNF3AQAAAADYHDSlAAAAAIBgaEozMHPmzNAREvKWpyioOwAAAJBGU5qB4cOHh46QkLc8RUHdAQAAgDSa0gwMGDAgdISEvOUpCuoOAAAApNGUAgAAAACCoSkFAAAAAARDU5qBCRMmhI6QkLc8RUHdAQAAgDRz99AZVCqVvKGhIXSMquomzak6v2nq6IySAAAAAED3YmaN7l6qNI89pRnI26irectTFNQdAAAASKMpzcDcuXNDR0jIW56ioO4AAABAGk0pAAAAACAYmtIM9OvXL3SEhLzlKQrqDgAAAKTRlGZg2bJloSMk5C1PUVB3AAAAII2mNANTpkwJHSEhb3mKgroDAAAAaVwSpkYduSSMmSkPdW6WtzxFQd0BAABQVFwSBgAAAACQSzSl
AAAAAIBgaEozkLdDk/OWpyioOwAAAJBGUwoAAAAACIamNAOlUsXzeYPJW56ioO4AAABAGk0pAAAAACAYmlIAAAAAQDA0pRmYPHly6AgJectTFNQdAAAASDN3D51BpVLJ8z4yad2kOVXnN00dnVESAAAAAOhezKzR3SsOssKe0gz0798/dISEvOUpCuoOAAAApNGUZmD58uWhIyTkLU9RUHcAAAAgjaYUAAAAABAMTWkGhg0bFjpCQt7yFAV1BwAAANJoSjPQ2NgYOkJC3vIUBXUHAAAA0mhKMzBx4sTQERLylqcoqDsAAACQxiVhatSRS8KYmfJQ52Z5y1MU1B0AAABFxSVhAAAAAAC5RFMKAAAAAAiGpjQDS5cuDR0hIW95ioK6AwAAAGk0pRnI26irectTFNQdAAAASKMpzcC4ceNCR0jIW56ioO4AAABAWq/QAbYU7Y3OCwAAAABIY08pAAAAACAYmtIM7HTkaaEjJEybNi10hEKi7gAAAEAaTWkGtq8/KnSEhIkTJ4aOUEjUHQAAAEijKc3AMxeNCR0hwcxCRygk6g4AAACk0ZQCAAAAAIKhKQUAAAAABENTmoFt9jwwdISEMWPydThxUVB3AAAAII2mNAO7fHpy6AgJs2bNCh2hkKg7AAAAkEZTmoHn/3h+6AgJY8eODR2hkKg7AAAAkEZTmoF1ix4JHSFh9uzZoSMUEnUHAAAA0mhKAQAAAADB0JQCAAAAAILptblPNLN9JN1QNun9ks6TtIOkCZJWxtO/7e63bHbCLcDu38zXYZvuHjpCIVF3AAAAIG2z95S6+z/dvd7d6yUNl7RW0k3x7B81zyt6QypJr827NXSEhOnTp4eOUEjUHQAAAEjrrMN3Py5pkbs/00nr26K8+Jefho6QcMopp4SOUEjUHQAAAEjrrKZ0vKTryh6fZmaPmdmVZrZjJ70GAAAAAGAL0+Gm1My2kjRO0h/iSb+QtKekeknLJV3axvMmmlmDmTWsXLmy0iIAAAAAgC1cZ+wp/Q9Jc919hSS5+wp3f8vd35Y0Q9IHKz3J3ae7e8ndS3369OmEGPnV59hzQ0dImDlzZugIhUTdAQAAgLTOaEqPU9mhu2bWr2zeMZIe74TX6Na26vuB0BEShg8fHjpCIVF3AAAAIK1DTamZvVPS4ZL+t2zyD83s/8zsMUkflfS1jrzGlmDpz08MHSFhwIABoSMUEnUHAAAA0jb7OqWS5O5rJO3catoXOpQIAAAAAFAYnTX6LgAAAAAAm4ymNAPbHXBk6AgJEyZMCB2hkKg7AAAAkEZTmoGdj/rv0BESpk+fHjpCIVF3AAAAII2mNAPLr/pq6AgJjAIbBnUHAAAA0mhKM/DGikWhIyTMnTs3dIRCou4AAABAGk0pAAAAACAYmtIM9Nxup9AREvr16xc6QiFRdwAAACCNpjQDu556TegICcuWLQsdoZCoOwAAAJBGU5qBl+/7XegICVOmTAkdoZCoOwAAAJBm7h46g0qlkjc0NISOUVXdpDmb/dxnLhqjPNS5mZnlKk9RUHcAAAAUlZk1unup0jz2lAIAAAAAgqEpBQAAAAAEQ1OagfeeeFnoCAl5P1R6S0XdAQAAgDSaUgAAAABAMDSlGXju6jNCR0golSqeX4wuRt0BAACANJpSAAAAAEAwNKUAAAAAgGBoSjPw7kOOCx0hYfLkyaEjFBJ1BwAAANLM3UNnUKlU8ryPTFo3aU6Hnt80dXQnJQEAAACA7sXMGt294iAr7CnNwJKfnRA6QkL//v1DRygk6g4AAACk0ZRm4K3VL4aOkLB8+fLQEQqJugMAAABpNKUAAAAAgGBoSjOwVd89Q0dIGDZsWOgIhUTdAQAAgDSa0gz0O+nHoSMkNDY2ho5QSNQdAAAASKMpzcCqW38SOkLCxIkTQ0coJOoOAAAApNGUZmD1/L+EjpAwY8aM0BEKiboDAAAAaTSlAAAAAIBgaEoBAAAAAMHQlGZgwH9dHTpCwtKlS0NH
KCTqDgAAAKTRlGbgjRVPhY6QwCiwYVB3AAAAII2mNAMrb/xu6AgJ48aNCx2hkKg7AAAAkEZTCgAAAAAIhqYUAAAAABAMTWkGdjrytNAREqZNmxY6QiFRdwAAACDN3D10BpVKJW9oaAgdo6q6SXO6dP1NU0d36foBAAAAIBQza3T3UqV57CnNwDMXjQkdIcHMQkcoJOoOAAAApNGUAgAAAACCoSkFAAAAAARDU5qBbfY8MHSEhDFj8nU4cVFQdwAAACCNpjQDu3x6cugICbNmzQodoZCoOwAAAJBGU5qB5/94fugICWPHjg0doZCoOwAAAJDWK3SAIli36JF2l2nvkjOdecmY2bNnd9q6UDvqDgAAAKSxpxQAAAAAEAxNKQAAAAAgGJrSDOz+zXwdtunuoSMUEnUHAAAA0mhKM/DavFtDR0iYPn166AiFRN0BAACANJrSDLz4l5+GjpBwyimnhI5QSNQdAAAASKMpBQAAAAAEQ1MKAAAAAAiGpjQDfY49N3SEhJkzZ4aOUEjUHQAAAEijKc3AVn0/EDpCwvDhw0NHKCTqDgAAAKTRlGZg6c9PDB0hYcCAAaEjFBJ1BwAAANJ6dXQFZtYk6TVJb0na4O4lM9tJ0g2S6iQ1Sfqsu7/U0dcCAAAAAGxZOmtP6Ufdvd7dS/HjSZLudPe9JN0ZPwYAAAAAIKGrDt/9pKSr4/tXSzq6i16nW9jugCNDR0iYMGFC6AiFRN0BAACANHP3jq3A7N+SXpLkkqa5+3Qze9ndd4jnm6SXmh+XPW+ipImS9L73vW/4M88806EcXa1u0pygr980dXTQ1wcAAACAzWVmjWVH1iZ0xp7SD7v7MEn/IelUMzusfKZHXW+q83X36e5ecvdSnz59OiFGfi2/6quhIyQwCmwY1B0AAABI63BT6u5L45/PS7pJ0gclrTCzfpIU/3y+o6/Tnb2xYlHoCAlz584NHaGQqDsAAACQ1qGm1MzeaWbbN9+XdISkxyXNlNR8HZQTJf2pI68DAAAAANgydfSSMH0l3RSdNqpekq5191vN7BFJvzezL0l6RtJnO/g63VrP7XYKHSGhX79+oSMUEnUHAAAA0jrUlLr705IOqDB9laSPd2TdW5JdT72my1+jloGYmgdLWrZsWVfHQQXUHQAAAEjrqkvCoMzL9/0udISEKVOmhI5QSNQdAAAASKMpzcAr918XOkLC+eefHzpCIVF3AAAAIK2j55QiI6GvkwoAAAAAXYE9pQAAAACAYGhKM/DeEy8LHSGhoaEhdIRCou4AAABAGk0pAAAAACAYmtIMPHf1GaEjJJRKpdARCom6AwAAAGk0pQAAAACAYGhKAQAAAADB0JRm4N2HHBc6QsLkyZNDRygk6g4AAACkmbuHzqBSqeR5H5l0S7hOaNPU0aEjAAAAACggM2t094qDrLCnNANLfnZC6AgJ/fv3Dx2hkKg7AAAAkEZTmoG3Vr8YOkLC8uXLQ0coJOoOAAAApNGUAgAAAACCoSnNwFZ99wwdIWHYsGGhIxQSdQcAAADSaEoz0O+kH4eOkNDY2Bg6QiFRdwAAACCNpjQDq279SegICRMnTgwdoZCoOwAAAJBGU5qB1fP/EjpCwowZM0JHKCTqDgAAAKTRlAIAAAAAgqEpBQAAAAAEQ1OagQH/dXXoCAlLly4NHaGQqDsAAACQRlOagTdWPBU6QgKjwIZB3QEAAIA0mtIMrLzxu6EjJIwbNy50hEKi7gAAAEAaTSkAAAAAIBiaUgAAAABAMDSlGdjpyNNCR0iYNm1a6AiFRN0BAACANJrSDGxff1ToCAkTJ04MHaGQqDsAAACQRlOagWcuGhM6QoKZhY5QSNQdAAAASKMpBQAAAAAEQ1MKAAAAAAiGpjQD2+x5YOgICWPG5Otw4qKg7gAAAEAaTWkGdvn05NAREmbNmhU6QiFRdwAAACCNpjQDz//x/NAREsaOHRs6QiFRdwAAACCNpjQD6xY9EjpCwuzZs0NHKCTq
DgAAAKT9//buP9bq+r7j+OsVlLUpRgtllICCrcbGPyYqczaaptN03lZTt9Qsmq6yrhOToYGkgLOz7gAADZhJREFUy2RNGmBNkzbZql21Jqw6cXF2HbYbsQZnWpPNJnMipfUHMwODE0TopCKuSwn2vT/Oh8v5ci6C3Hs+n8/3fJ+P5Oae7/fce86Lz/vec/Ph/fl+DpNSAAAAAEAxp5QOgHwWrvz+hLcP2/GVq3PGAQAAAAA6pTksuK2uZZu15emKiCgdAQAAAKgOk9IMDmzZWDpCQ215umLt2rWlIwAAAADVYVKawb5H7ywdoaG2PF1x8803l44AAAAAVIdJKQAAAACgGDY6wriJNj/qx0ZIAAAAAKYandIMZn/qi6UjNNSWpys2bNhQOgIAAABQHSalGUyfc07pCA215emKiy++uHQEAAAAoDpMSjPY9c0lpSM01JanK+bNm1c6AgAAAFAdJqUAAAAAgGKYlAIAAAAAimFSmsGMC64qHaGhtjxdcdNNN5WOAAAAAFSHSWkGs8ZuLR2hobY8XbF27drSEQAAAIDqMCnNYPd9y0tHaKgtT1ew+y4AAAAwiElpBgf3bC8doaG2PF2xefPm0hEAAACA6jApBQAAAAAUc9KTUttn2n7c9vO2n7O9PJ1fbXuX7S3p4xNTF7edps2YWTpCQ215umLu3LmlIwAAAADVOWUS33tI0ucjYrPt0yQ9bfuxdN/tEfGXk483GuYvu790hIba8nTFK6+8UjoCAAAAUJ2T7pRGxO6I2JxuH5C0VdK8qQo2Sl5/4oHSERpqy9MVq1evLh0BAAAAqM6UXFNqe6GkCyU9mU7dYvuntu+1/d6peI422/+jB0tHaKgtT1esWbOmdAQAAACgOpNZvitJsj1D0kOSVkTEG7bvlvQlSZE+/5WkP5rg+5ZKWipJZ5111mRjYAQsXPn9t71/x1euzpQEAAAAQC6T6pTaPlW9CekDEfFdSYqIPRHxVkT8StLfSLpkou+NiLURsTgiFs+ePXsyMQAAAAAALTWZ3Xct6R5JWyPia33n+7cY/T1Jz558vNHw/iV3lI7QUFuerti0aVPpCAAAAEB1JrN89zJJn5H0jO0t6dwXJN1ge5F6y3d3SLp5UgkBAAAAACPrpCelEfGEJE9w1yMnH2c0vbpuhRbc9nDpGONqy3Oi2n7N6eLFixURpWMAAAAAVZmS3XcBAAAAADgZTEoBAAAAAMUwKc3g9MtuKB2hobY8XbFq1arSEQAAAIDqMCnN4IzLP106QkNtebpi9erVpSMAAAAA1ZnM7rs4QTvvulHzl91fOsa42vKMkrfbjGnnXTfq0IHXMqYBAAAA6kenNIO33txXOkJDbXm6gnEHAAAABjEpBQAAAAAUw/LdDKbP+WDpCA0nm6ft7xNaWm0/BwAAAEAN6JRmMPcPv146QkNtebqCcQcAAAAG0SnN4LWN39CssVtLxxhXKs/xOq01ZBhmt/e1jd+Q6CYDAAAADXRKM3jzJ4+WjtBQW56uYNwBAACAQXRKgXeghm4vAAAAMErolAIAAAAAimFSmsG8P1lXOkJDbXm6gnEHAAAABjEpzeDgnm2lIzTUlqcrGHcAAABgEJPSDH720JdKR2ioLU9XMO4AAADAIDY6AjIq+ZY0AAAAQI3olAIAAAAAimFSmsHMq24pHaGhtjxdwbgDAAAAg5iUZnDaorHSERpqy9MVjDsAAAAwiElpBi999ZrSERpqy9MVjDsAAAAwiI2OMDKOt4lQrscY5vOzERIAAABGDZ1SAAAAAEAxdEozePcHf7N0hIba8nRFDeN+Ip1gurEAAADIiU5pBr9+3arSERpqy9MVjDsAAAAwiElpBnvXrykdoaG2PF3BuAMAAACDWL6bwf9tf6p0hIZh5Sm9SVDtavs5OBY2WwIAAEBOdEoBAAAAAMXQKQWAytCtBgAAXUKnNIMFtz1cOkJDbXm6gnEHAAAABjEpzeDAlo2lIzTUlqcrGHcAAABg
EMt3M9j36J06bdFY6RjjasvTFV0Z92EvPZ3shlosfQUAAKgLnVIAAAAAQDF0SoEW4W13Ju9ExpBuKgAAQD50SjOY/akvlo7QUFuermDcAQAAgEFMSjOYPuec0hEaasvTFYw7AAAAMIjluxns+uaSqt4OpLY8XTEq4z7sJcQsUQYAAOgWOqUAAAAAgGLolALIik4oAAAA+tEpzWDGBVeVjtBQW56uYNwBAACAQXRKM5g1dmvpCA215ekKxr07jtcNnuxbzgz78QEAAHKiU5rB7vuWl47QUFuermDcAQAAgEFMSjM4uGd76QgNteXpCsYdAAAAGMTyXQDomKnYbGrUlyDXng8AgFFCpzSDaTNmlo7QUFuermDcAQAAgEF0SjOYv+z+0hEaasvTFYx7e5TeqGgUdOHfOGx0awEAXUGnNIPXn3igdISG2vJ0BeMOAAAADGJSmsH+Hz1YOkJDbXm6gnEHAAAABrF8FwDeIZamDn8MWLp6fIwRAGBU0CkFAAAAABRDpzSD9y+5o3SEhtrydAXjjlzo5A7fiYzx8TqVXeg215BhmGp4eyUAGAVD65TaHrP9gu1ttlcO63kAAAAAAO01lE6p7WmS7pL0MUk7JT1le0NEPD+M56vdq+tWaMFtD5eOMa62PF3BuANTZ7IduBzd5No71m3oYrYhY+1Kv8UVNapDF+o06v/GqVihU7NhdUovkbQtIl6MiIOSvi3p2iE9FwAAAACgpYY1KZ0n6eW+453pHAAAAAAA4xwRU/+g9nWSxiLij9PxZyT9VkTc0vc1SyUtTYfnSXphCp76fZL+ZwoeB+VQw9FAHduPGrYfNWw/ath+1HA0UMepsSAiZk90x7B2390l6cy+4/np3LiIWCtp7VQ+qe1NEbF4Kh8TeVHD0UAd248ath81bD9q2H7UcDRQx+Eb1vLdpySda/ts29MlXS9pw5CeCwAAAADQUkPplEbEIdu3SHpU0jRJ90bEc8N4LgAAAABAew1r+a4i4hFJjwzr8Y9hSpcDowhqOBqoY/tRw/ajhu1HDduPGo4G6jhkQ9noCAAAAACAEzGsa0oBAAAAADiukZmU2h6z/YLtbbZXls6Didm+1/Ze28/2nZtp+zHb/5U+vzedt+2/TjX9qe2LyiXHYbbPtP247edtP2d7eTpPHVvC9rts/4ftn6Qarknnz7b9ZKrVP6SN6mT719LxtnT/wpL5cYTtabZ/bPvhdEwNW8b2DtvP2N5ie1M6x+tpi9g+w/Z62/9pe6vtD1PD9rB9Xvr9O/zxhu0V1DCvkZiU2p4m6S5JH5d0vqQbbJ9fNhWO4T5JY0edWynpBxFxrqQfpGOpV89z08dSSXdnyoi3d0jS5yPifEmXSlqWft+oY3v8UtIVEXGBpEWSxmxfKumrkm6PiHMk/VzS59LXf07Sz9P529PXoQ7LJW3tO6aG7fTbEbGo7y0neD1tl69L2hgRH5J0gXq/k9SwJSLihfT7t0jSxZJ+Iel7ooZZjcSkVNIlkrZFxIsRcVDStyVdWzgTJhAR/ypp31Gnr5W0Lt1eJ+l3+87fHz3/LukM23PzJMWxRMTuiNicbh9Q74/vPFHH1ki1eDMdnpo+QtIVktan80fX8HBt10u60rYzxcUx2J4v6WpJ30rHFjUcFbyetoTt0yV9RNI9khQRByPidVHDtrpS0vaIeEnUMKtRmZTOk/Ry3/HOdA7tMCcidqfbr0qak25T18qlJYAXSnpS1LFV0rLPLZL2SnpM0nZJr0fEofQl/XUar2G6f7+kWXkTYwJ3SPozSb9Kx7NEDdsoJP2L7adtL03neD1tj7Ml/UzS36al9N+y/R5Rw7a6XtKD6TY1zGhUJqUYEdHbDpotoVvA9gxJD0laERFv9N9HHesXEW+lpUrz1Vtt8qHCkfAO2L5G0t6IeLp0Fkza5RFxkXpLApfZ/kj/nbyeVu8USRdJujsiLpT0vzqyzFMSNWyLdA3+JyX949H3UcPhG5VJ6S5JZ/Yd
z0/n0A57Di97SJ/3pvPUtVK2T1VvQvpARHw3naaOLZSWmT0u6cPqLUE6/P7V/XUar2G6/3RJr2WOiqbLJH3S9g71Llm5Qr3r2qhhy0TErvR5r3rXsV0iXk/bZKeknRHxZDper94klRq2z8clbY6IPemYGmY0KpPSpySdm3YdnK5e631D4Uw4cRskLUm3l0j6577zN6Zdzi6VtL9vGQUKSdeh3SNpa0R8re8u6tgStmfbPiPdfrekj6l3bfDjkq5LX3Z0DQ/X9jpJPwze5LqoiPjziJgfEQvV+5v3w4j4tKhhq9h+j+3TDt+W9DuSnhWvp60REa9Ketn2eenUlZKeFzVsoxt0ZOmuRA2z8qj8TbL9CfWur5km6d6I+HLhSJiA7QclfVTS+yTtkbRK0j9J+o6ksyS9JOn3I2Jfmvzcqd5uvb+Q9NmI2FQiN46wfbmkf5P0jI5cy/YF9a4rpY4tYPs31Nu0YZp6/zn5nYj4C9sfUK/rNlPSjyX9QUT80va7JP2detcP75N0fUS8WCY9jmb7o5L+NCKuoYbtkur1vXR4iqS/j4gv254lXk9bw/Yi9TYcmy7pRUmfVXptFTVshfSfQv8t6QMRsT+d4/cwo5GZlAIAAAAA2mdUlu8CAAAAAFqISSkAAAAAoBgmpQAAAACAYpiUAgAAAACKYVIKAAAAACiGSSkAAAAAoBgmpQAAAACAYpiUAgAAAACK+X+PmfEKu+ADjwAAAABJRU5ErkJggg==\n",
+ "text/plain": [
+ "<Figure size 1152x576 with 1 Axes>"
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "items_per_user=df.groupby(['user']).count()['rating']\n",
+ "\n",
+ "plt.figure(figsize=(16,8))\n",
+ "plt.hist(items_per_user, bins=100)\n",
+ "\n",
+ "# Let's add median\n",
+ "t=items_per_user.median()\n",
+ "plt.axvline(t, color='k', linestyle='dashed', linewidth=1)\n",
+ "plt.text(t*1.1, plt.ylim()[1]*0.9, 'Median: {:.0f}'.format(t))\n",
+ "\n",
+ "# Let's also add the 25% and 75% quantiles\n",
+ "t=items_per_user.quantile(0.25)\n",
+ "plt.axvline(t, color='k', linestyle='dashed', linewidth=1)\n",
+ "plt.text(t*1.1, plt.ylim()[1]*0.95, '25% quantile: {:.0f}'.format(t))\n",
+ "\n",
+ "t=items_per_user.quantile(0.75)\n",
+ "plt.axvline(t, color='k', linestyle='dashed', linewidth=1)\n",
+ "plt.text(t*1.05, plt.ylim()[1]*0.95, '75% quantile: {:.0f}'.format(t))\n",
+ "\n",
+ "plt.title('Number of ratings per user', fontsize=30)\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAA6UAAAHvCAYAAACsfXllAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nOzde5wU5ZX/8e8BREW8oYgwqBhEgyiO0CrG1RCzBhYB7wluvLC6DJto1JgYSfwp4CWriSZeYxxWBXa9xNXVABrwHqPxkhkEY7xCgkFARBTlohL0/P6oGmiGnu4Giqeq8PN+vfpFd9VTdZ4+XTPM6afqKXN3AQAAAACQhlZpdwAAAAAA8MVFUQoAAAAASA1FKQAAAAAgNRSlAAAAAIDUUJQCAAAAAFJDUQoAAAAASA1FKQAEYGZjzMzjR/+0+5M3ZtbBzK4wsxfNbKmZfR7ncknafdtUzGx80THTLe3+ID1m1r/oWBiTdn8AIGlt0u4AgM2LmTW/+XE/d3++wjbDJN0Vvxzr7mM2Rd+QT2bWSdJzkrql3JWNEn8Z0T9+Od7d56TWGWx2zOw8STtIWuLu16bdHwBYHxSlADa1n0r6etqdQK5dpDUF6TOS/kfSQkku6R8p9WlD9Jc0On7+pKQ5aXUEm6XzJO0h6S1JFKUAcoWiFMCmdqSZ/bO7P5p2R5Bbg+J/P5D0DXdfkWZnQnH34ZKGp9wNZIC7PynJ0u4HAGwqXFMKYFMpLhx+mlovsDnYLf739S9KQQoAwBcJRSmATWWupPvj5weZ2fFpdga51jb+99NUewEAADYJilIAm9L/k/R5/PxyM2u9oTsqmnnyyY1ta2ZPNrWJX7cyszPi5e+a2XIze9nM/p+Zbdts213N7DIze8nMPjKzD83sKTP75ga8p0Fm9lsze9vMPo3/vcvMDl2PfexsZheZ2R/M7B0zW2lmi+LXPzKz9hW2nxPnYk78eiszO8fMnjazhfEst0+u73sr2n87M/u+mT0R9+/TOMdPm9mPzWz7FrZbPVtx0eKvFn22GzSTsZkNL9p2eLysYGb/ZWaz4s9+rf1a5PB49t/HzWx+/D6Wm9nfzOxuMxtSJuaY+H2MLlr8RIn38mSz7crOvltqRlYz293MrjGz1+L+LTGzP5rZd82sqkt2zOw4M3sw/vw/iY+R/zGzQ1rKYQv7OcbM7jGzv5rZinhf88xsppn9b9ynnarpUwv7XytvZtbRzC41sz/HP5sfmVmjmY0ys63XY78D49y/adFMzyvMbHa87J8qbLvex1eVfWpx9t2mn2FF15NK0h4ljq0WPysza2tmZ5rZJDObG39OSyz6HXdNqWOv2fbrHKdmdoKZ/S7+Wfk4Ph5/bmYdm227vZldEH9OH5jZMjP7k5n9h5nxNyrwReLuPHjw4JHYQ9HkMy7ptfj1+KJlw1vYZlhRmzEV9vvkevShZFtFk8w0tWkv6dGi180f0yXtGG93qKR3y7S9ukyfxhS16y/ppjL7+UzS6Cre53BJH5XZj0t6R9KhZfYxJ243R9Kekl4usY+KOW9h3/0kzavQv/cUXSdaLl/lHv3Xs0/Di49HSaMkrSq3X0m3V9mX30nabiPey5PNthtftK5bif32L1o/RtJARdfdtrT/hyVtWSY3W0i6p8z2qyT9oHkOS+xna0lTqnzP5yXwu+ZJSQdKertMnDcl7Vlhfx0lPVZFn/9L0hZJHV9Vvte1PusWfoYrPUp9VgVJf62w3aeSRpbpW/Fx2l3RRGQt7WuOpD3i7faRNKtM23sk2YYeHzx48MjXg4mOAGxqYySdrOgUzDFmdqe7r0y3S2u5XdHswM8o+iPoHUUjDmfF/x4o6VozGy1pmqL38V+Snpa0UtLhkkYomjjuB2Y21StP6nSupGMVFWT/JeklSe0UFRUnKDqLZYyZLXb3G0vtwMzO1ZoZNldIulfSHyUtlrRzvK+hkjpJetTM
DnL3V8r0aUtJ/yepV/ze7pM0X9Ef6p0qvJ9S/TtQ0uOKChRJelHSnZL+LmlXSd+UdJiknSRNMbNveDSZS5O7Jc2InzedBv4XRaPvxV5e374V+ZaiPH0oaYKkRkVfCBwQL2uytaI/zH8v6QVJsyUtV5SbvSWdKqlDvK+Jij7bYk3vZVgcU5IuLtH39zbivdRKukDRZDi3SHo27nNB0n9I2kbSUYpmMr6khX3USzopfv6JomLjWUU5KUg6U9LVio61cn4q6ej4+QJFRcpfJC1T9CXQXoq+4Dmi+rdX1vaKjt0aRYX3A5LeV1T0nClp9zjmY2ZW6+4fNd+BmXVQ9F67x4tmxvucpehsj/0UFZk18T7bqPIkVNUeXxurTtHvj3pFx+SieFlz04tfWHRGxqPxtq7o99vDir5I2lrRZ3RqvP7XZvapu4+v0JcrJZ2o6Hfa/yiaCXhXRb8j91P0O3WimR0j6RFF+fxN3I+lkvoo+t27jaJj8WFFvyMBbO7Srop58OCxeT205lvu14qWXV+0/JwS26Q5UuqSflKiTUetGeVbpaioeFdS7xJtTy3a10MtxBzTLObLknYp0e5YRbc5cUWFz+4l2hSK2rxYqk3cbrCiwtklPddCmznN+vX9BI6BVlp7xPVaSa1KtLu4qM3fJW21sZ99FX0b3uz9viqpS4VtDpe0Q5n122jtEcavVnEM9K+ir+OL2ncrsb5/s/fylqQeJdodXHS8vK8So6WKvphp2s8iSfuVaNOtxPEyvFmb1pKWaM2o2DrHeLOfsS9vxGfZfGTtrBJt2kt6oqjNDS3s6/54/eeSzm2hTXtFhVvTvgYmcXxV+V6LP+sxLbRp+mzmVLG/beOfOVc0ut7SMbtXfFy5oi8Vdq5wnLqkX6vZz7uiIndmUZsGRV+mHVlif0fEn4NLemVjc8eDB498PDhfH0AIVygqsCTpIqtwnWNg09x9ndmB3X2RpKZRytaKRjfOdveXSrT9b0WnB0rRLXAqnYWyStK33P3dEvt6QNI18ct2kr5TYvtLFI3ULJU02N3/XiqIu09RNHIhSYeY2Vcq9Ot+d/9lhTbVGKxoxFWSnlNU6H7evJG7XybpwfjlbpJOSSD2+nBJw9x9ftlG7n9w9yVl1i9XNHrWdIyfmlwX18sp7v5m84Xu/oKi0ShJ2lFRkdrc94uen+3u64xAu/scVR4d7Kho5FKSflvqGC/a3yJ3f63C/qp1t7vfVCLGMkVfejWNjp5pZjsUtzGzPlozuv1Ld7+uhf427atplPP8Cn2q6vhKyQitmdX6NHf/falG7j5L0r/FL7dR6RHYYi8rOn7W+nl394+15neRJPVVVFw/XiLmU4pGTiWpp5nt1rwNgM0PRSmATc7dF2rNqaa7KLrJe1aUPD029kzR84Uqf9ri0/G/W2rNKYAtmebufymz/lpFp/lJ0nHFK8xsR605NfIud59XIdb/FD3/RoW2N1RYX63imZZ/7u5epm3xH6qhZ2j+g7vPTGJH7r5U0p/jl4cksc/19KK7/6HM+uI//vctXmFmW2nNsTFf0v+2tBOPTrFe54uZIh+3FGcTu6alFfHvn6afg60VnVJbrOlLBC+3n3hfH0h6KH55hJltWaZ5YsfXJtD0nt9w98nlGsaFY1NhXel3yC3uvqqFdcW/Tz9TdJp5S54ueh7yOAKQEq4pBRDKzxWN+nWQ9EMz+5W7v59ynyTp+TLrFhY9byw12tdC2x0rxHys3Ep3f8fMXlV0DdbeZra9uzeNzhymNV8ofmZmza9fbG6Louc9y7T7TNE1dUloGolzRdeNlfNHrbnWMHQxV66IW0tcfHxT0jGKRs07KeqzlWjeNZHerZ/nKqwv/vKi+fF5gNYcJ09VOM6l6PT33qVWuPuHZvaComPgn83sfkVfdvzB3f9RYb8b6kNF12uW87ik78bPD1J0nW+Tw+N/l0g62KzUR7qWLYv+/ZKiU3RLqfr4CsmiGa+bPr+FVfwOkaKfUan8
7xCp+t+nrxf9TqvUttLvUwCbAYpSAEHEf6xeJekqRaf3jZL0o3R7JSmaGKglxffFLNeuedutKrSdVWF9U5v9FBU9u2rNKYPditp8R6VP721JuT/uFrv7J+uxr3I6x/++E48gtsjdPzez2YoKow5m1tbDTYRVaZRZkmRm+yua+KlHlfvdboN7tOEqTZJU7vjsUvT8r1XEqtTmLEVfvGyn6LTYYyUtN7PnFY2APSrpmSqK32rNrjAaL639M9el2bpu8b87as2kWtUq9zNV1fGVgt205outw7WmKK9GpQKxxd+T7v5pUcGf5O9TAJsBTt8FENINWnMa2Nlm1vyPw+DW4w/jpP6AlqIJPipZXvS8+Brckvf1rFLbMus+LrNufTXd23V52VZrLCt6vm2LrZJX8T3Hs7I+qjUF6VxJNyuaQflfFZ1yfFz8aDolO43/Wzfm+Nym6Pn6HpvrcPcGRbMBT9SaHG8j6UhF10M/JWm2mX17/bta0sb8PEn5+JlK0sa83y0qrE/j9ymAzQAjpQCCcfePzewyRX/Ub63oD9T/SDJGTm643q6KNsWFwrIWnp/h7rcn06VELZW0g9Z+D+UUFwllR1ZTcLai66Cl6LYe/97SNXNmdlGwXiWruGBb32OzJHf/m6TTzWykoluLfEXSP0n6qqKf/W6S/sfM9ig10dh62pifp6bXO0j6u7vvsZF9yYPi9z/R3U9PrScAEMvDH28ANi+3KrrPoxTNhLlXlds1ndJZbmRCiu7RmXXVvOemNq7o3qlNik8JTOPaxWosiP/dtdJMyxadz9c0MdTigKfuVuuf439XSTqvzCQuUnQPxjwqnh32S1W0r6aNJMndP3H3J9z9Cnf/F0UF/oWKjmtJusTMdqq+qyV1t8oXghb/zDWfDbfpZ2oXM6s0Erg5yMPvEABfMBSlAIKKJzu5JH7ZRtKlVW7adEuOSqf8pjHz6fo6stxKM9tVayYUeaPZhCBPac0f9JVmwkzLC/G/pjVFXUu+ojUjpS+Ua5iSTvG/i8vdFsbMDlR0O5Ryik9ZrDibTkAzFd3HVIpmlK30t0H/DQ3k7svc/WeKrtGVosmCDtrQ/cW2l9SnQpuvFT3/U7N1TbdD2UrRPTLzqun4Kntsuft7kl6JX/YzszSugQaAtVCUAkjDXVpzW4lhiia5qaTpj6g9zKzcSM05G9OxQAaaWblZLM9RdG9USfq/4hXxfR+nxi//ycyyWJjeV/T8hxVGsS5sYbusaLpecRczK3e96yVl1jUpPm2y2lObN7l4gquH45ddJJ3UUlsz668WZt5dT3OKnidxKVGL9ww1s45acw/cFVrz89NkYtHz0WbWWvnUdHxVc2xNiP9tp2jSOQBIFUUpgODimTKbrr8zSd+rYrPiPySvKlXomNmlqjwylwVtJP0m/mN5LWY2RNIP45crFF1/29z/05qRrbvNrPl9F5vvcw8zu9rMdinXLkEPas2kP4dJ+nmp0Tcz+4mkIfHLuZLuCNO99dI0qmaSLm++0iKXKZphtpK/FT2vNLIX2i+Lnt9oZvs1b2Bm3SSNL7cTMzvQzC42s05l2uysNYWvq/x9T6v1r2a2zvXpZraNoi/Bmib3ua35iLe7P681X4gcLumOcqOHZtbGzI43s7MS6HeSmo6vncxs9wptb5L0Vvx8lJldUG6E3My2N7NzzCwPv18B5BATHQFIhbtPMbM/Kjp9s5pv9m9TdAuZDpJOlPQHM7tD0a0wdlc04lpQdP/BYZuk08l5QFER8xczGyfpz4pGLAYo+mO9qeC+0N3nNt/Y3aeb2XckjVN0i4bfmdkzkn6n6A/TfyjK05cVTS5TiDe9dpO9o7X797mZnaLoHqRbS/qBpK/Fn9fbik6J/WbcN8X9PS3BW9Ik6VeSzlA0cn2OmdUqGr1+R9GtNf5V0oGKRvI/ltS3zL7+oOi9biHpAjNrKsiabn/xvruncgqzuz9mZuMlDVd0Xfaf4td/VHRaaEFRHraTdK+i
n0Fp3VlUt1d0Sv7o+Jj8o6Q3FE1g1UHS/opy1iFuf4e7/30juz9D0URFN8f33Lxf0en+e0s6U2uu9f2b1nwZ1twZcfv9JX1L0gAzu0dSg6QPFB3HNYo+66MU/dzdupH9TtpjkobGz//PzG5WdH1302f0Z3efJ0nuvjzO1e8VfaY/kzTSzO5TdCwvi5d/SdE9Z/srup7/1DBvBcAXDUUpgDT9WGuu5yrL3ReZ2amKCoItFY3AHdas2RRFf1xmvSi9TtFkI2dJ+kmJ9S7pUne/saUduPutZvauosK0k0rno9hiScGKPnefYWZfVzQC1VnRyGCp0cH3Jf2ruz8Zqm/rI34f35N0o6Kzi47QutcdvirpGEn/VWFf75nZ1YqO+/Za93rq32sjrtdMQJ2ifp2o6PrK/9Das2N/rmgU/0OtKUqbz5bcdL1za5XOVbHfxDE31oeS/k3Rz/+A+NHcbEnfcPePSu3A3T8ys3+SVK+oKN0h7lu5/jWfMClttyn6nbK3oi9Hmh+P/6aike742D5Y0UjygYomHCt37+hPVfl+uACwQTh9F0Bq3P0prXt9V7n2Dym6/vR2SX9XNCPvIklPKPoGf6i7Z/XegGtx97MlHS1psqI/blfG//5G0mHuPqaKfUyWtKeiwmGSolNgP9aavDyr6N6wQyR1iSc4Ccbdn1V0f8/zFRVcixSNFC6O+3aRpO7uPi1kv9aXu9+sqOD/X0UjpP+Q9K6iUcDzJRXcfVaV+/qJpJMVHffvaM2s0qlz93+4+0mSTlDUv0WKCpG/Kzq1+jB3v0ZS8Wy57zfbx+8VjTb+UNEXEq8pGnX7PP73FUXF01fdfVhSP6/uPkNRYXW5pJcVFcvLJL2o6Iuf3u7+1wr7+Mjdhyn68uTaeNvFimZeXibpTUVnOZyv6Lit5jriYNx9maR+kq6QNF1RsV72fqDu/rqiAvYYRdeZviHpI0mfKRptnqnomtvhkjq7e9W/rwFgfVh0aRcAAEBl8Smex8cvd3L398u134T9aPoD5vfu3j+NPgAAksFIKQAAqEo82dHg+OXMtApSAMDmhaIUAADIzLqbWdcy62sUTSLUNl50S5COAQA2e0x0BAAAJOlQSbeb2VOKZgqerega5Z0UXav4TUWzREvSc4omBQIAYKNRlAIAgCZtJB0ZP1rypKQT3P2zID0CAGz2KEoBAIAUzQRdp+g+nD0V3a+0g6IZghdKel7S3fGszwAAJCYTs+/uvPPO3q1bt7S7scEWLVqkjh07bvYx84g8AQAAAOlrbGx8z91L/mGeiZHSbt26qaGhIe1uAAAAAAA2ATN7q6V1zL6bgDFjxnwhYuYReQIAAACyLROn7xYKBc/zSKmZKXQe04iZR+QJAAAASJ+ZNbp7odQ6RkoBAAAAAKmhKAUAAAAApIaiNAFpnHqc59OdQyJPAAAAQLZRlAIAAAAAUsNERwlgoqPsIk8AAABA+pjoCAAAAACQSRSlAAAAAIDUUJQmYPTo0V+ImHlEngAAAIBs45pSAAAAAMAmxTWlm1iXLl2+EDHziDwBAAAA2UZRmoAFCxZo7ty5+trXvqZ9991XvXr10nXXXbd6/ZgxY1RTU6Pa2lrV1tbqoYcekiQ988wz6t27twqFgt58801J0pIlS/SNb3xDn3/+ecWYm8oDDzygV155ZfXrSy65RI8++qgkqX///ht1788nn3xStbW16tWrl7761a9udF8raSlPr7/++urPo7a2Vtttt52uvfZaSZvm89qUNtXn9eGHH2rIkCE64IAD1KtXL91+++2r102YMEE9evRQjx49NGHChI17AwAAAPhCa5N2BzYXbdq00TXXXKM+ffpo6dKl6tu3r4466ijtu+++kqTvf//7+uEPf7jWNtdcc40eeughzZkzR7/+9a91zTXX6PLLL9dPfvITtWqV3vcFDzzwgAYPHry675deemki+12yZIm++93vaurUqdp999317rvvJrLfDbHPPvtoxowZkqTP
PvtMNTU1Ou6441av5/OSbrrpJu27776aPHmyFi1apH322Uff/va3tWzZMo0dO1YNDQ0yM/Xt21dDhw7VjjvumEhcAAAAfLEwUpqAPn36qHPnzurTp48kadttt1XPnj01b968stttscUWWrFihVasWKEttthCs2fP1ty5c9W/f/8Wt5k6daq+/OUva+utt9Y555yjwYMHS4pG966++urV7fbbbz/NmTNHknTssceqb9++6tWrl+rr61e3ad++vS666CIdcMAB6tevnxYuXKg//vGPmjRpki644ALV1tZq9uzZGj58uO699951+vLwww/r0EMPVZ8+fXTSSSdp2bJlZd/vnXfeqeOPP1677767JGmXXXYp2z4JTZ9JOY899pi6d++uPfbYo2y7jfm8+vTpk7vPy8y0dOlSubuWLVumDh06qE2bNpo2bZqOOuoodejQQTvuuKOOOuooTZ06tey+AAAAgJZQlCagsbFxrddz5szRiy++qEMOOWT1shtvvFG9e/fWGWecoQ8++ECS9OMf/1innXaa/vM//1Nnn322LrroIl1++eUtxvnkk080YsQITZ48WcuXL9c777xTVf9uu+02NTY2qqGhQddff70WL14sSVq+fLn69eunmTNn6ogjjtC4ceP0la98RUOHDtXPf/5zzZgxQ927dy+5z/fee0+XX365Hn30UU2fPl2FQkG/+MUvJEWnj06aNGmdbd544w198MEH6t+/v/r27auJEydW1f+N0fyzKeXuu+/WySefvNaypD+vxsbG3H1eZ599tl599VV16dJF+++/v6677jq1atVK8+bN02677ba6XdeuXSt+AQMAAAC0hKI0AXV1daufL1u2TCeccIKuvfZabbfddpKk73znO5o9e7ZmzJihzp076wc/+IEkqba2Vs8995yeeOIJ/fWvf1Xnzp3l7vrWt76lU045RQsXLlwrzmuvvaY999xTPXr00MiRI3XKKadU1b/rr79+9eja3LlzV18P2bZt29Ujd3379l09UleN5557Tq+88ooOO+ww1dbWasKECXrrrbckRaePDh06dJ1tVq1apcbGRj344IOaNm2aLrvsMr3xxhtVx9wQxZ9NKStXrtSkSZN00kknrV62KT4vM8vd5zVt2jTV1tZq/vz5mjFjhs4++2x99NFHVccEAAAAqkFRmoBx48ZJkv7xj3/ohBNO0Le//W0df/zxq9d36tRJrVu3VqtWrTRixAi98MILa23v7rr88st18cUXa+zYsfrZz36mESNG6Prrr68Ys0mbNm3Wmmznk08+kRRNLPToo4/q2Wef1cyZM3XggQeuXrfFFlvIzCRJrVu31qpVq6p+z+6uo446SjNmzNCMGTP0yiuv6NZbby27TdeuXTVgwABts8022nnnnXXEEUdo5syZVcfcEM3z1Nzvfvc79enTR506dVq9bFN8Xs3l4fO6/fbbdfzxx8vMtNdee2nPPffUa6+9ppqaGs2dO3d1u7fffls1NTVV9wUAAAAoRlGaEHfXmWeeqZ49e+r8889fa13xDLD333+/9ttvv7XWT5w4UYMGDVKHDh20YsUKtWrVSq1atdKKFSvWavflL39Zc+bM0ezZsyVJd9111+p13bp10/Tp0yVJ06dP19/+9jdJ0QyqO+64o9q1a6fXXntNzz33XMX3su2222rp0qVl2/Tr10/PPPOMZs2aJSk6tbTSqOcxxxyjp59+WqtWrdKKFSv0/PPPq2fPnhX7synddddd65y6y+cV2X333fXYY49JkhYuXKjXX39dX/rSlzRgwAA9/PDD+uCDD/TBBx/o4Ycf1oABAyr2EwAAACiF2Xer1G3Ug2XXP/PMM/rv//5v7b///qqtrZUk/fSnP9WgQYP0ox/9SDNmzJCZqVu3brrllltWb7dixQqNHz9eDz/8sCTp/PPP16BBg9S2bVvdeeeda8XYaqutVF9fr6OPPlpSNFFQUzFywgknaOLEierVq5cOOeQQ7b333pKk
gQMH6te//rV69uypffbZR/369av4XocNG7Z65K/UhDmS1LFjR40fP14nn3yyPv30U0nS5Zdfrr333luXXHKJCoXCOqeE9uzZUwMHDlTv3r3VqlUr/fu///s6BV9Iy5cv1yOPPLLW5yFpk3xe7dq10+GHH56rz+viiy/W8OHDtf/++8vdddVVV2nnnXdeve6ggw6SFF2T2qFDh4r9BAAAAEoxd0+7DyoUCr4x974MoVxRumrpYr1902kBeyPNnz9fb7zxhq6++mpNmTIlaOw8mT9/vrp06ZJ2NyRFp+byeQEAAOCLyMwa3b1Qah2n7yZg5cJZwWNWM6ssyBMAAACQdYyUVqncSOlbVw1W6DyaWfCYeUSeAAAAgPQxUgoAAAAAyCSKUgAAAABAaihKE9BhwNnBYzafMRalkScAAAAg2yhKE7Bt7cDgMevq6oLHzCPyBAAAAGQbRWkC3rpqcPCYZhY8Zh6RJwAAACDbKEoBAAAAAKmhKAUAAAAApIaiNAFbdz8oeMzBg8OfMpxH5AkAAADINorSBOxy4ujgMSdPnhw8Zh6RJwAAACDbKEoT8O69Y4PHHDJkSPCYeUSeAAAAgGyjKE3Ax7P/FDzmlClTgsfMI/IEAAAAZBtF6WbEzHTKKaesfr1q1Sp17Nhxva+r7N+/vxoaGiRJgwYN0pIlSxLt54wZM3TooYeqV69e6t27t37zm9+sXnf44YertrZWtbW16tKli4499thEYwMAAADIljZpdwDJ2WabbfTyyy/r448/1tZbb61HHnlENTU1G7XPhx56KKHerdGuXTtNnDhRPXr00Pz589W3b18NGDBAO+ywg/7whz+sbnfCCSfomGOOSTw+AAAAgOxgpDQBe1wY/hRRdy+5fNCgQXrwwQclSXfddZdOPvnk1euWL1+uM844QwcffLAOPPBA/fa3v5Ukffzxxxo2bJh69uyp4447Th9//PHqbbp166b33ntPknTssceqb9++6tWrl+rr61e3ad++vS666CIdcMAB6tevnxYuXFi273vvvbd69OghSerSpYt22WUXLVq0aK02H330kR5//PGNHiltKU8AAAAAsqHqotTMWpvZi2Y2JX69p5k9b2azzOw3ZtY2Xr5l/HpWvABhjiIAACAASURBVL7bpul6diydMTV4zOKisNiwYcN0991365NPPtFLL72kQw45ZPW6K664QkceeaReeOEFPfHEE7rgggu0fPly3XzzzWrXrp1effVVjR07Vo2NjSX3fdttt6mxsVENDQ26/vrrtXjxYklRsduvXz/NnDlTRxxxhMaNGydJmjRpki655JKy7+OFF17QypUr1b1797WWP/DAA/r617+u7bbbruqclNJSngAAAABkw/qMlJ4r6dWi11dJ+qW77yXpA0lnxsvPlPRBvPyXcbvN2vvTbgwec+TIkSWX9+7dW3PmzNFdd92lQYMGrbXu4Ycf1pVXXqna2lr1799fn3zyif7+97/rqaeeWn0tau/evdW7d++S+77++utXj4bOnTtXb775piSpbdu2q69b7du3r+bMmSNJGjp0qC699NIW38OCBQt06qmn6vbbb1erVmsfis1HeTdUS3kCAAAAkA1VXVNqZl0lHS3pCknnm5lJOlLSv8ZNJkgaI+lmScfEzyXpXkk3mpk551EGM3ToUP3whz/Uk08+uXo0U4pOZb3vvvu0zz77rPc+n3zyST366KN69tln1a5du9VFrSRtscUWig4JqXXr1lq1alXF/X300Uc6+uijdcUVV6hfv35rrXvvvff0wgsv6P7771/vfgIAAADIl2pHSq+V9CNJn8evd5K0xN2bqo+3JTXNqFMjaa4kxes/jNsjkDPOOEOjR4/W/vvvv9byAQMG6IYbblh9neWLL74oSTriiCN05513SpJefvllvfTSS+vs88MPP9SOO+6odu3a6bXXXtNzzz23wf1buXKljjvuOJ122mk68cQT11l/7733avDgwdpqq602OAYA
AACAfKhYlJrZYEnvunvpCw03kJnVmVmDmTU0n+QmbzqecHHwmJMmTWpxXdeuXXXOOeess/ziiy/WP/7xD/Xu3Vu9evXSxRdH/f7Od76jZcuWqWfPnrrkkkvUt2/fdbYdOHCgVq1apZ49e2rUqFHrjG621MdS15Tec889euqppzR+/PjVt3+ZMWPG6vV33313IqfuNvUBAAAAQHZZpbNqzew/JZ0qaZWkrSRtJ+l+SQMk7eruq8zsUElj3H2AmU2Lnz9rZm0kvSOpY7nTdwuFgjfdFzOruo16sMV1q5Yu1ts3nRawN9L8+fPVpUuXoDHziDwBAAAA6TOzRncvlFpXcaTU3X/s7l3dvZukYZIed/dvS3pCUtO5l6dL+m38fFL8WvH6xzf360nn/er0yo0StrH3H/2iIE8AAABAtm3MfUovVDTp0SxF14zeGi+/VdJO8fLzJY3auC4CAAAAADZXVc2+28Tdn5T0ZPz8r5IOLtHmE0knJdA3AAAAAMBmbmNGShFrf8CA4DFHjBgRPGYekScAAAAg2yhKE7DTwO8Fj1lfXx88Zh6RJwAAACDbKEoTsGD8ucFjlrptC9ZFngAAAIBsoyhNwMqFs4PHnD59evCYeUSeAAAAgGyjKAUAAAAApIaiNAGt23cIHrNz587BY+YReQIAAACyjaI0AV3Pmhg85vz584PHzCPyBAAAAGQbRWkCljx9R/CYY8aMCR4zj8gTAAAAkG3m7mn3QYVCwRsaGtLuRlndRj3Y4rq3rhqs0Hk0s+Ax84g8AQAAAOkzs0Z3L5Rax0gpAAAAACA1FKUAAAAAgNRQlCZg19OvDR4z66c7ZwV5AgAAALKNohQAAAAAkBqK0gS8M+G84DELhZLXCKMZ8gQAAABkG0UpAAAAACA1FKUAAAAAgNRQlCZg+8NODh5z9OjRwWPmEXkCAAAAss3cPe0+qFAoeNZnSe026sGy6+dceXSgngAAAABAvphZo7uXnPCFkdIEvH3TacFjdunSJXjMPCJPAAAAQLZRlCbgs2XvB4+5YMGC4DHziDwBAAAA2UZRCgAAAABIDUVpAtp26h48Zp8+fYLHzCPyBAAAAGQbRWkCOg+/LnjMxsbG4DHziDwBAAAA2UZRmoDFU28IHrOuri54zDwiTwAAAEC2UZQmYNnMacFjjhs3LnjMPCJPAAAAQLZRlAIAAAAAUkNRCgAAAABIDUVpAmq+OyF4zHnz5gWPmUfkCQAAAMg2itIErFw4K3hMZpWtDnkCAAAAso2iNAGL7rsseMyhQ4cGj5lH5AkAAADINopSAAAAAEBqKEoBAAAAAKmhKE1AhwFnB495yy23BI+ZR+QJAAAAyDaK0gRsWzsweMy6urrgMfOIPAEAAADZRlGagLeuGhw8ppkFj5lH5AkAAADINopSAAAAAEBqKEoBAAAAAKmhKE3A1t0PCh5z8ODwpwznEXkCAAAAso2iNAG7nDg6eMzJkycHj5lH5AkAAADINorSBLx779jgMYcMGRI8Zh6RJwAAACDbKhalZraVmb1gZjPN7C9mNjZePt7M/mZmM+JHbbzczOx6M5tlZi+ZWZ9N/SbS9vHsPwWPOWXKlOAx84g8AQAAANnWpoo2n0o60t2XmdkWkp42s9/F6y5w93ubtf8XST3ixyGSbo7/BQAAAABgLRVHSj2yLH65RfzwMpscI2livN1zknYws84b31UAAAAAwOamqmtKzay1mc2Q9K6kR9z9+XjVFfEpur80sy3jZTWS5hZt/na8bLO1x4XhTxF1L/e9AJqQJwAAACDbqipK3f0zd6+V1FXSwWa2n6QfS/qypIMkdZB04foENrM6M2sws4ZFixatZ7ezZemMqcFj1tfXB4+ZR+QJAAAAyLb1mn3X3ZdIekLSQHdfEJ+i+6mk2yUdHDebJ2m3os26xsua76ve3QvuXujYseOG9T4j3p92Y/CYI0eODB4zj8gTAAAAkG3VzL7b0cx2iJ9vLekoSa81XSdqZibp
WEkvx5tMknRaPAtvP0kfuvuCTdJ7AAAAAECuVTP7bmdJE8ystaIi9h53n2Jmj5tZR0kmaYak/4jbPyRpkKRZklZI+rfkuw0AAAAA2BxULErd/SVJB5ZYfmQL7V3SWRvftfzoeMLFwWNOmjQpeMw8Ik8AAABAtq3XNaUorW2nvYLH7Nu3b/CYeUSeAAAAgGyjKE3AvF+dHjxmTc1mfZedxJAnAAAAINsoSgEAAAAAqaEoBQAAAACkhqI0Ae0PGBA85ogRI4LHzCPyBAAAAGQbRWkCdhr4veAx6+vrg8fMI/IEAAAAZBtFaQIWjD83eExmla0OeQIAAACyjaI0ASsXzg4ec/r06cFj5hF5AgAAALKNohQAAAAAkBqK0gS0bt8heMzOnTsHj5lH5AkAAADINorSBHQ9a2LwmPPnzw8eM4/IEwAAAJBtFKUJWPL0HcFjjhkzJnjMPCJPAAAAQLaZu6fdBxUKBW9oaEi7G2V1G/Vgi+veumqwQufRzILHzCPyBAAAAKTPzBrdvVBqHSOlAAAAAIDUUJQCAAAAAFJDUZqAXU+/NnjMrJ/unBXkCQAAAMg2ilIAAAAAQGooShPwzoTzgscsFEpeI4xmyBMAAACQbRSlAAAAAIDUUJQCAAAAAFJDUZqA7Q87OXjM0aNHB4+ZR+QJAAAAyDZz97T7oEKh4FmfJbXbqAfLrp9z5dGBegIAAAAA+WJmje5ecsIXRkoT8PZNpwWP2aVLl+Ax84g8AQAAANlGUZqAz5a9HzzmggULgsfMI/IEAAAAZBtFKQAAAAAgNRSlCWjbqXvwmH369AkeM4/IEwAAAJBtFKUJ6Dz8uuAxGxsbg8fMI/IEAAAAZBtFaQIWT70heMy6urrgMfOIPAEAAADZRlGagGUzpwWPOW7cuOAx84g8AQAAANlGUQoAAAAASA1FKQAAAAAgNRSlCaj57oTgMefNmxc8Zh6RJwAAACDbKEoTsHLhrOAxmVW2OuQJAAAAyDaK0gQsuu+y4DGHDh0aPGYekScAAAAg2yhKAQAAAACpoSgFAAAAAKSGojQBHQacHTzmLbfcEjxmHpEnAAAAINsoShOwbe3A4DHr6uqCx8wj8gQAAABkG0VpAt66anDwmGYWPGYekScAAAAg2yhKAQAAAACpqViUmtlWZvaCmc00s7+Y2dh4+Z5m9ryZzTKz35hZ23j5lvHrWfH6bpv2LQAAAAAA8qqakdJPJR3p7gdIqpU00Mz6SbpK0i/dfS9JH0g6M25/pqQP4uW/jNtt1rbuflDwmIMHhz9lOI/IEwAAAJBtFYtSjyyLX24RP1zSkZLujZdPkHRs/PyY+LXi9V+3zfzCvl1OHB085uTJk4PHzCPyBAAAAGRbVdeUmllrM5sh6V1Jj0iaLWmJu6+Km7wtqSZ+XiNpriTF6z+UtFOSnc6ad+8dGzzmkCFDgsfMI/IEAAAAZFtVRam7f+butZK6SjpY0pc3NrCZ1ZlZg5k1LFq0aGN3l6qPZ/8peMwpU6YEj5lH5AkAAADItvWafdfdl0h6QtKhknYwszbxqq6S5sXP50naTZLi9dtLWlxiX/XuXnD3QseOHTew+wAAAACAPKtm9t2OZrZD/HxrSUdJelVRcXpi3Ox0Sb+Nn0+KXyte/7i7e5KdBgAAAABsHtpUbqLOkiaYWWtFRew97j7FzF6RdLeZXS7pRUm3xu1vlfTfZjZL0vuShm2CfmfKHheGP0WUOr865AkAAADItmpm333J3Q90997uvp+7Xxov/6u7H+zue7n7Se7+abz8k/j1XvH6v27qN5G2pTOmBo9ZX18fPGYekScAAAAg29brmlKU9v60G4PHHDlyZPCYeUSeAAAAgGyjKAUAAAAApIaiFAAAAACQGorSBHQ84eLgMSdNmhQ8Zh6RJwAAACDbKEoT0LbTXsFj9u3bN3jMPCJPAAAAQLZRlCZg3q9Or9woYTU1NcFj5hF5AgAAALKNohQAAAAAkBqKUgAAAABAaihK
E9D+gAHBY44YMSJ4zDwiTwAAAEC2UZQmYKeB3wses76+PnjMPCJPAAAAQLZRlCZgwfhzg8dkVtnqkCcAAAAg2yhKE7By4ezgMadPnx48Zh6RJwAAACDbKEoBAAAAAKmhKE1A6/Ydgsfs3Llz8Jh5RJ4AAACAbKMoTUDXsyYGjzl//vzgMfOIPAEAAADZRlGagCVP3xE85pgxY4LHzCPyBAAAAGSbuXvafVChUPCGhoa0u1FWt1EPtrjurasGK3QezSx4zDwiTwAAAED6zKzR3Qul1jFSCgAAAABIDUUpAAAAACA1FKUJ2PX0a4PHzPrpzllBngAAAIBsoygFAAAAAKSGojQB70w4L3jMQqHkNcJohjwBAAAA2UZRCgAAAABIDUUpAAAAACA1FKUJ2P6wk4PHHD16dPCYeUSeAAAAgGwzd0+7DyoUCp71WVK7jXqw7Po5Vx4dqCcAAAAAkC9m1ujuJSd8YaQ0AW/fdFrwmF26dAkeM4/IEwAAAJBtFKUJ+GzZ+8FjLliwIHjMPCJPAAAAQLZRlAIAAAAAUkNRmoC2nboHj9mnT5/gMfOIPAEAAADZRlGagM7Drwses7GxMXjMPCJPAAAAQLZRlCZg8dQbgsesq6sLHjOPyBMAAACQbRSlCVg2c1rwmOPGjQseM4/IEwAAAJBtFKUAAAAAgNRQlAIAAAAAUkNRmoCa704IHnPevHnBY+YReQIAAACyjaI0ASsXzgoek1llq0OeAAAAgGyjKE3AovsuCx5z6NChwWPmEXkCAAAAso2iFAAAAACQGopSAAAAAEBqKhalZrabmT1hZq+Y2V/M7Nx4+Rgzm2dmM+LHoKJtfmxms8zsdTMbsCnfQBZ0GHB28Ji33HJL8Jh5RJ4AAACAbGtTRZtVkn7g7tPNbFtJjWb2SLzul+5+dXFjM9tX0jBJvSR1kfSome3t7p8l2fEs2bZ2YPCYdXV1wWPmEXkCAAAAsq3iSKm7L3D36fHzpZJelVRTZpNjJN3t7p+6+98kzZJ0cBKdzaq3rhocPKaZBY+ZR+QJAAAAyLb1uqbUzLpJOlDS8/Gis83sJTO7zcx2jJfVSJpbtNnbKl/EAgAAAAC+oKouSs2svaT7JJ3n7h9JullSd0m1khZIumZ9AptZnZk1mFnDokWL1mdTAAAAAMBmoqqi1My2UFSQ3uHu/ydJ7r7Q3T9z988ljdOaU3TnSdqtaPOu8bK1uHu9uxfcvdCxY8eNeQ+p27r7QcFjDh4c/pThPCJPAAAAQLZVM/uuSbpV0qvu/oui5Z2Lmh0n6eX4+SRJw8xsSzPbU1IPSS8k1+Xs2eXE0cFjTp48OXjMPCJPAAAAQLZVM1J6mKRTJR3Z7PYvPzOzP5vZS5K+Jun7kuTuf5F0j6RXJE2VdNbmPPOuJL1779jgMYcMGRI8Zh6RJwAAACDbzN3T7oMKhYI3NDSk3Y2yuo16sMV1b101WKHzaGbBY+YReQIAAADSZ2aN7l4otW69Zt8FAAAAACBJFKUAAAAAgNRQlCZgjwunBI/JKanVIU8AAABAtlGUJmDpjKnBY9bX1wePmUfkCQAAAMg2itIEvD/txuAxR44cGTxmHpEnAAAAINsoSgEAAAAAqaEoBQAAAACkhqI0AR1PuDh4zEmTJgWPmUfkCQAAAMg2itIEtO20V/CYffv2DR4zj8gTAAAAkG0UpQmY96vTg8esqakJHjOPyBMAAACQbRSlAAAAAIDUUJQCAAAAAFJDUZqA9gcMCB5zxIgRwWPmEXkCAAAAso2iNAE7Dfxe8Jj19fXBY+YReQIAAACyjaI0AQvGnxs8JrPKVoc8AQAAANlGUZqAlQtnB485ffr04DHziDwBAAAA2UZRCgAAAABIDUVpAlq37xA8ZufOnYPHzCPyBAAAAGQbRWkCup41MXjM+fPnB4+ZR+QJAAAAyDaK0gQsefqO4DHHjBkTPGYekScAAAAg28zd0+6DCoWCNzQ0pN2NsrqNerDFdW9dNVih
82hmwWPmEXkCAAAA0mdmje5eKLWOkVIAAAAAQGooSgEAAAAAqaEoTcCup18bPGbWT3fOCvIEAAAAZBtFKQAAAAAgNRSlCXhnwnnBYxYKJa8RRjPkCQAAAMg2ilIAAAAAQGooSgEAAAAAqaEoTcD2h50cPObo0aODx8wj8gQAAABkm7l72n1QoVDwrM+S2m3Ug2XXz7ny6EA9AQAAAIB8MbNGdy854QsjpQl4+6bTgsfs0qVL8Jh5RJ4AAACAbKMoTcBny94PHnPBggXBY+YReQIAAACyjaIUAAAAAJAaitIEtO3UPXjMPn36BI+ZR+QJAAAAyDaK0gR0Hn5d8JiNjY3BY+YReQIAAACyjaI0AYun3hA8Zl1dXfCYeUSeAAAAgGyjKE3AspnTgsccN25c8Jh5RJ4AAACAbKMoBQAAAACkhqIUAAAAAJAaitIE1Hx3QvCY8+bNCx4zj8gTAAAAkG0Vi1Iz283MnjCzV8zsL2Z2bry8g5k9YmZvxv/uGC83M7vezGaZ2Utmttnfk2PlwlnBYzKrbHXIEwAAAJBt1YyUrpL0A3ffV1I/SWeZ2b6SRkl6zN17SHosfi1J/yKpR/yok3Rz4r3OmEX3XRY85tChQ4PHzCPyBAAAAGRbxaLU3Re4+/T4+VJJr0qqkXSMpKbzVidIOjZ+foykiR55TtIOZtY58Z4DAAAAAHJvva4pNbNukg6U9LykTu6+IF71jqRO8fMaSXOLNns7XgYAAAAAwFqqLkrNrL2k+ySd5+4fFa9zd5fk6xPYzOrMrMHMGhYtWrQ+m2ZOhwFnB495yy23BI+ZR+QJAAAAyLaqilIz20JRQXqHu/9fvHhh02m58b/vxsvnSdqtaPOu8bK1uHu9uxfcvdCxY8cN7X8mbFs7MHjMurq64DHziDwBAAAA2VbN7Lsm6VZJr7r7L4pWTZJ0evz8dEm/LVp+WjwLbz9JHxad5rtZeuuqwcFjRh8LKiFPAAAAQLa1qaLNYZJOlfRnM5sRL/uJpCsl3WNmZ0p6S9I343UPSRokaZakFZL+LdEeAwAAAAA2GxWLUnd/WlJLw01fL9HeJZ21kf0CAAAAAHwBrNfsuyht6+4HBY85eHD4U4bziDwBAAAA2UZRmoBdThwdPObkyZODx8wj8gQAAABkG0VpAt69d2zwmEOGDAkeM4/IEwAAAJBtFKUJ+Hj2n4LHnDJlSvCYeUSeAAAAgGyjKAUAAAAApIaiFAAAAACQGorSBOxxYfhTRKM776AS8gQAAABkW8X7lKKypTOmqtuo8m3mXHl0ojHr6+tVV1eX6D43R+QJAAAAyDZGShPw/rQbg8ccOXJk8Jh5RJ4AAACAbKMoBQAAAACkhqIUAAAAAJAaitIEdDzh4uAxJ02aFDxmHpEnAAAAINsoShPQttNewWP27ds3eMw8Ik8AAABAtlGUJmDer04PHrOmpiZ4zDwiTwAAAEC2UZQCAAAAAFJDUQoAAAAASA1FaQLaHzAgeMwRI0YEj5lH5AkAAADINorSBOw08HvBY9bX1wePmUfkCQAAAMg2itIELBh/bvCYzCpbHfIEAAAAZBtFaQJWLpwdPOb06dODx8wj8gQAAABkG0UpAAAAACA1FKUJaN2+Q/CYnTt3Dh4zj8gTAAAAkG0UpQnoetbE4DHnz58fPGYekScAAAAg2yhKE7Dk6TuCxxwzZkzwmHlEngAAAIBsoyhNwIfP3BU85tixY4PHzCPyBAAAAGQbRSkAAAAAIDUUpQAAAACA1FCUJmDX068NHrOhoSF4zDwiTwAAAEC2UZQCAAAAAFJDUZqAdyacFzxmoVAIHjOPyBMAAACQbRSlAAAAAIDUUJQCAAAAAFJDUZqA7Q87OXjM0aNHB4+ZR+QJAAAAyDZz97T7oEKh4FmfJbXbqAc3avs5Vx6dUE8AAAAAIF/MrNHdS074wkhpAt6+6bTgMbt06RI8Zh6RJwAAACDbKEoT8Nmy94PHXLBgQfCYeUSeAAAAgGyj
KAUAAAAApIaiNAFtO3UPHrNPnz7BY+YReQIAAACyjaI0AZ2HXxc8ZmNjY/CYeUSeAAAAgGyjKE3A4qk3BI9ZV1cXPGYekScAAAAg2yoWpWZ2m5m9a2YvFy0bY2bzzGxG/BhUtO7HZjbLzF43swGbquNZsmzmtOAxx40bFzxmHpEnAAAAINuqGSkdL2lgieW/dPfa+PGQJJnZvpKGSeoVb/MrM2udVGcBAAAAAJuXikWpuz8lqdp7nhwj6W53/9Td/yZplqSDN6J/AAAAAIDN2MZcU3q2mb0Un967Y7ysRtLcojZvx8s2azXfnRA85rx584LHzCPyBAAAAGTbhhalN0vqLqlW0gJJ16zvDsyszswazKxh0aJFG9iNbFi5cFbwmMwqWx3yBAAAAGTbBhWl7r7Q3T9z988ljdOaU3TnSdqtqGnXeFmpfdS7e8HdCx07dtyQbmTGovsuCx5z6NChwWPmEXkCAAAAsm2DilIz61z08jhJTTPzTpI0zMy2NLM9JfWQ9MLGdREAAAAAsLlqU6mBmd0lqb+knc3sbUmjJfU3s1pJLmmOpJGS5O5/MbN7JL0iaZWks9z9s03TdQAAAABA3lUsSt395BKLby3T/gpJV2xMp/Kmw4Czg8e85ZZbgsfMI/IEAAAAZNvGzL6L2La1pW7jumnV1dUFj5lH5AkAAADINorSBLx11eDgMc0seMw8Ik8AAABAtlGUAgAAAABSQ1EKAAAAAEgNRWkCtu5+UPCYgweHP2U4j8gTAAAAkG0UpQnY5cTRwWNOnjw5eMw8Ik8AAABAtlGUJuDde8cGjzlkyJDgMfOIPAEAAADZRlGagI9n/yl4zClTpgSPmUfkCQAAAMg2ilIAAAAAQGooSgEAAAAAqaEoTcAeF4Y/RdTdg8fMI/IEAAAAZBtFaQKWzpgaPGZ9fX3wmHlEngAAAIBsoyhNwPvTbgwec+TIkcFj5hF5AgAAALKNohQAAAAAkBqKUgAAAABAaihKE9DxhIuDx5w0aVLwmHlEngAAAIBsoyhNQNtOewWP2bdv3+Ax84g8AQAAANlGUZqAeb86PXjMmpqa4DHziDwBAAAA2UZRCgAAAABIDUUpAAAAACA1FKUJaH/AgOAxR4wYETxmHpEnAAAAINsoShOw08DvBY9ZX18fPGYekScAAAAg2yhKE7Bg/LnBYzKrbHXIEwAAAJBtFKUJWLlwdvCY06dPDx4zj8gTAAAAkG0UpQAAAACA1FCUJqB1+w7BY3bu3Dl4zDwiTwAAAEC2UZQmoOtZE4PHnD9/fvCYeUSeAAAAgGyjKE3AkqfvCB5zzJgxwWPmEXkCAAAAso2iNAEfPnNX8Jhjx44NHjOPyBMAAACQbRSlAAAAAIDUtEm7A18U3UY9WHb9nCuPDtQTAAAAAMgORkoTsOvp1waP2dDQEDxmHpEnAAAAINsoSgEAAAAAqaEoTcA7E84LHrNQKASPmUfkCQAAAMg2ilIAAAAAQGooSgEAAAAAqaEoTcD2h50cPObo0aODx8wj8gQAAABkm7l72n1QoVDwrM+SWumWLhuLW8IAAAAA2FyZWaO7l5zwhZHSBLx902nBY3bp0iV4zDwiTwAAAEC2UZQm4LNl7wePuWDBguAx84g8AQAAANlWsSg1s9vM7F0ze7loWQcze8TM3oz/3TFebmZ2vZnNMrOXzKzPpuw8AAAAACDfqhkpHS9pYLNloyQ95u49JD0Wv5akf5HUI37USbo5mW5mW9tO3YPH7NOHer8a5AkAAADItopFqbs/Jan5+anHSJoQP58g6dii5RM98pykHcysc1KdzarOw68LHrOxsTF4zDwiTwAAAEC2beg1pZ3cvelivXckdYqf10iaW9Tu7XjZ/2/v/mP1LMs7gH+vFZlGiAysFVp+OCUaTQZiVYzEqKh0UsFEYjRudsZ5XASDyZbJlhhgxgT/mTp/heKvujjU4JwVDGjQZNNkSoswf0cgJVJLy/g1mEaCu/fHeYqH2p6e
ct73fd7nvJ9PcnLe536fnuvi7pXzcvW+n+dZ0e6+9sMTjzk3NzfxmENkngAAYLot+0ZHbf6ZgU01CwAADlhJREFUMof8XJmqmquqbVW17a677lpuGr168ObrJh7ziiuumHjMITJPAAAw3R5rU7p777bc7vuebnxnkuMXnLeuG/s9rbXNrbX1rbX1q1evfoxpAAAAMGSPtSndmmRT93pTkq8sGH9zdxfe05Pcv2CbLwAAADzKYQc7oaquTPLSJE+uqjuSXJzksiRfrKq3Jrk9yeu707+W5NVJbknyqyRvGUPOU2ftO7Yc/KQR27lzvwvQ7MM8AQDAdDtoU9pae+MB3jpzP+e2JOcvN6mheWj3LTnsyGMmGnP79u057rjjJhpziMwTAABMt2Xf6Ijkri+9d+IxzznnnInHHCLzBAAA001TCgAAQG80pQAAAPRGUzoCR591wcRjXn755ROPOUTmCQAAppumdASOPHXDxGPOzc1NPOYQmScAAJhumtIRuP39Gyces6omHnOIzBMAAEw3TSkAAAC90ZQCAADQG03pCDzh6c+feMyNGye/ZXiIzBMAAEw3TekIPOW8iyce86tf/erEYw6ReQIAgOl2WN8JrAR7rrp02Y3pSRdds+j7Oy47+1HHr3nNazRcS2CeAABgulkpHYFf33rDxGNeffXVE485ROYJAACmm6YUAACA3mhKAQAA6I2mdAROfPfkt4i21iYec4jMEwAATDdN6Qg8cNO1E4+5efPmicccIvMEAADTTVM6Avdc95GJx3z7298+8ZhDZJ4AAGC6aUoBAADojaYUAACA3mhKR2D1694z8Zhbt26deMwhMk8AADDdDus7gZXg8DXPGHuMky665lHHDz9wb975nd+N7bjs7LHnMETPe97z+k4BAABYhJXSEdj5sU0zEXOI1q5d23cKAADAIjSlAAAA9EZTCgAAQG80pSNwxClnzUTMIXrb297WdwoAAMAiNKUjcMyGd85EzCHavHlz3ykAAACL0JSOwK7PXDgTMYfI3XcBAGC6aUpH4KHdt85EzCG68cYb+04BAABYhKYUAACA3hzWdwIrwaojjl4RMU+66JpF399x2dkjjzluxx57bN8pAAAAi7BSOgLrzv/sTMQcol/+8pd9pwAAACxCUzoC9337czMRc4guueSSvlMAAAAWoSkdgfu/c+VMxByiSy+9tO8UAACARWhKAQAA6I2mFAAAgN5oSkfgqZs+OBMxh2jbtm19pwAAACzCI2FmxMEe9wIAANAHK6UjcOeWd81EzCFav3593ykAAACL0JQCAADQG9t3VwjbcwEAgCFaVlNaVTuSPJDkt0kebq2tr6qjk3whyUlJdiR5fWvt3uWlOd2e9OI3zkTMIbr44ov7TgEAAFjEKLbvvqy1dmprbe/Fexclub61dnKS67vjFe2oM940EzGH6JJLLuk7BQAAYBHjuKb03CRbutdbkrx2DDGmyh0fffNMxByi4447ru8UAACARSy3KW1Jvl5V26tqrhtb01rb1b2+M8maZcaYer998J6ZiDlEu3btOvhJAABAb5Z7o6MzWms7q+opSb5RVT9d+GZrrVVV298f7JrYuSQ54YQTlpkGAAAAQ7SsldLW2s7u+54kX07ygiS7q+rYJOm+7znAn93cWlvfWlu/evXq5aTRu8PXPH0mYg7Raaed1ncKAADAIh5zU1pVT6yqI/e+TvKqJD9MsjXJpu60TUm+stwkp92xf/GhmYg5RNu3b+87BQAAYBHLWSldk+TbVXVzku8luaa1dm2Sy5K8sqp+nuQV3fGKdve1H56JmEM0Nzd38JMAAIDePOamtLV2W2vtlO7rOa2193Xjd7fWzmytndxae0VrbcXfkefBm6+biZhDdMUVV/SdAgAAsIhxPBIGAAAAlmS5d99lhpx00TWLvr/jsrMnlAkAALBSWCkdgbXv2DITMYdo586dfacAAAAsQlM6Ag/tvmUmYg6Ru+8CAMB005SOwF1f
eu9MxByic845p+8UAACARbimlJFxzSkAAHCorJQCAADQGyulI3D0WRfMRMxxG8dK6+WXX/5Y0wEAACZAUzoCR566YSZiLtfBms5xmJubm3hMAABg6WzfHYHb379xJmIOUVX1nQIAALAITSkAAAC90ZQCAADQG03pCDzh6c+fiZhDtHGjbc4AADDN3OhoBJ5y3sUzEXPa7fdGSs/5q0fGPScVAACmj6Z0BPZcdenEm8Q+Yg7RrM3TOB6rAwAA42T77gj8+tYbZiLmEJknAACYblZKGYxJPOfUSiMAAEyWlVIAAAB6oykdgRPfffVMxBwi8wQAANNNUzoCD9x07UzEHCLzBAAA001TOgL3XPeRmYg5ROYJAACmm6YUAACA3rj7LoyQu/cCAMCh0ZSOwOrXvWcmYg7RwnmaxCNlAACAQ2P77ggcvuYZMxFziMwTAABMNyulI7DzY5sm/uiRPmIO0dDmyfbf5TOHAADDYqUUAACA3mhKAQAA6I2mdASOOOWsmYg5ROYJAACmm2tKR+CYDe+ciZhDNOp56vsOvn3HXwrXdAIAcCislI7Ars9cOBMxh8g8AQDAdLNSOgIP7b51JmIOkXk6NEtZibXSCQDAKGlKAVYYW6gBgCHRlI7AqiOOnomYQ2Se2JeGDQBgumhKR2Dd+Z+diZhDNG3z1PeNivqOPy05AAAwPdzoaATu+/bnZiLmEJknAACYblZKR+D+71yZo85404qPOUTmiSGyxRgAmCWaUuCQ2H7LUiy3Tmah8faPDwAwT1MKsMAkmm7NBkuhaQVgVmhKR+Cpmz44EzGHyDwxjZbb+FqtHj8NIQBMztia0qrakORDSVYl+URr7bJxxQJg6YawtXa5TaGmEgCGYyxNaVWtSvLRJK9MckeSG6pqa2vtx+OI17c7t7wrJ7776hUfc4jME4zeNKzUTkMOQ6dxB2BajGul9AVJbmmt3ZYkVfX5JOcmWZFNKQCzZRaa4mloWsedwzT8Nx7MEHJczNDzZ56/R8ZtXE3p2iS/WHB8R5IXjikWABwSTeX0//whWAlzMITt/MvVd0M1hBvorYRaXun6ruNxq9ba6H9o1XlJNrTW/rI7/vMkL2ytXbDgnLkkc93hM5P8bOSJjMaTk/x330kwGOqFpVIrHAr1wlKpFQ6FemGpRlErJ7bWVu/vjXGtlO5McvyC43Xd2CNaa5uTbB5T/JGpqm2ttfV958EwqBeWSq1wKNQLS6VWOBTqhaUad638wZh+7g1JTq6qp1XV4UnekGTrmGIBAAAwUGNZKW2tPVxVFyS5LvOPhPlUa+1H44gFAADAcI3tOaWtta8l+dq4fv4ETf0WY6aKemGp1AqHQr2wVGqFQ6FeWKqx1spYbnQEAAAASzGua0oBAADgoDSli6iqDVX1s6q6paou6jsf+ldVn6qqPVX1wwVjR1fVN6rq5933P+rGq6r+qauf/6qq0/rLnEmrquOr6ltV9eOq+lFVXdiNqxcepaoeX1Xfq6qbu1q5tBt/WlV9t6uJL3Q3DkxV/WF3fEv3/kl95s/kVdWqqvp+VV3dHasV9quqdlTVD6rqpqra1o35HOL3VNVRVXVVVf20qn5SVS+aZK1oSg+gqlYl+WiSP03y7CRvrKpn95sVU+AzSTbsM3ZRkutbaycnub47TuZr5+Tuay7JxyeUI9Ph4SR/3Vp7dpLTk5zf/Q5RL+zrN0le3lo7JcmpSTZU1elJ3p/kA621ZyS5N8lbu/PfmuTebvwD3XnMlguT/GTBsVphMS9rrZ264HEePofYnw8luba19qwkp2T+d8zEakVTemAvSHJLa+221tpDST6f5Nyec6JnrbV/T3LPPsPnJtnSvd6S5LULxj/b5v1nkqOq6tjJZErfWmu7Wms3dq8fyPwv97VRL+yj+zt/sDt8XPfVkrw8yVXd+L61sreGrkpyZlXVhNKlZ1W1LsnZST7RHVfUCofG5xCPUlVPSvKSJJ9MktbaQ621+zLBWtGUHtja
JL9YcHxHNwb7WtNa29W9vjPJmu61GiJJ0m2Ze26S70a9sB/ddsybkuxJ8o0ktya5r7X2cHfKwnp4pFa69+9PcsxkM6ZHH0zyt0n+rzs+JmqFA2tJvl5V26tqrhvzOcS+npbkriSf7i4N+ERVPTETrBVNKYxQm7+dtVta84iqOiLJl5K8q7X2PwvfUy/s1Vr7bWvt1CTrMr9T51k9p8QUqqqNSfa01rb3nQuDcUZr7bTMb7c8v6pesvBNn0N0DktyWpKPt9aem+R/87utuknGXyua0gPbmeT4BcfrujHY1+69Wxa673u6cTU046rqcZlvSD/XWvvXbli9cEDddqlvJXlR5rdD7X2e+MJ6eKRWuveflOTuCadKP16c5Jyq2pH5y4penvnrwNQK+9Va29l935Pky5n/Ry+fQ+zrjiR3tNa+2x1flfkmdWK1oik9sBuSnNzd0e7wJG9IsrXnnJhOW5Ns6l5vSvKVBeNv7u5QdnqS+xdsgWCF667b+mSSn7TW/nHBW+qFR6mq1VV1VPf6CUlemflrkL+V5LzutH1rZW8NnZfkm81Dx2dCa+3vWmvrWmsnZf7/S77ZWntT1Ar7UVVPrKoj975O8qokP4zPIfbRWrszyS+q6pnd0JlJfpwJ1kr53XRgVfXqzF+7sSrJp1pr7+s5JXpWVVcmeWmSJyfZneTiJP+W5ItJTkhye5LXt9bu6ZqSj2T+br2/SvKW1tq2PvJm8qrqjCT/keQH+d21X3+f+etK1QuPqKo/yfwNJFZl/h+Lv9ha+4eq+uPMr4YdneT7Sf6stfabqnp8kn/O/HXK9yR5Q2vttn6ypy9V9dIkf9Na26hW2J+uLr7cHR6W5F9aa++rqmPic4h9VNWpmb+B2uFJbkvylnSfSZlArWhKAQAA6I3tuwAAAPRGUwoAAEBvNKUAAAD0RlMKAABAbzSlAAAA9EZTCgAAQG80pQAAAPRGUwoAAEBv/h8FqDPMJfOR7wAAAABJRU5ErkJggg==\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "items_per_user=df.groupby(['item']).count()['rating']\n",
+ "\n",
+ "plt.figure(figsize=(16,8))\n",
+ "plt.hist(items_per_user, bins=100)\n",
+ "\n",
+ "# Let's add the median\n",
+ "t=items_per_user.median()\n",
+ "plt.axvline(t, color='k', linestyle='dashed', linewidth=1)\n",
+ "plt.text(t*1.1, plt.ylim()[1]*0.9, 'Median: {:.0f}'.format(t))\n",
+ "\n",
+ "# Let's also add the 25th and 75th percentiles\n",
+ "t=items_per_user.quantile(0.25)\n",
+ "plt.axvline(t, color='k', linestyle='dashed', linewidth=1)\n",
+ "plt.text(t*1.1, plt.ylim()[1]*0.95, '25% quantile: {:.0f}'.format(t))\n",
+ "\n",
+ "t=items_per_user.quantile(0.75)\n",
+ "plt.axvline(t, color='k', linestyle='dashed', linewidth=1)\n",
+ "plt.text(t*1.05, plt.ylim()[1]*0.95, '75% quantile: {:.0f}'.format(t))\n",
+ "\n",
+ "plt.title('Number of ratings per item', fontsize=30)\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "rating\n",
+ "1 0.06110\n",
+ "2 0.11370\n",
+ "3 0.27145\n",
+ "4 0.34174\n",
+ "5 0.21201\n",
+ "Name: user, dtype: float64"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.groupby(['rating']).count()['user']/len(df)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Item attributes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "genres = pd.read_csv('./Datasets/ml-100k/u.genre', sep='|', header=None,\n",
+ " encoding='latin-1')\n",
+ "genres=dict(zip(genres[1], genres[0]))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{0: 'unknown',\n",
+ " 1: 'Action',\n",
+ " 2: 'Adventure',\n",
+ " 3: 'Animation',\n",
+ " 4: \"Children's\",\n",
+ " 5: 'Comedy',\n",
+ " 6: 'Crime',\n",
+ " 7: 'Documentary',\n",
+ " 8: 'Drama',\n",
+ " 9: 'Fantasy',\n",
+ " 10: 'Film-Noir',\n",
+ " 11: 'Horror',\n",
+ " 12: 'Musical',\n",
+ " 13: 'Mystery',\n",
+ " 14: 'Romance',\n",
+ " 15: 'Sci-Fi',\n",
+ " 16: 'Thriller',\n",
+ " 17: 'War',\n",
+ " 18: 'Western'}"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "genres"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "movies = pd.read_csv('./Datasets/ml-100k/u.item', sep='|', encoding='latin-1', header=None)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 6 | \n",
+ " 7 | \n",
+ " 8 | \n",
+ " 9 | \n",
+ " ... | \n",
+ " 14 | \n",
+ " 15 | \n",
+ " 16 | \n",
+ " 17 | \n",
+ " 18 | \n",
+ " 19 | \n",
+ " 20 | \n",
+ " 21 | \n",
+ " 22 | \n",
+ " 23 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " Toy Story (1995) | \n",
+ " 01-Jan-1995 | \n",
+ " NaN | \n",
+ " http://us.imdb.com/M/title-exact?Toy%20Story%2... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2 | \n",
+ " GoldenEye (1995) | \n",
+ " 01-Jan-1995 | \n",
+ " NaN | \n",
+ " http://us.imdb.com/M/title-exact?GoldenEye%20(... | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3 | \n",
+ " Four Rooms (1995) | \n",
+ " 01-Jan-1995 | \n",
+ " NaN | \n",
+ " http://us.imdb.com/M/title-exact?Four%20Rooms%... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
3 rows × 24 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " 0 1 2 3 \\\n",
+ "0 1 Toy Story (1995) 01-Jan-1995 NaN \n",
+ "1 2 GoldenEye (1995) 01-Jan-1995 NaN \n",
+ "2 3 Four Rooms (1995) 01-Jan-1995 NaN \n",
+ "\n",
+ " 4 5 6 7 8 9 ... \\\n",
+ "0 http://us.imdb.com/M/title-exact?Toy%20Story%2... 0 0 0 1 1 ... \n",
+ "1 http://us.imdb.com/M/title-exact?GoldenEye%20(... 0 1 1 0 0 ... \n",
+ "2 http://us.imdb.com/M/title-exact?Four%20Rooms%... 0 0 0 0 0 ... \n",
+ "\n",
+ " 14 15 16 17 18 19 20 21 22 23 \n",
+ "0 0 0 0 0 0 0 0 0 0 0 \n",
+ "1 0 0 0 0 0 0 0 1 0 0 \n",
+ "2 0 0 0 0 0 0 0 1 0 0 \n",
+ "\n",
+ "[3 rows x 24 columns]"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "movies[:3]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "for i in range(19):\n",
+ " movies[i+5]=movies[i+5].apply(lambda x: genres[i] if x==1 else '')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "movies['genre']=movies.iloc[:, 5:].apply(lambda x: ', '.join(x[x!='']), axis = 1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "movies=movies[[0,1,'genre']]\n",
+ "movies.columns=['id', 'title', 'genres']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " title | \n",
+ " genres | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " Toy Story (1995) | \n",
+ " Animation, Children's, Comedy | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2 | \n",
+ " GoldenEye (1995) | \n",
+ " Action, Adventure, Thriller | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3 | \n",
+ " Four Rooms (1995) | \n",
+ " Thriller | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4 | \n",
+ " Get Shorty (1995) | \n",
+ " Action, Comedy, Drama | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5 | \n",
+ " Copycat (1995) | \n",
+ " Crime, Drama, Thriller | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id title genres\n",
+ "0 1 Toy Story (1995) Animation, Children's, Comedy\n",
+ "1 2 GoldenEye (1995) Action, Adventure, Thriller\n",
+ "2 3 Four Rooms (1995) Thriller\n",
+ "3 4 Get Shorty (1995) Action, Comedy, Drama\n",
+ "4 5 Copycat (1995) Crime, Drama, Thriller"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "movies.to_csv('./Datasets/ml-100k/movies.csv', index=False)\n",
+ "movies[:5]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Toy example"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "os.makedirs('./Datasets/toy-example/', exist_ok = True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "toy_train=pd.DataFrame([[0,0,3,0], [0,10,4,0], [0,40,5,0], [0,70,4,0],\n",
+ " [10,10,1,0], [10,20,2,0], [10,30,3,0],\n",
+ " [20,30,5,0], [20,50,3,0], [20,60,4,0]])\n",
+ "toy_test=pd.DataFrame([[0,60,3,0],\n",
+ " [10,40,5,0],\n",
+ " [20,0,5,0], [20,20,4,0], [20,70,2,0]])\n",
+ "\n",
+ "toy_train.to_csv('./Datasets/toy-example/train.csv', sep='\\t', header=None, index=False)\n",
+ "toy_test.to_csv('./Datasets/toy-example/test.csv', sep='\\t', header=None, index=False)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/P1. Baseline.ipynb b/P1. Baseline.ipynb
new file mode 100644
index 0000000..5e073cf
--- /dev/null
+++ b/P1. Baseline.ipynb
@@ -0,0 +1,1269 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Preparing dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import scipy.sparse as sparse\n",
+ "from collections import defaultdict\n",
+ "from itertools import chain\n",
+ "import random\n",
+ "\n",
+ "train_read=pd.read_csv('./Datasets/ml-100k/train.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n",
+ "test_read=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Let's prepare dataset\n",
+ "train_and_test=pd.concat([train_read, test_read], axis=0, ignore_index=True)\n",
+ "train_and_test['user_code'] = train_and_test['user'].astype(\"category\").cat.codes\n",
+ "train_and_test['item_code'] = train_and_test['item'].astype(\"category\").cat.codes\n",
+ "\n",
+ "user_code_id = dict(enumerate(train_and_test['user'].astype(\"category\").cat.categories))\n",
+ "user_id_code = dict((v, k) for k, v in user_code_id.items())\n",
+ "item_code_id = dict(enumerate(train_and_test['item'].astype(\"category\").cat.categories))\n",
+ "item_id_code = dict((v, k) for k, v in item_code_id.items())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " user | \n",
+ " item | \n",
+ " rating | \n",
+ " timestamp | \n",
+ " user_code | \n",
+ " item_code | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 664 | \n",
+ " 525 | \n",
+ " 4 | \n",
+ " 876526580 | \n",
+ " 663 | \n",
+ " 524 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 49 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 888068651 | \n",
+ " 48 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 352 | \n",
+ " 273 | \n",
+ " 2 | \n",
+ " 884290328 | \n",
+ " 351 | \n",
+ " 272 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 618 | \n",
+ " 96 | \n",
+ " 3 | \n",
+ " 891307749 | \n",
+ " 617 | \n",
+ " 95 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 560 | \n",
+ " 24 | \n",
+ " 2 | \n",
+ " 879976772 | \n",
+ " 559 | \n",
+ " 23 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " user item rating timestamp user_code item_code\n",
+ "0 664 525 4 876526580 663 524\n",
+ "1 49 1 2 888068651 48 0\n",
+ "2 352 273 2 884290328 351 272\n",
+ "3 618 96 3 891307749 617 95\n",
+ "4 560 24 2 879976772 559 23"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "train_and_test[:5]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "train_df=pd.merge(train_read, train_and_test, on=list(train_read.columns))\n",
+ "test_df=pd.merge(test_read, train_and_test, on=list(train_read.columns))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Take number of users and items\n",
+ "(U,I)=(train_and_test['user_code'].max()+1, train_and_test['item_code'].max()+1)\n",
+ "\n",
+ "# Create sparse csr matrices\n",
+ "train_ui = sparse.csr_matrix((train_df['rating'], (train_df['user_code'], train_df['item_code'])), shape=(U, I))\n",
+ "test_ui = sparse.csr_matrix((test_df['rating'], (test_df['user_code'], test_df['item_code'])), shape=(U, I))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# The above steps are the same for many algorithms, so I put the code in a separate file:\n",
+ "import helpers\n",
+ "train_read=pd.read_csv('./Datasets/ml-100k/train.csv', sep='\\t', header=None)\n",
+ "test_read=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n",
+ "train_ui, test_ui, user_code_id, user_id_code, item_code_id, item_id_code = helpers.data_to_csr(train_read, test_read)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### CSR matrices - what are they?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "<3x4 sparse matrix of type ''\n",
+ "\twith 8 stored elements in Compressed Sparse Row format>"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "row = np.array([0, 0, 0, 1, 1, 2, 2, 2])\n",
+ "col = np.array([0, 1, 2, 1, 3, 2, 0, 3])\n",
+ "data = np.array([4, 1, 3, 2,1, 5, 2, 4])\n",
+ "sample_csr=sparse.csr_matrix((data, (row, col)))\n",
+ "sample_csr"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Ratings matrix with missing entries replaced by zeros:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "matrix([[4, 1, 3, 0],\n",
+ " [0, 2, 0, 1],\n",
+ " [2, 0, 5, 4]])"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Number of ratings: 8 \n",
+ "Number of users: 3 \n",
+ "Number of items: 4 \n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "print('Ratings matrix with missing entries replaced by zeros:')\n",
+ "display(sample_csr.todense())\n",
+ "\n",
+ "print('\\nNumber of ratings: {} \\nNumber of users: {} \\nNumber of items: {} \\n'\n",
+ " .format(sample_csr.nnz, sample_csr.shape[0], sample_csr.shape[1]))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Ratings data: [4 1 3 2 1 2 5 4]\n",
+ "Regarding items: [0 1 2 1 3 0 2 3]\n",
+ "Where ratings from 0 to 2 belongs to user 0.\n",
+ "Where ratings from 3 to 4 belongs to user 1.\n",
+ "Where ratings from 5 to 7 belongs to user 2.\n"
+ ]
+ }
+ ],
+ "source": [
+ "print('Ratings data:', sample_csr.data)\n",
+ "\n",
+ "print('Regarding items:', sample_csr.indices)\n",
+ "\n",
+ "for i in range(sample_csr.shape[0]):\n",
+ " print('Where ratings from {} to {} belongs to user {}.'.format(sample_csr.indptr[i], sample_csr.indptr[i+1]-1, i))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Efficient way to access items rated by user:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "array([ 0, 6, 10, 27, 49, 78, 95, 97, 116, 143, 153, 156, 167,\n",
+ " 171, 172, 173, 194, 208, 225, 473, 495, 549, 615], dtype=int32)"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "1.1 µs ± 63.6 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n",
+ "Inefficient way to access items rated by user:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "array([ 0, 6, 10, 27, 49, 78, 95, 97, 116, 143, 153, 156, 167,\n",
+ " 171, 172, 173, 194, 208, 225, 473, 495, 549, 615], dtype=int32)"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "149 µs ± 13.2 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
+ ]
+ }
+ ],
+ "source": [
+ "user=123\n",
+ "\n",
+ "print('Efficient way to access items rated by user:')\n",
+ "display(train_ui.indices[train_ui.indptr[user]:train_ui.indptr[user+1]])\n",
+ "%timeit train_ui.indices[train_ui.indptr[user]:train_ui.indptr[user+1]]\n",
+ "\n",
+ "print('Inefficient way to access items rated by user:')\n",
+ "display(train_ui[user].indices)\n",
+ "%timeit train_ui[user].indices"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "###### Example: subtracting row means"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Our matrix:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "matrix([[4, 1, 3, 0],\n",
+ " [0, 2, 0, 1],\n",
+ " [2, 0, 5, 4]])"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "List of row sums:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "matrix([[ 8, 3, 11]])"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "print('Our matrix:')\n",
+ "display(sample_csr.todense())\n",
+ "print('List of row sums:')\n",
+ "sample_csr.sum(axis=1).ravel()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Array with row means:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "array([2.66666667, 1.5 , 3.66666667])"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Diagonal csr matrix with inverse of row sums on diagonal:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "matrix([[2.66666667, 0. , 0. ],\n",
+ " [0. , 1.5 , 0. ],\n",
+ " [0. , 0. , 3.66666667]])"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Let's apply them in nonzero entries:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "matrix([[2.66666667, 2.66666667, 2.66666667, 0. ],\n",
+ " [0. , 1.5 , 0. , 1.5 ],\n",
+ " [3.66666667, 0. , 3.66666667, 3.66666667]])"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Finally after subtraction:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "matrix([[ 1.33333333, -1.66666667, 0.33333333, 0. ],\n",
+ " [ 0. , 0.5 , 0. , -0.5 ],\n",
+ " [-1.66666667, 0. , 1.33333333, 0.33333333]])"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "print('Array with row means:')\n",
+ "row_means=np.asarray(sample_csr.sum(axis=1).ravel())[0]/np.diff(sample_csr.indptr)\n",
+ "display(row_means)\n",
+ "\n",
+ "print('Diagonal csr matrix with inverse of row sums on diagonal:')\n",
+ "display(sparse.diags(row_means).todense())\n",
+ "\n",
+ "print(\"\"\"Let's apply them in nonzero entries:\"\"\")\n",
+ "to_subtract=sparse.diags(row_means)*sample_csr.power(0)\n",
+ "display(to_subtract.todense())\n",
+ "\n",
+ "print(\"Finally after subtraction:\")\n",
+ "sample_csr-to_subtract.todense()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "###### Transposing"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Sample matrix: \n",
+ " [[4 1 3 0]\n",
+ " [0 2 0 1]\n",
+ " [2 0 5 4]]\n",
+ "\n",
+ "Indices: \n",
+ " [0 1 2 1 3 0 2 3]\n",
+ "\n",
+ "Transposed matrix: \n",
+ " [[4 0 2]\n",
+ " [1 2 0]\n",
+ " [3 0 5]\n",
+ " [0 1 4]]\n",
+ "\n",
+ "Indices of transposed matrix: \n",
+ " [0 1 2 1 3 0 2 3]\n",
+ "\n",
+ "Reason: \n",
+ "\n",
+ "After converting to csr: \n",
+ " [0 2 0 1 0 2 1 2]\n"
+ ]
+ }
+ ],
+ "source": [
+ "import numpy as np\n",
+ "from scipy import sparse\n",
+ "row = np.array([0, 0, 0, 1, 1, 2, 2, 2])\n",
+ "col = np.array([0, 1, 2, 1, 3, 2, 0, 3])\n",
+ "data = np.array([4, 1, 3, 2,1, 5, 2, 4])\n",
+ "sample=sparse.csr_matrix((data, (row, col)))\n",
+ "print('Sample matrix: \\n', sample.A)\n",
+ "print('\\nIndices: \\n', sample.indices)\n",
+ "transposed=sample.transpose()\n",
+ "print('\\nTransposed matrix: \\n', transposed.A)\n",
+ "print('\\nIndices of transposed matrix: \\n', transposed.indices)\n",
+ "\n",
+ "print('\\nReason: ', type(transposed))\n",
+ "\n",
+ "print('\\nAfter converting to csr: \\n', transposed.tocsr().indices)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Self made top popular"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "if not os.path.exists('./Recommendations generated/'):\n",
+ " os.mkdir('./Recommendations generated/')\n",
+ " os.mkdir('./Recommendations generated/ml-100k/')\n",
+ " os.mkdir('./Recommendations generated/toy-example/')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "TopPop=[]\n",
+ "train_iu=train_ui.transpose().tocsr()\n",
+ "scaling_factor=train_ui.max()/max(np.diff(train_iu.indptr))\n",
+ "\n",
+ "for i in range(train_iu.shape[0]):\n",
+ " TopPop.append((i, (train_iu.indptr[i+1]-train_iu.indptr[i])*scaling_factor))\n",
+ " \n",
+ "TopPop.sort(key=lambda x: x[1], reverse=True)\n",
+ "# TopPop is a list of pairs (item, rescaled_popularity) sorted in descending order of popularity\n",
+ "\n",
+ "k=10\n",
+ "result=[]\n",
+ "\n",
+ "for u in range(train_ui.shape[0]):\n",
+ " user_rated=train_ui.indices[train_ui.indptr[u]:train_ui.indptr[u+1]]\n",
+ " rec_user=[]\n",
+ " item_pos=0\n",
+ " while len(rec_user)<10:\n",
+ " if TopPop[item_pos][0] not in user_rated:\n",
+ " rec_user.append((item_code_id[TopPop[item_pos][0]], TopPop[item_pos][1]))\n",
+ " item_pos+=1\n",
+ " result.append([user_code_id[u]]+list(chain(*rec_user)))\n",
+ "\n",
+ "(pd.DataFrame(result)).to_csv('Recommendations generated/ml-100k/Self_TopPop_reco.csv', index=False, header=False)\n",
+ "\n",
+ "\n",
+ "# estimations - score is a bit artificial since that method is not designed for scoring, but for ranking\n",
+ "\n",
+ "estimations=[]\n",
+ "\n",
+ "for user, item in zip(*test_ui.nonzero()):\n",
+ " estimations.append([user_code_id[user], item_code_id[item],\n",
+ " (train_iu.indptr[item+1]-train_iu.indptr[item])*scaling_factor])\n",
+ "(pd.DataFrame(estimations)).to_csv('Recommendations generated/ml-100k/Self_TopPop_estimations.csv', index=False, header=False)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Self made global average"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "GlobalAvg=[]\n",
+ "avg=np.sum(train_ui)/train_ui.nnz\n",
+ "\n",
+ "for i in range(train_iu.shape[0]):\n",
+ " GlobalAvg.append((i, avg))\n",
+ " \n",
+ "k=10\n",
+ "result=[]\n",
+ "\n",
+ "for u in range(train_ui.shape[0]):\n",
+ " user_rated=train_ui.indices[train_ui.indptr[u]:train_ui.indptr[u+1]]\n",
+ " rec_user=[]\n",
+ " item_pos=0\n",
+ " while len(rec_user)<10:\n",
+ " if GlobalAvg[item_pos][0] not in user_rated:\n",
+ " rec_user.append((item_code_id[GlobalAvg[item_pos][0]], GlobalAvg[item_pos][1]))\n",
+ " item_pos+=1\n",
+ " result.append([user_code_id[u]]+list(chain(*rec_user)))\n",
+ "\n",
+ "(pd.DataFrame(result)).to_csv('Recommendations generated/ml-100k/Self_GlobalAvg_reco.csv', index=False, header=False)\n",
+ "\n",
+ "\n",
+ "# estimations - score is a bit artificial since that method is not designed for scoring, but for ranking\n",
+ "\n",
+ "estimations=[]\n",
+ "\n",
+ "for user, item in zip(*test_ui.nonzero()):\n",
+ " estimations.append([user_code_id[user], item_code_id[item], avg])\n",
+ "(pd.DataFrame(estimations)).to_csv('Recommendations generated/ml-100k/Self_GlobalAvg_estimations.csv', index=False, header=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 6 | \n",
+ " 7 | \n",
+ " 8 | \n",
+ " 9 | \n",
+ " ... | \n",
+ " 11 | \n",
+ " 12 | \n",
+ " 13 | \n",
+ " 14 | \n",
+ " 15 | \n",
+ " 16 | \n",
+ " 17 | \n",
+ " 18 | \n",
+ " 19 | \n",
+ " 20 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 5 | \n",
+ " 3.529975 | \n",
+ " 10 | \n",
+ " 3.529975 | \n",
+ " 25 | \n",
+ " 3.529975 | \n",
+ " 32 | \n",
+ " 3.529975 | \n",
+ " 33 | \n",
+ " ... | \n",
+ " 44 | \n",
+ " 3.529975 | \n",
+ " 46 | \n",
+ " 3.529975 | \n",
+ " 50 | \n",
+ " 3.529975 | \n",
+ " 52 | \n",
+ " 3.529975 | \n",
+ " 55 | \n",
+ " 3.529975 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 3.529975 | \n",
+ " 2 | \n",
+ " 3.529975 | \n",
+ " 3 | \n",
+ " 3.529975 | \n",
+ " 4 | \n",
+ " 3.529975 | \n",
+ " 5 | \n",
+ " ... | \n",
+ " 6 | \n",
+ " 3.529975 | \n",
+ " 7 | \n",
+ " 3.529975 | \n",
+ " 8 | \n",
+ " 3.529975 | \n",
+ " 9 | \n",
+ " 3.529975 | \n",
+ " 11 | \n",
+ " 3.529975 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
2 rows × 21 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " 0 1 2 3 4 5 6 7 8 9 ... 11 \\\n",
+ "0 1 5 3.529975 10 3.529975 25 3.529975 32 3.529975 33 ... 44 \n",
+ "1 2 1 3.529975 2 3.529975 3 3.529975 4 3.529975 5 ... 6 \n",
+ "\n",
+ " 12 13 14 15 16 17 18 19 20 \n",
+ "0 3.529975 46 3.529975 50 3.529975 52 3.529975 55 3.529975 \n",
+ "1 3.529975 7 3.529975 8 3.529975 9 3.529975 11 3.529975 \n",
+ "\n",
+ "[2 rows x 21 columns]"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pd.DataFrame(result)[:2]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Project task 1 - self made top rated"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# project task 1: implement TopRated\n",
+ "# Implement a recommender system which recommends movies (which the user hasn't seen) with the highest average rating\n",
+ "# The output should be saved in 'Recommendations generated/ml-100k/Self_TopRated_reco.csv'\n",
+ "# and 'Recommendations generated/ml-100k/Self_TopRated_estimations.csv'"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Self-made baseline"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class selfBaselineUI():\n",
+ " \n",
+ " def fit(self, train_ui):\n",
+ " self.train_ui=train_ui.copy()\n",
+ " self.train_iu=train_ui.transpose().tocsr()\n",
+ " \n",
+ " result=self.train_ui.copy()\n",
+ " \n",
+ " self.row_means=np.asarray(result.sum(axis=1).ravel())[0]/np.diff(result.indptr)\n",
+ " \n",
+ " # in csr format after addition or multiplication 0 entries \"disappear\" - so some workarounds are needed \n",
+ " # (other option is to define addition/multiplication in a desired way)\n",
+ " row_means=self.row_means.copy()\n",
+ " \n",
+ " max_row_mean=np.max(row_means)\n",
+ " row_means[row_means==0]=max_row_mean+1\n",
+ " to_subtract_rows=sparse.diags(row_means)*result.power(0)\n",
+ " to_subtract_rows.sort_indices() # needed to have valid .data\n",
+ " \n",
+ " subtract=to_subtract_rows.data\n",
+ " subtract[subtract==max_row_mean+1]=0\n",
+ " \n",
+ " result.data=result.data-subtract\n",
+ "# we can't do result=train_ui-to_subtract_rows since then 0 entries will \"disappear\" in csr format\n",
+ " self.col_means=np.divide(np.asarray(result.sum(axis=0).ravel())[0], np.diff(self.train_iu.indptr),\\\n",
+ " out=np.zeros(self.train_iu.shape[0]), where=np.diff(self.train_iu.indptr)!=0) # handling items without ratings\n",
+ " \n",
+ " # again - it is possible that some mean will be zero, so let's use the same workaround\n",
+ " col_means=self.col_means.copy()\n",
+ " \n",
+ " max_col_mean=np.max(col_means)\n",
+ " col_means[col_means==0]=max_col_mean+1\n",
+ " to_subtract_cols=result.power(0)*sparse.diags(col_means)\n",
+ " to_subtract_cols.sort_indices() # needed to have valid .data\n",
+ " \n",
+ " subtract=to_subtract_cols.data\n",
+ " subtract[subtract==max_col_mean+1]=0\n",
+ " \n",
+ " result.data=result.data-subtract\n",
+ "\n",
+ " return result\n",
+ " \n",
+ " \n",
+ " def recommend(self, user_code_id, item_code_id, topK=10):\n",
+ " estimations=np.tile(self.row_means[:,None], [1, self.train_ui.shape[1]]) +np.tile(self.col_means, [self.train_ui.shape[0], 1])\n",
+ " \n",
+ " top_k = defaultdict(list)\n",
+ " for nb_user, user in enumerate(estimations):\n",
+ " \n",
+ " user_rated=self.train_ui.indices[self.train_ui.indptr[nb_user]:self.train_ui.indptr[nb_user+1]]\n",
+ " for item, score in enumerate(user):\n",
+ " if item not in user_rated:\n",
+ " top_k[user_code_id[nb_user]].append((item_code_id[item], score))\n",
+ " result=[]\n",
+ " # Let's choose k best items in the format: (user, item1, score1, item2, score2, ...)\n",
+ " for uid, item_scores in top_k.items():\n",
+ " item_scores.sort(key=lambda x: x[1], reverse=True)\n",
+ " result.append([uid]+list(chain(*item_scores[:topK])))\n",
+ " return result\n",
+ " \n",
+ " def estimate(self, user_code_id, item_code_id, test_ui):\n",
+ " result=[]\n",
+ " for user, item in zip(*test_ui.nonzero()):\n",
+ " result.append([user_code_id[user], item_code_id[item], self.row_means[user]+self.col_means[item]])\n",
+ " return result"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Training data:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "matrix([[3, 4, 0, 0, 5, 0, 0, 4],\n",
+ " [0, 1, 2, 3, 0, 0, 0, 0],\n",
+ " [0, 0, 0, 5, 0, 3, 4, 0]])"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "After subtracting rows and columns:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "matrix([[ 0. , 0.5, 0. , 0. , 0. , 0. , 0. , 0. ],\n",
+ " [ 0. , -0.5, 0. , 0. , 0. , 0. , 0. , 0. ],\n",
+ " [ 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ]])"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Recommend best unseen item:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[[0, 30, 5.0], [10, 40, 3.0], [20, 40, 5.0]]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Print estimations on unseen items:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " user | \n",
+ " item | \n",
+ " est_score | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 60 | \n",
+ " 4.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 10 | \n",
+ " 40 | \n",
+ " 3.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 20 | \n",
+ " 0 | \n",
+ " 3.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 20 | \n",
+ " 20 | \n",
+ " 4.0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 20 | \n",
+ " 70 | \n",
+ " 4.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " user item est_score\n",
+ "0 0 60 4.0\n",
+ "1 10 40 3.0\n",
+ "2 20 0 3.0\n",
+ "3 20 20 4.0\n",
+ "4 20 70 4.0"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "toy_train_read=pd.read_csv('./Datasets/toy-example/train.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n",
+ "toy_test_read=pd.read_csv('./Datasets/toy-example/test.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n",
+ "\n",
+ "toy_train_ui, toy_test_ui, toy_user_code_id, toy_user_id_code, \\\n",
+ "toy_item_code_id, toy_item_id_code = helpers.data_to_csr(toy_train_read, toy_test_read)\n",
+ "\n",
+ "print('Training data:')\n",
+ "display(toy_train_ui.todense())\n",
+ "\n",
+ "model=selfBaselineUI()\n",
+ "print('After subtracting rows and columns:')\n",
+ "display(model.fit(toy_train_ui).todense())\n",
+ "\n",
+ "print('Recommend best unseen item:')\n",
+ "display(model.recommend(toy_user_code_id, toy_item_code_id, topK=1))\n",
+ "\n",
+ "print('Print estimations on unseen items:')\n",
+ "estimations=pd.DataFrame(model.estimate(toy_user_code_id, toy_item_code_id, toy_test_ui))\n",
+ "estimations.columns=['user', 'item', 'est_score']\n",
+ "display(estimations)\n",
+ "\n",
+ "top_n=pd.DataFrame(model.recommend(toy_user_code_id, toy_item_code_id, topK=3))\n",
+ "\n",
+ "top_n.to_csv('Recommendations generated/toy-example/Self_BaselineUI_reco.csv', index=False, header=False)\n",
+ "\n",
+ "estimations=pd.DataFrame(model.estimate(toy_user_code_id, toy_item_code_id, toy_test_ui))\n",
+ "estimations.to_csv('Recommendations generated/toy-example/Self_BaselineUI_estimations.csv', index=False, header=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "model=selfBaselineUI()\n",
+ "model.fit(train_ui)\n",
+ "\n",
+ "top_n=pd.DataFrame(model.recommend(user_code_id, item_code_id, topK=10))\n",
+ "\n",
+ "top_n.to_csv('Recommendations generated/ml-100k/Self_BaselineUI_reco.csv', index=False, header=False)\n",
+ "\n",
+ "estimations=pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))\n",
+ "estimations.to_csv('Recommendations generated/ml-100k/Self_BaselineUI_estimations.csv', index=False, header=False)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# project task 2: implement self-made BaselineIU"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Implement a recommender system which recommends movies (which the user hasn't seen), similar to BaselineUI,\n",
+ "# but which first subtracts col means and then row means\n",
+ "# The output should be saved in 'Recommendations generated/ml-100k/Self_BaselineIU_reco.csv'\n",
+ "# and 'Recommendations generated/ml-100k/Self_BaselineIU_estimations.csv'"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Ready-made baseline - Surprise implementation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Estimating biases using als...\n"
+ ]
+ }
+ ],
+ "source": [
+ "import surprise as sp\n",
+ "import time\n",
+ "\n",
+ "# Based on surprise.readthedocs.io\n",
+ "def get_top_n(predictions, n=10):\n",
+ " \n",
+ " # Here we create a dictionary whose values are lists of pairs (item, score)\n",
+ " top_n = defaultdict(list)\n",
+ " for uid, iid, true_r, est, _ in predictions:\n",
+ " top_n[uid].append((iid, est))\n",
+ " \n",
+ " result=[]\n",
+ " # Let's choose k best items in the format: (user, item1, score1, item2, score2, ...)\n",
+ " for uid, user_ratings in top_n.items():\n",
+ " user_ratings.sort(key=lambda x: x[1], reverse=True)\n",
+ " result.append([uid]+list(chain(*user_ratings[:n]))) \n",
+ " return result\n",
+ "\n",
+ "\n",
+ "reader = sp.Reader(line_format='user item rating timestamp', sep='\\t')\n",
+ "trainset = sp.Dataset.load_from_file('./Datasets/ml-100k/train.csv', reader=reader)\n",
+ "trainset = trainset.build_full_trainset() # -> it is needed for using Surprise package\n",
+ "\n",
+ "testset = sp.Dataset.load_from_file('./Datasets/ml-100k/test.csv', reader=reader)\n",
+ "testset = sp.Trainset.build_testset(testset.build_full_trainset())\n",
+ "\n",
+ "algo = sp.BaselineOnly()\n",
+ "# algo = sp.BaselineOnly(bsl_options={'method':'sgd', 'reg':0, 'n_epochs':2000})\n",
+ "# observe how bad the results of the above algorithm are\n",
+ "# more details http://courses.ischool.berkeley.edu/i290-dm/s11/SECURE/a1-koren.pdf - chapter 2.1\n",
+ "\n",
+ "algo.fit(trainset)\n",
+ "\n",
+ "antitrainset = trainset.build_anti_testset() # We want to predict ratings of pairs (user, item) which are not in train set\n",
+ "predictions = algo.test(antitrainset)\n",
+ "\n",
+ "top_n = get_top_n(predictions, n=10)\n",
+ "\n",
+ "top_n=pd.DataFrame(top_n)\n",
+ "\n",
+ "top_n.to_csv('Recommendations generated/ml-100k/Ready_Baseline_reco.csv', index=False, header=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "RMSE: 0.9495\n",
+ "MAE: 0.7525\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "0.7524871012820799"
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Compute RMSE on the test set using built-in functions\n",
+ "predictions = algo.test(testset)\n",
+ "sp.accuracy.rmse(predictions, verbose=True)\n",
+ "\n",
+ "# Let's also save the results in file\n",
+ "predictions_df=[]\n",
+ "for uid, iid, true_r, est, _ in predictions:\n",
+ " predictions_df.append([uid, iid, est])\n",
+ " \n",
+ "predictions_df=pd.DataFrame(predictions_df)\n",
+ "predictions_df.to_csv('Recommendations generated/ml-100k/Ready_Baseline_estimations.csv', index=False, header=False)\n",
+ "\n",
+ "sp.accuracy.mae(predictions, verbose=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "##### Let's compare with random"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "RMSE: 1.5133\n",
+ "MAE: 1.2143\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "1.2143089419556985"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# In Surprise, the random algorithm (NormalPredictor) predicts a random value drawn from a normal distribution estimated from the train set\n",
+ "algo = sp.NormalPredictor()\n",
+ "algo.fit(trainset)\n",
+ "\n",
+ "antitrainset = trainset.build_anti_testset() # We want to predict ratings of pairs (user, item) which are not in train set\n",
+ "predictions = algo.test(antitrainset)\n",
+ "\n",
+ "top_n = get_top_n(predictions, n=10)\n",
+ "\n",
+ "top_n=pd.DataFrame(top_n)\n",
+ "\n",
+ "top_n.to_csv('Recommendations generated/ml-100k/Ready_Random_reco.csv', index=False, header=False)\n",
+ "\n",
+ "# Compute RMSE on the test set using built-in functions\n",
+ "predictions = algo.test(testset)\n",
+ "sp.accuracy.rmse(predictions, verbose=True)\n",
+ "\n",
+ "# Let's also save the results in file\n",
+ "predictions_df=[]\n",
+ "for uid, iid, true_r, est, _ in predictions:\n",
+ " predictions_df.append([uid, iid, est])\n",
+ " \n",
+ "predictions_df=pd.DataFrame(predictions_df)\n",
+ "predictions_df.to_csv('Recommendations generated/ml-100k/Ready_Random_estimations.csv', index=False, header=False)\n",
+ "\n",
+ "sp.accuracy.mae(predictions, verbose=True)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/P1. Introduction and baseline.pdf b/P1. Introduction and baseline.pdf
new file mode 100644
index 0000000..fe035c9
Binary files /dev/null and b/P1. Introduction and baseline.pdf differ
diff --git a/P2. Evaluation.ipynb b/P2. Evaluation.ipynb
new file mode 100644
index 0000000..3caa717
--- /dev/null
+++ b/P2. Evaluation.ipynb
@@ -0,0 +1,2745 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Prepare test set"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "-"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import scipy.sparse as sparse\n",
+ "from collections import defaultdict\n",
+ "from itertools import chain\n",
+ "import random\n",
+ "from tqdm import tqdm\n",
+ "\n",
+ "# In evaluation we do not load train set - it is not needed\n",
+ "test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n",
+ "test.columns=['user', 'item', 'rating', 'timestamp']\n",
+ "\n",
+ "test['user_code'] = test['user'].astype(\"category\").cat.codes\n",
+ "test['item_code'] = test['item'].astype(\"category\").cat.codes\n",
+ "\n",
+ "user_code_id = dict(enumerate(test['user'].astype(\"category\").cat.categories))\n",
+ "user_id_code = dict((v, k) for k, v in user_code_id.items())\n",
+ "item_code_id = dict(enumerate(test['item'].astype(\"category\").cat.categories))\n",
+ "item_id_code = dict((v, k) for k, v in item_code_id.items())\n",
+ "\n",
+ "test_ui = sparse.csr_matrix((test['rating'], (test['user_code'], test['item_code'])))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Estimations metrics"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "estimations_df=pd.read_csv('Recommendations generated/ml-100k/Ready_Baseline_estimations.csv', header=None)\n",
+ "estimations_df.columns=['user', 'item' ,'score']\n",
+ "\n",
+ "estimations_df['user_code']=[user_id_code[user] for user in estimations_df['user']]\n",
+ "estimations_df['item_code']=[item_id_code[item] for item in estimations_df['item']]\n",
+ "estimations=sparse.csr_matrix((estimations_df['score'], (estimations_df['user_code'], estimations_df['item_code'])), shape=test_ui.shape)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def estimations_metrics(test_ui, estimations):\n",
+ " result=[]\n",
+ "\n",
+ " RMSE=(np.sum((estimations.data-test_ui.data)**2)/estimations.nnz)**(1/2)\n",
+ " result.append(['RMSE', RMSE])\n",
+ "\n",
+ " MAE=np.sum(abs(estimations.data-test_ui.data))/estimations.nnz\n",
+ " result.append(['MAE', MAE])\n",
+ " \n",
+ " df_result=(pd.DataFrame(list(zip(*result))[1])).T\n",
+ " df_result.columns=list(zip(*result))[0]\n",
+ " return df_result"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " RMSE | \n",
+ " MAE | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0.949459 | \n",
+ " 0.752487 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " RMSE MAE\n",
+ "0 0.949459 0.752487"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# In case of an error (in the laboratories) you might have to switch to a different version of pandas:\n",
+ "# try !pip3 install pandas=='1.0.3' (or pip if you use Python 2) and restart the kernel\n",
+ "\n",
+ "estimations_metrics(test_ui, estimations)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Ranking metrics"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[663, 475, 62, ..., 472, 269, 503],\n",
+ " [ 48, 313, 475, ..., 591, 175, 466],\n",
+ " [351, 313, 475, ..., 591, 175, 466],\n",
+ " ...,\n",
+ " [259, 313, 475, ..., 11, 591, 175],\n",
+ " [ 33, 313, 475, ..., 11, 591, 175],\n",
+ " [ 77, 313, 475, ..., 11, 591, 175]])"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import numpy as np\n",
+ "reco = np.loadtxt('Recommendations generated/ml-100k/Ready_Baseline_reco.csv', delimiter=',')\n",
+ "# Let's ignore scores - they are not used in evaluation: \n",
+ "users=reco[:,:1]\n",
+ "items=reco[:,1::2]\n",
+ "# Let's use inner ids instead of real ones\n",
+ "users=np.vectorize(lambda x: user_id_code.setdefault(x, -1))(users)\n",
+ "items=np.vectorize(lambda x: item_id_code.setdefault(x, -1))(items) # maybe items we recommend are not in test set\n",
+ "# Let's put them into one array\n",
+ "reco=np.concatenate((users, items), axis=1)\n",
+ "reco"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def ranking_metrics(test_ui, reco, super_reactions=[], topK=10):\n",
+ " \n",
+ " nb_items=test_ui.shape[1]\n",
+ " relevant_users, super_relevant_users, prec, rec, F_1, F_05, prec_super, rec_super, ndcg, mAP, MRR, LAUC, HR=\\\n",
+ " 0,0,0,0,0,0,0,0,0,0,0,0,0\n",
+ " \n",
+ " cg = (1.0 / np.log2(np.arange(2, topK + 2)))\n",
+ " cg_sum = np.cumsum(cg)\n",
+ " \n",
+ " for (nb_user, user) in tqdm(enumerate(reco[:,0])):\n",
+ " u_rated_items=test_ui.indices[test_ui.indptr[user]:test_ui.indptr[user+1]]\n",
+ " nb_u_rated_items=len(u_rated_items)\n",
+ " if nb_u_rated_items>0: # skip users with no items in test set (still possible that there will be no super items)\n",
+ " relevant_users+=1\n",
+ " \n",
+ " u_super_items=u_rated_items[np.vectorize(lambda x: x in super_reactions)\\\n",
+ " (test_ui.data[test_ui.indptr[user]:test_ui.indptr[user+1]])]\n",
+ " # a more natural form would be u_super_items=[item for item in u_rated_items if test_ui[user,item] in super_reactions]\n",
+ " # but accessing test_ui[user,item] is expensive - we should avoid doing it\n",
+ " if len(u_super_items)>0:\n",
+ " super_relevant_users+=1\n",
+ " \n",
+ " user_successes=np.zeros(topK)\n",
+ " nb_user_successes=0\n",
+ " user_super_successes=np.zeros(topK)\n",
+ " nb_user_super_successes=0\n",
+ " \n",
+ " # evaluation\n",
+ " for (item_position,item) in enumerate(reco[nb_user,1:topK+1]):\n",
+ " if item in u_rated_items:\n",
+ " user_successes[item_position]=1\n",
+ " nb_user_successes+=1\n",
+ " if item in u_super_items:\n",
+ " user_super_successes[item_position]=1\n",
+ " nb_user_super_successes+=1\n",
+ " \n",
+ " prec_u=nb_user_successes/topK \n",
+ " prec+=prec_u\n",
+ " \n",
+ " rec_u=nb_user_successes/nb_u_rated_items\n",
+ " rec+=rec_u\n",
+ " \n",
+ " F_1+=2*(prec_u*rec_u)/(prec_u+rec_u) if prec_u+rec_u>0 else 0\n",
+ " F_05+=(0.5**2+1)*(prec_u*rec_u)/(0.5**2*prec_u+rec_u) if prec_u+rec_u>0 else 0\n",
+ " \n",
+ " prec_super+=nb_user_super_successes/topK\n",
+ " rec_super+=nb_user_super_successes/max(len(u_super_items),1) # to set 0 if no super items\n",
+ " ndcg+=np.dot(user_successes,cg)/cg_sum[min(topK, nb_u_rated_items)-1]\n",
+ " \n",
+ " cumsum_successes=np.cumsum(user_successes)\n",
+ " mAP+=np.dot(cumsum_successes/np.arange(1,topK+1), user_successes)/min(topK, nb_u_rated_items)\n",
+ " MRR+=1/(user_successes.nonzero()[0][0]+1) if user_successes.nonzero()[0].size>0 else 0\n",
+ " LAUC+=(np.dot(cumsum_successes, 1-user_successes)+\\\n",
+ " (nb_user_successes+nb_u_rated_items)/2*((nb_items-nb_u_rated_items)-(topK-nb_user_successes)))/\\\n",
+ " ((nb_items-nb_u_rated_items)*nb_u_rated_items)\n",
+ " \n",
+ " HR+=nb_user_successes>0\n",
+ " \n",
+ " \n",
+ " result=[]\n",
+ " result.append(('precision', prec/relevant_users))\n",
+ " result.append(('recall', rec/relevant_users))\n",
+ " result.append(('F_1', F_1/relevant_users))\n",
+ " result.append(('F_05', F_05/relevant_users))\n",
+ " result.append(('precision_super', prec_super/super_relevant_users))\n",
+ " result.append(('recall_super', rec_super/super_relevant_users))\n",
+ " result.append(('NDCG', ndcg/relevant_users))\n",
+ " result.append(('mAP', mAP/relevant_users))\n",
+ " result.append(('MRR', MRR/relevant_users))\n",
+ " result.append(('LAUC', LAUC/relevant_users))\n",
+ " result.append(('HR', HR/relevant_users))\n",
+ "\n",
+ " df_result=(pd.DataFrame(list(zip(*result))[1])).T\n",
+ " df_result.columns=list(zip(*result))[0]\n",
+ " return df_result"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "943it [00:00, 7647.02it/s]\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " precision | \n",
+ " recall | \n",
+ " F_1 | \n",
+ " F_05 | \n",
+ " precision_super | \n",
+ " recall_super | \n",
+ " NDCG | \n",
+ " mAP | \n",
+ " MRR | \n",
+ " LAUC | \n",
+ " HR | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0.09141 | \n",
+ " 0.037652 | \n",
+ " 0.04603 | \n",
+ " 0.061286 | \n",
+ " 0.079614 | \n",
+ " 0.056463 | \n",
+ " 0.095957 | \n",
+ " 0.043178 | \n",
+ " 0.198193 | \n",
+ " 0.515501 | \n",
+ " 0.437964 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " precision recall F_1 F_05 precision_super recall_super \\\n",
+ "0 0.09141 0.037652 0.04603 0.061286 0.079614 0.056463 \n",
+ "\n",
+ " NDCG mAP MRR LAUC HR \n",
+ "0 0.095957 0.043178 0.198193 0.515501 0.437964 "
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ranking_metrics(test_ui, reco, super_reactions=[4,5], topK=10)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Diversity metrics"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def diversity_metrics(test_ui, reco, topK=10):\n",
+ " \n",
+ " frequencies=defaultdict(int)\n",
+ " \n",
+ " # let's assign 0 to all items in test set\n",
+ " for item in list(set(test_ui.indices)):\n",
+ " frequencies[item]=0\n",
+ " \n",
+ " # counting frequencies\n",
+ " for item in reco[:,1:].flat:\n",
+ " frequencies[item]+=1\n",
+ " \n",
+ " nb_reco_outside_test=frequencies[-1]\n",
+ " del frequencies[-1]\n",
+ " \n",
+ " frequencies=np.array(list(frequencies.values()))\n",
+ " \n",
+ " nb_rec_items=len(frequencies[frequencies>0])\n",
+ " nb_reco_inside_test=np.sum(frequencies)\n",
+ " \n",
+ " frequencies=frequencies/np.sum(frequencies)\n",
+ " frequencies=np.sort(frequencies)\n",
+ " \n",
+ " with np.errstate(divide='ignore'): # let's put zeros put items with 0 frequency and ignore division warning\n",
+ " log_frequencies=np.nan_to_num(np.log(frequencies), posinf=0, neginf=0)\n",
+ " \n",
+ " result=[]\n",
+ " result.append(('Reco in test', nb_reco_inside_test/(nb_reco_inside_test+nb_reco_outside_test)))\n",
+ " result.append(('Test coverage', nb_rec_items/test_ui.shape[1]))\n",
+ " result.append(('Shannon', -np.dot(frequencies, log_frequencies)))\n",
+ " result.append(('Gini', np.dot(frequencies, np.arange(1-len(frequencies), len(frequencies), 2))/(len(frequencies)-1)))\n",
+ " \n",
+ " df_result=(pd.DataFrame(list(zip(*result))[1])).T\n",
+ " df_result.columns=list(zip(*result))[0]\n",
+ " return df_result"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Reco in test | \n",
+ " Test coverage | \n",
+ " Shannon | \n",
+ " Gini | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1.0 | \n",
+ " 0.033911 | \n",
+ " 2.836513 | \n",
+ " 0.991139 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Reco in test Test coverage Shannon Gini\n",
+ "0 1.0 0.033911 2.836513 0.991139"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# in case of errors try !pip3 install numpy==1.18.4 (or pip if you use python 2) and restart the kernel\n",
+ "\n",
+ "import evaluation_measures as ev\n",
+ "import imp\n",
+ "imp.reload(ev)\n",
+ "\n",
+ "x=diversity_metrics(test_ui, reco, topK=10)\n",
+ "x"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# To be used in other notebooks"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "943it [00:00, 7829.39it/s]\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " RMSE | \n",
+ " MAE | \n",
+ " precision | \n",
+ " recall | \n",
+ " F_1 | \n",
+ " F_05 | \n",
+ " precision_super | \n",
+ " recall_super | \n",
+ " NDCG | \n",
+ " mAP | \n",
+ " MRR | \n",
+ " LAUC | \n",
+ " HR | \n",
+ " Reco in test | \n",
+ " Test coverage | \n",
+ " Shannon | \n",
+ " Gini | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0.949459 | \n",
+ " 0.752487 | \n",
+ " 0.09141 | \n",
+ " 0.037652 | \n",
+ " 0.04603 | \n",
+ " 0.061286 | \n",
+ " 0.079614 | \n",
+ " 0.056463 | \n",
+ " 0.095957 | \n",
+ " 0.043178 | \n",
+ " 0.198193 | \n",
+ " 0.515501 | \n",
+ " 0.437964 | \n",
+ " 1.0 | \n",
+ " 0.033911 | \n",
+ " 2.836513 | \n",
+ " 0.991139 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " RMSE MAE precision recall F_1 F_05 \\\n",
+ "0 0.949459 0.752487 0.09141 0.037652 0.04603 0.061286 \n",
+ "\n",
+ " precision_super recall_super NDCG mAP MRR LAUC \\\n",
+ "0 0.079614 0.056463 0.095957 0.043178 0.198193 0.515501 \n",
+ "\n",
+ " HR Reco in test Test coverage Shannon Gini \n",
+ "0 0.437964 1.0 0.033911 2.836513 0.991139 "
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import evaluation_measures as ev\n",
+ "import imp\n",
+ "imp.reload(ev)\n",
+ "\n",
+ "estimations_df=pd.read_csv('Recommendations generated/ml-100k/Ready_Baseline_estimations.csv', header=None)\n",
+ "reco=np.loadtxt('Recommendations generated/ml-100k/Ready_Baseline_reco.csv', delimiter=',')\n",
+ "\n",
+ "ev.evaluate(test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None),\n",
+ " estimations_df=estimations_df, \n",
+ " reco=reco,\n",
+ " super_reactions=[4,5])\n",
+ "# you can also just call ev.evaluate(estimations_df=estimations_df, reco=reco) - the other values above are set as defaults"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "943it [00:00, 7954.38it/s]\n",
+ "943it [00:00, 4698.62it/s]\n",
+ "943it [00:00, 5104.10it/s]\n",
+ "943it [00:00, 4853.27it/s]\n",
+ "943it [00:00, 4669.78it/s]\n",
+ "943it [00:00, 4207.34it/s]\n",
+ "943it [00:00, 5248.26it/s]\n",
+ "943it [00:00, 4477.59it/s]\n",
+ "943it [00:00, 4280.31it/s]\n",
+ "943it [00:00, 3915.20it/s]\n",
+ "943it [00:00, 4648.51it/s]\n",
+ "943it [00:00, 3819.45it/s]\n",
+ "943it [00:00, 4405.24it/s]\n",
+ "943it [00:00, 4725.10it/s]\n",
+ "943it [00:00, 4426.18it/s]\n",
+ "943it [00:00, 4179.78it/s]\n",
+ "943it [00:00, 4919.92it/s]\n"
+ ]
+ }
+ ],
+ "source": [
+ "import evaluation_measures as ev\n",
+ "import imp\n",
+ "imp.reload(ev)\n",
+ "\n",
+ "dir_path=\"Recommendations generated/ml-100k/\"\n",
+ "super_reactions=[4,5]\n",
+ "test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n",
+ "\n",
+ "df=ev.evaluate_all(test, dir_path, super_reactions)\n",
+ "#also you can just type ev.evaluate_all() - I put above values as default"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Model | \n",
+ " RMSE | \n",
+ " MAE | \n",
+ " precision | \n",
+ " recall | \n",
+ " F_1 | \n",
+ " F_05 | \n",
+ " precision_super | \n",
+ " recall_super | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Ready_LightFM | \n",
+ " 162.703697 | \n",
+ " 160.837311 | \n",
+ " 0.349523 | \n",
+ " 0.226193 | \n",
+ " 0.225202 | \n",
+ " 0.265538 | \n",
+ " 0.246459 | \n",
+ " 0.266934 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_LightFMpureMF | \n",
+ " 8.015665 | \n",
+ " 7.520402 | \n",
+ " 0.333934 | \n",
+ " 0.216047 | \n",
+ " 0.214731 | \n",
+ " 0.253177 | \n",
+ " 0.232725 | \n",
+ " 0.254485 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Self_P3 | \n",
+ " 3.702446 | \n",
+ " 3.527273 | \n",
+ " 0.282185 | \n",
+ " 0.192092 | \n",
+ " 0.186749 | \n",
+ " 0.216980 | \n",
+ " 0.204185 | \n",
+ " 0.240096 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_ImplicitALS | \n",
+ " 3.267237 | \n",
+ " 3.068493 | \n",
+ " 0.252068 | \n",
+ " 0.182639 | \n",
+ " 0.175182 | \n",
+ " 0.199457 | \n",
+ " 0.167167 | \n",
+ " 0.216308 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Self_TopPop | \n",
+ " 2.508258 | \n",
+ " 2.217909 | \n",
+ " 0.188865 | \n",
+ " 0.116919 | \n",
+ " 0.118732 | \n",
+ " 0.141584 | \n",
+ " 0.130472 | \n",
+ " 0.137473 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_LightFMcontent | \n",
+ " 182.840876 | \n",
+ " 180.771141 | \n",
+ " 0.161294 | \n",
+ " 0.100424 | \n",
+ " 0.101736 | \n",
+ " 0.121096 | \n",
+ " 0.101395 | \n",
+ " 0.110660 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_SVD | \n",
+ " 0.953076 | \n",
+ " 0.750219 | \n",
+ " 0.094804 | \n",
+ " 0.045302 | \n",
+ " 0.051519 | \n",
+ " 0.065833 | \n",
+ " 0.083691 | \n",
+ " 0.074336 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Self_SVD | \n",
+ " 0.913840 | \n",
+ " 0.717167 | \n",
+ " 0.105620 | \n",
+ " 0.044070 | \n",
+ " 0.053839 | \n",
+ " 0.071381 | \n",
+ " 0.096030 | \n",
+ " 0.074982 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_Baseline | \n",
+ " 0.949459 | \n",
+ " 0.752487 | \n",
+ " 0.091410 | \n",
+ " 0.037652 | \n",
+ " 0.046030 | \n",
+ " 0.061286 | \n",
+ " 0.079614 | \n",
+ " 0.056463 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_SVDBiased | \n",
+ " 0.941830 | \n",
+ " 0.742841 | \n",
+ " 0.083033 | \n",
+ " 0.034867 | \n",
+ " 0.041967 | \n",
+ " 0.055644 | \n",
+ " 0.072425 | \n",
+ " 0.054271 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Self_GlobalAvg | \n",
+ " 1.125760 | \n",
+ " 0.943534 | \n",
+ " 0.061188 | \n",
+ " 0.025968 | \n",
+ " 0.031383 | \n",
+ " 0.041343 | \n",
+ " 0.040558 | \n",
+ " 0.032107 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_Random | \n",
+ " 1.513348 | \n",
+ " 1.214309 | \n",
+ " 0.044221 | \n",
+ " 0.019366 | \n",
+ " 0.022599 | \n",
+ " 0.029593 | \n",
+ " 0.026288 | \n",
+ " 0.018226 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_I-KNN | \n",
+ " 1.030386 | \n",
+ " 0.813067 | \n",
+ " 0.026087 | \n",
+ " 0.006908 | \n",
+ " 0.010593 | \n",
+ " 0.016046 | \n",
+ " 0.021137 | \n",
+ " 0.009522 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_I-KNNBaseline | \n",
+ " 0.935327 | \n",
+ " 0.737424 | \n",
+ " 0.002545 | \n",
+ " 0.000755 | \n",
+ " 0.001105 | \n",
+ " 0.001602 | \n",
+ " 0.002253 | \n",
+ " 0.000930 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_U-KNN | \n",
+ " 1.023495 | \n",
+ " 0.807913 | \n",
+ " 0.000742 | \n",
+ " 0.000205 | \n",
+ " 0.000305 | \n",
+ " 0.000449 | \n",
+ " 0.000536 | \n",
+ " 0.000198 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Self_BaselineUI | \n",
+ " 0.967585 | \n",
+ " 0.762740 | \n",
+ " 0.000954 | \n",
+ " 0.000170 | \n",
+ " 0.000278 | \n",
+ " 0.000463 | \n",
+ " 0.000644 | \n",
+ " 0.000189 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Self_IKNN | \n",
+ " 1.018363 | \n",
+ " 0.808793 | \n",
+ " 0.000318 | \n",
+ " 0.000108 | \n",
+ " 0.000140 | \n",
+ " 0.000189 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Model RMSE MAE precision recall \\\n",
+ "0 Ready_LightFM 162.703697 160.837311 0.349523 0.226193 \n",
+ "0 Ready_LightFMpureMF 8.015665 7.520402 0.333934 0.216047 \n",
+ "0 Self_P3 3.702446 3.527273 0.282185 0.192092 \n",
+ "0 Ready_ImplicitALS 3.267237 3.068493 0.252068 0.182639 \n",
+ "0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 \n",
+ "0 Ready_LightFMcontent 182.840876 180.771141 0.161294 0.100424 \n",
+ "0 Ready_SVD 0.953076 0.750219 0.094804 0.045302 \n",
+ "0 Self_SVD 0.913840 0.717167 0.105620 0.044070 \n",
+ "0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 \n",
+ "0 Ready_SVDBiased 0.941830 0.742841 0.083033 0.034867 \n",
+ "0 Self_GlobalAvg 1.125760 0.943534 0.061188 0.025968 \n",
+ "0 Ready_Random 1.513348 1.214309 0.044221 0.019366 \n",
+ "0 Ready_I-KNN 1.030386 0.813067 0.026087 0.006908 \n",
+ "0 Ready_I-KNNBaseline 0.935327 0.737424 0.002545 0.000755 \n",
+ "0 Ready_U-KNN 1.023495 0.807913 0.000742 0.000205 \n",
+ "0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 \n",
+ "0 Self_IKNN 1.018363 0.808793 0.000318 0.000108 \n",
+ "\n",
+ " F_1 F_05 precision_super recall_super \n",
+ "0 0.225202 0.265538 0.246459 0.266934 \n",
+ "0 0.214731 0.253177 0.232725 0.254485 \n",
+ "0 0.186749 0.216980 0.204185 0.240096 \n",
+ "0 0.175182 0.199457 0.167167 0.216308 \n",
+ "0 0.118732 0.141584 0.130472 0.137473 \n",
+ "0 0.101736 0.121096 0.101395 0.110660 \n",
+ "0 0.051519 0.065833 0.083691 0.074336 \n",
+ "0 0.053839 0.071381 0.096030 0.074982 \n",
+ "0 0.046030 0.061286 0.079614 0.056463 \n",
+ "0 0.041967 0.055644 0.072425 0.054271 \n",
+ "0 0.031383 0.041343 0.040558 0.032107 \n",
+ "0 0.022599 0.029593 0.026288 0.018226 \n",
+ "0 0.010593 0.016046 0.021137 0.009522 \n",
+ "0 0.001105 0.001602 0.002253 0.000930 \n",
+ "0 0.000305 0.000449 0.000536 0.000198 \n",
+ "0 0.000278 0.000463 0.000644 0.000189 \n",
+ "0 0.000140 0.000189 0.000000 0.000000 "
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.iloc[:,:9]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Model | \n",
+ " NDCG | \n",
+ " mAP | \n",
+ " MRR | \n",
+ " LAUC | \n",
+ " HR | \n",
+ " Reco in test | \n",
+ " Test coverage | \n",
+ " Shannon | \n",
+ " Gini | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Ready_LightFM | \n",
+ " 0.413969 | \n",
+ " 0.277036 | \n",
+ " 0.648029 | \n",
+ " 0.610845 | \n",
+ " 0.916225 | \n",
+ " 1.000000 | \n",
+ " 0.352814 | \n",
+ " 5.363070 | \n",
+ " 0.885116 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_LightFMpureMF | \n",
+ " 0.391316 | \n",
+ " 0.257793 | \n",
+ " 0.606204 | \n",
+ " 0.605708 | \n",
+ " 0.906681 | \n",
+ " 1.000000 | \n",
+ " 0.272006 | \n",
+ " 5.031437 | \n",
+ " 0.918177 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Self_P3 | \n",
+ " 0.339114 | \n",
+ " 0.204905 | \n",
+ " 0.572157 | \n",
+ " 0.593544 | \n",
+ " 0.875928 | \n",
+ " 1.000000 | \n",
+ " 0.077201 | \n",
+ " 3.875892 | \n",
+ " 0.974947 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_ImplicitALS | \n",
+ " 0.295331 | \n",
+ " 0.163847 | \n",
+ " 0.500282 | \n",
+ " 0.588672 | \n",
+ " 0.873807 | \n",
+ " 0.999894 | \n",
+ " 0.497835 | \n",
+ " 5.727745 | \n",
+ " 0.825683 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Self_TopPop | \n",
+ " 0.214651 | \n",
+ " 0.111707 | \n",
+ " 0.400939 | \n",
+ " 0.555546 | \n",
+ " 0.765642 | \n",
+ " 1.000000 | \n",
+ " 0.038961 | \n",
+ " 3.159079 | \n",
+ " 0.987317 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_LightFMcontent | \n",
+ " 0.184311 | \n",
+ " 0.091346 | \n",
+ " 0.352019 | \n",
+ " 0.547187 | \n",
+ " 0.705196 | \n",
+ " 0.979533 | \n",
+ " 0.269120 | \n",
+ " 4.940084 | \n",
+ " 0.924146 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_SVD | \n",
+ " 0.107620 | \n",
+ " 0.051155 | \n",
+ " 0.234251 | \n",
+ " 0.519361 | \n",
+ " 0.490986 | \n",
+ " 0.993425 | \n",
+ " 0.206349 | \n",
+ " 4.406898 | \n",
+ " 0.953781 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Self_SVD | \n",
+ " 0.109138 | \n",
+ " 0.051857 | \n",
+ " 0.202054 | \n",
+ " 0.518772 | \n",
+ " 0.478261 | \n",
+ " 0.872959 | \n",
+ " 0.144300 | \n",
+ " 3.912577 | \n",
+ " 0.971609 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_Baseline | \n",
+ " 0.095957 | \n",
+ " 0.043178 | \n",
+ " 0.198193 | \n",
+ " 0.515501 | \n",
+ " 0.437964 | \n",
+ " 1.000000 | \n",
+ " 0.033911 | \n",
+ " 2.836513 | \n",
+ " 0.991139 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_SVDBiased | \n",
+ " 0.090974 | \n",
+ " 0.041243 | \n",
+ " 0.195741 | \n",
+ " 0.514084 | \n",
+ " 0.418876 | \n",
+ " 0.998409 | \n",
+ " 0.168831 | \n",
+ " 4.152102 | \n",
+ " 0.964603 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Self_GlobalAvg | \n",
+ " 0.067695 | \n",
+ " 0.027470 | \n",
+ " 0.171187 | \n",
+ " 0.509546 | \n",
+ " 0.384942 | \n",
+ " 1.000000 | \n",
+ " 0.025974 | \n",
+ " 2.711772 | \n",
+ " 0.992003 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_Random | \n",
+ " 0.047273 | \n",
+ " 0.017729 | \n",
+ " 0.114687 | \n",
+ " 0.506181 | \n",
+ " 0.301166 | \n",
+ " 0.986002 | \n",
+ " 0.184704 | \n",
+ " 5.093324 | \n",
+ " 0.907405 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_I-KNN | \n",
+ " 0.024214 | \n",
+ " 0.008958 | \n",
+ " 0.048068 | \n",
+ " 0.499885 | \n",
+ " 0.154825 | \n",
+ " 0.402333 | \n",
+ " 0.434343 | \n",
+ " 5.133650 | \n",
+ " 0.877999 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_I-KNNBaseline | \n",
+ " 0.003444 | \n",
+ " 0.001362 | \n",
+ " 0.011760 | \n",
+ " 0.496724 | \n",
+ " 0.021209 | \n",
+ " 0.482821 | \n",
+ " 0.059885 | \n",
+ " 2.232578 | \n",
+ " 0.994487 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_U-KNN | \n",
+ " 0.000845 | \n",
+ " 0.000274 | \n",
+ " 0.002744 | \n",
+ " 0.496441 | \n",
+ " 0.007423 | \n",
+ " 0.602121 | \n",
+ " 0.010823 | \n",
+ " 2.089186 | \n",
+ " 0.995706 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Self_BaselineUI | \n",
+ " 0.000752 | \n",
+ " 0.000168 | \n",
+ " 0.001677 | \n",
+ " 0.496424 | \n",
+ " 0.009544 | \n",
+ " 0.600530 | \n",
+ " 0.005051 | \n",
+ " 1.803126 | \n",
+ " 0.996380 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Self_IKNN | \n",
+ " 0.000214 | \n",
+ " 0.000037 | \n",
+ " 0.000368 | \n",
+ " 0.496391 | \n",
+ " 0.003181 | \n",
+ " 0.392153 | \n",
+ " 0.115440 | \n",
+ " 4.174741 | \n",
+ " 0.965327 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Model NDCG mAP MRR LAUC HR \\\n",
+ "0 Ready_LightFM 0.413969 0.277036 0.648029 0.610845 0.916225 \n",
+ "0 Ready_LightFMpureMF 0.391316 0.257793 0.606204 0.605708 0.906681 \n",
+ "0 Self_P3 0.339114 0.204905 0.572157 0.593544 0.875928 \n",
+ "0 Ready_ImplicitALS 0.295331 0.163847 0.500282 0.588672 0.873807 \n",
+ "0 Self_TopPop 0.214651 0.111707 0.400939 0.555546 0.765642 \n",
+ "0 Ready_LightFMcontent 0.184311 0.091346 0.352019 0.547187 0.705196 \n",
+ "0 Ready_SVD 0.107620 0.051155 0.234251 0.519361 0.490986 \n",
+ "0 Self_SVD 0.109138 0.051857 0.202054 0.518772 0.478261 \n",
+ "0 Ready_Baseline 0.095957 0.043178 0.198193 0.515501 0.437964 \n",
+ "0 Ready_SVDBiased 0.090974 0.041243 0.195741 0.514084 0.418876 \n",
+ "0 Self_GlobalAvg 0.067695 0.027470 0.171187 0.509546 0.384942 \n",
+ "0 Ready_Random 0.047273 0.017729 0.114687 0.506181 0.301166 \n",
+ "0 Ready_I-KNN 0.024214 0.008958 0.048068 0.499885 0.154825 \n",
+ "0 Ready_I-KNNBaseline 0.003444 0.001362 0.011760 0.496724 0.021209 \n",
+ "0 Ready_U-KNN 0.000845 0.000274 0.002744 0.496441 0.007423 \n",
+ "0 Self_BaselineUI 0.000752 0.000168 0.001677 0.496424 0.009544 \n",
+ "0 Self_IKNN 0.000214 0.000037 0.000368 0.496391 0.003181 \n",
+ "\n",
+ " Reco in test Test coverage Shannon Gini \n",
+ "0 1.000000 0.352814 5.363070 0.885116 \n",
+ "0 1.000000 0.272006 5.031437 0.918177 \n",
+ "0 1.000000 0.077201 3.875892 0.974947 \n",
+ "0 0.999894 0.497835 5.727745 0.825683 \n",
+ "0 1.000000 0.038961 3.159079 0.987317 \n",
+ "0 0.979533 0.269120 4.940084 0.924146 \n",
+ "0 0.993425 0.206349 4.406898 0.953781 \n",
+ "0 0.872959 0.144300 3.912577 0.971609 \n",
+ "0 1.000000 0.033911 2.836513 0.991139 \n",
+ "0 0.998409 0.168831 4.152102 0.964603 \n",
+ "0 1.000000 0.025974 2.711772 0.992003 \n",
+ "0 0.986002 0.184704 5.093324 0.907405 \n",
+ "0 0.402333 0.434343 5.133650 0.877999 \n",
+ "0 0.482821 0.059885 2.232578 0.994487 \n",
+ "0 0.602121 0.010823 2.089186 0.995706 \n",
+ "0 0.600530 0.005051 1.803126 0.996380 \n",
+ "0 0.392153 0.115440 4.174741 0.965327 "
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.iloc[:,np.append(0,np.arange(9, df.shape[1]))]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Check metrics on toy dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "3it [00:00, 4233.82it/s]\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Model | \n",
+ " RMSE | \n",
+ " MAE | \n",
+ " precision | \n",
+ " recall | \n",
+ " F_1 | \n",
+ " F_05 | \n",
+ " precision_super | \n",
+ " recall_super | \n",
+ " NDCG | \n",
+ " mAP | \n",
+ " MRR | \n",
+ " LAUC | \n",
+ " HR | \n",
+ " Reco in test | \n",
+ " Test coverage | \n",
+ " Shannon | \n",
+ " Gini | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Self_BaselineUI | \n",
+ " 1.612452 | \n",
+ " 1.4 | \n",
+ " 0.444444 | \n",
+ " 0.888889 | \n",
+ " 0.555556 | \n",
+ " 0.478632 | \n",
+ " 0.333333 | \n",
+ " 0.75 | \n",
+ " 0.676907 | \n",
+ " 0.574074 | \n",
+ " 0.611111 | \n",
+ " 0.638889 | \n",
+ " 1.0 | \n",
+ " 0.888889 | \n",
+ " 0.8 | \n",
+ " 1.386294 | \n",
+ " 0.25 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Model RMSE MAE precision recall F_1 F_05 \\\n",
+ "0 Self_BaselineUI 1.612452 1.4 0.444444 0.888889 0.555556 0.478632 \n",
+ "\n",
+ " precision_super recall_super NDCG mAP MRR LAUC HR \\\n",
+ "0 0.333333 0.75 0.676907 0.574074 0.611111 0.638889 1.0 \n",
+ "\n",
+ " Reco in test Test coverage Shannon Gini \n",
+ "0 0.888889 0.8 1.386294 0.25 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Training data:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "matrix([[3, 4, 0, 0, 5, 0, 0, 4],\n",
+ " [0, 1, 2, 3, 0, 0, 0, 0],\n",
+ " [0, 0, 0, 5, 0, 3, 4, 0]])"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Test data:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "matrix([[0, 0, 0, 0, 0, 0, 3, 0],\n",
+ " [0, 0, 0, 0, 5, 0, 0, 0],\n",
+ " [5, 0, 4, 0, 0, 0, 0, 2]])"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Recommendations:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 6 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 30 | \n",
+ " 5.0 | \n",
+ " 20 | \n",
+ " 4.0 | \n",
+ " 60 | \n",
+ " 4.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 10 | \n",
+ " 40 | \n",
+ " 3.0 | \n",
+ " 60 | \n",
+ " 2.0 | \n",
+ " 70 | \n",
+ " 2.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 20 | \n",
+ " 40 | \n",
+ " 5.0 | \n",
+ " 20 | \n",
+ " 4.0 | \n",
+ " 70 | \n",
+ " 4.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " 0 1 2 3 4 5 6\n",
+ "0 0 30 5.0 20 4.0 60 4.0\n",
+ "1 10 40 3.0 60 2.0 70 2.0\n",
+ "2 20 40 5.0 20 4.0 70 4.0"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Estimations:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " user | \n",
+ " item | \n",
+ " est_score | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 60 | \n",
+ " 4.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 10 | \n",
+ " 40 | \n",
+ " 3.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 20 | \n",
+ " 0 | \n",
+ " 3.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 20 | \n",
+ " 20 | \n",
+ " 4.0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 20 | \n",
+ " 70 | \n",
+ " 4.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " user item est_score\n",
+ "0 0 60 4.0\n",
+ "1 10 40 3.0\n",
+ "2 20 0 3.0\n",
+ "3 20 20 4.0\n",
+ "4 20 70 4.0"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "import evaluation_measures as ev\n",
+ "import imp\n",
+ "import helpers\n",
+ "imp.reload(ev)\n",
+ "\n",
+ "dir_path=\"Recommendations generated/toy-example/\"\n",
+ "super_reactions=[4,5]\n",
+ "test=pd.read_csv('./Datasets/toy-example/test.csv', sep='\\t', header=None)\n",
+ "\n",
+ "display(ev.evaluate_all(test, dir_path, super_reactions, topK=3))\n",
+ "# dir_path, super_reactions and topK have defaults in ev.evaluate_all (ml-100k directory, [4,5], 10); only test is required\n",
+ "\n",
+ "toy_train_read=pd.read_csv('./Datasets/toy-example/train.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n",
+ "toy_test_read=pd.read_csv('./Datasets/toy-example/test.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n",
+ "reco=pd.read_csv('Recommendations generated/toy-example/Self_BaselineUI_reco.csv', header=None)\n",
+ "estimations=pd.read_csv('Recommendations generated/toy-example/Self_BaselineUI_estimations.csv', names=['user', 'item', 'est_score'])\n",
+ "toy_train_ui, toy_test_ui, toy_user_code_id, toy_user_id_code, \\\n",
+ "toy_item_code_id, toy_item_id_code = helpers.data_to_csr(toy_train_read, toy_test_read)\n",
+ "\n",
+ "print('Training data:')\n",
+ "display(toy_train_ui.todense())\n",
+ "\n",
+ "print('Test data:')\n",
+ "display(toy_test_ui.todense())\n",
+ "\n",
+ "print('Recommendations:')\n",
+ "display(reco)\n",
+ "\n",
+ "print('Estimations:')\n",
+ "display(estimations)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# A/B testing"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Suppose we had\n",
+ "A_successes=1000\n",
+ "A_failures=9000\n",
+ "\n",
+ "B_successes=1500\n",
+ "B_failures=12000"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Confidence intervals"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " successes | \n",
+ " failures | \n",
+ " conversion | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " A | \n",
+ " 1000 | \n",
+ " 1500 | \n",
+ " 0.4000 | \n",
+ "
\n",
+ " \n",
+ " B | \n",
+ " 9000 | \n",
+ " 12000 | \n",
+ " 0.4286 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " successes failures conversion\n",
+ "A 1000 1500 0.4000\n",
+ "B 9000 12000 0.4286"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df=pd.DataFrame({'successes': [A_successes, A_failures],'failures': [B_successes,B_failures]}, index=['A','B'])\n",
+ "df['conversion']=df.apply(lambda x: round(x['successes']/(x['successes']+x['failures']),4), axis=1)\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " successes | \n",
+ " failures | \n",
+ " conversion | \n",
+ " conf_interval | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " A | \n",
+ " 1000 | \n",
+ " 1500 | \n",
+ " 0.4000 | \n",
+ " [0.3808, 0.4194] | \n",
+ "
\n",
+ " \n",
+ " B | \n",
+ " 9000 | \n",
+ " 12000 | \n",
+ " 0.4286 | \n",
+ " [0.4219, 0.4353] | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " successes failures conversion conf_interval\n",
+ "A 1000 1500 0.4000 [0.3808, 0.4194]\n",
+ "B 9000 12000 0.4286 [0.4219, 0.4353]"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "significance=0.95\n",
+ "\n",
+ "from statsmodels.stats.proportion import proportion_confint\n",
+ "df['conf_interval']=df.apply(lambda x: [round(i,4) for i in proportion_confint(count=x['successes'], nobs=x['successes']+x['failures'], alpha=1-significance, method='binom_test')], axis=1)\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "p-value: 0.006729080907452261\n"
+ ]
+ }
+ ],
+ "source": [
+ "from scipy.stats import chi2_contingency\n",
+ "cond = np.array([[A_successes, A_failures], [B_successes, B_failures]])\n",
+ "print(f'p-value: {chi2_contingency(cond)[1]}')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### How many observations do we need? Power analysis "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Effect size: 0.02041241452319317\n",
+ "Samples needed: 18837\n"
+ ]
+ }
+ ],
+ "source": [
+ "# sample size calculator: https://www.evanmiller.org/ab-testing/sample-size.html \n",
+ "# for now let's assume conversion from control group is known\n",
+ "\n",
+ "from statsmodels.stats.power import GofChisquarePower\n",
+ "from statsmodels.stats.gof import chisquare_effectsize\n",
+ "\n",
+ "effect_size=chisquare_effectsize([df['conversion']['A'], 1-df['conversion']['A']], \n",
+ " [df['conversion']['A']+0.01, 1-df['conversion']['A']-0.01])\n",
+ "print(f'Effect size: {effect_size}')\n",
+ "print(f'Samples needed: {round(GofChisquarePower().solve_power(effect_size, power=.8, n_bins=2, alpha=0.05))}')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Effect size: 0.07001400420140048\n",
+ "Samples needed: 1601\n"
+ ]
+ }
+ ],
+ "source": [
+ "# for now let's assume conversion from control group is known\n",
+ "# it's not correct looking at https://www.evanmiller.org/ab-testing/sample-size.html\n",
+ "from statsmodels.stats.power import GofChisquarePower\n",
+ "from statsmodels.stats.gof import chisquare_effectsize\n",
+ "n_levels_variable_a = 1 # to verify\n",
+ "n_levels_variable_b = 2\n",
+ "\n",
+ "effect_size=chisquare_effectsize([0.15, 0.85], [0.125,0.875])\n",
+ "print(f'Effect size: {effect_size}')\n",
+ "print(f'Samples needed: {round(GofChisquarePower().solve_power(effect_size, power=.8, n_bins=(n_levels_variable_a)*(n_levels_variable_b), alpha=0.05))}')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Sample recommendations"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Here is what user rated high:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " user | \n",
+ " rating | \n",
+ " title | \n",
+ " genres | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 41281 | \n",
+ " 437 | \n",
+ " 5 | \n",
+ " Gone with the Wind (1939) | \n",
+ " Drama, Romance, War | \n",
+ "
\n",
+ " \n",
+ " 28880 | \n",
+ " 437 | \n",
+ " 5 | \n",
+ " Pinocchio (1940) | \n",
+ " Animation, Children's | \n",
+ "
\n",
+ " \n",
+ " 36888 | \n",
+ " 437 | \n",
+ " 5 | \n",
+ " Backbeat (1993) | \n",
+ " Drama, Musical | \n",
+ "
\n",
+ " \n",
+ " 36713 | \n",
+ " 437 | \n",
+ " 5 | \n",
+ " Lone Star (1996) | \n",
+ " Drama, Mystery | \n",
+ "
\n",
+ " \n",
+ " 36122 | \n",
+ " 437 | \n",
+ " 5 | \n",
+ " Silence of the Lambs, The (1991) | \n",
+ " Drama, Thriller | \n",
+ "
\n",
+ " \n",
+ " 32783 | \n",
+ " 437 | \n",
+ " 5 | \n",
+ " Muriel's Wedding (1994) | \n",
+ " Comedy, Romance | \n",
+ "
\n",
+ " \n",
+ " 30950 | \n",
+ " 437 | \n",
+ " 5 | \n",
+ " Rosewood (1997) | \n",
+ " Drama | \n",
+ "
\n",
+ " \n",
+ " 30386 | \n",
+ " 437 | \n",
+ " 5 | \n",
+ " Manchurian Candidate, The (1962) | \n",
+ " Film-Noir, Thriller | \n",
+ "
\n",
+ " \n",
+ " 29411 | \n",
+ " 437 | \n",
+ " 5 | \n",
+ " Psycho (1960) | \n",
+ " Horror, Romance, Thriller | \n",
+ "
\n",
+ " \n",
+ " 27655 | \n",
+ " 437 | \n",
+ " 5 | \n",
+ " Vertigo (1958) | \n",
+ " Mystery, Thriller | \n",
+ "
\n",
+ " \n",
+ " 14735 | \n",
+ " 437 | \n",
+ " 5 | \n",
+ " Raising Arizona (1987) | \n",
+ " Comedy | \n",
+ "
\n",
+ " \n",
+ " 27563 | \n",
+ " 437 | \n",
+ " 5 | \n",
+ " Young Frankenstein (1974) | \n",
+ " Comedy, Horror | \n",
+ "
\n",
+ " \n",
+ " 26524 | \n",
+ " 437 | \n",
+ " 5 | \n",
+ " Everyone Says I Love You (1996) | \n",
+ " Comedy, Musical, Romance | \n",
+ "
\n",
+ " \n",
+ " 25618 | \n",
+ " 437 | \n",
+ " 5 | \n",
+ " Citizen Kane (1941) | \n",
+ " Drama | \n",
+ "
\n",
+ " \n",
+ " 23714 | \n",
+ " 437 | \n",
+ " 5 | \n",
+ " Casablanca (1942) | \n",
+ " Drama, Romance, War | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " user rating title \\\n",
+ "41281 437 5 Gone with the Wind (1939) \n",
+ "28880 437 5 Pinocchio (1940) \n",
+ "36888 437 5 Backbeat (1993) \n",
+ "36713 437 5 Lone Star (1996) \n",
+ "36122 437 5 Silence of the Lambs, The (1991) \n",
+ "32783 437 5 Muriel's Wedding (1994) \n",
+ "30950 437 5 Rosewood (1997) \n",
+ "30386 437 5 Manchurian Candidate, The (1962) \n",
+ "29411 437 5 Psycho (1960) \n",
+ "27655 437 5 Vertigo (1958) \n",
+ "14735 437 5 Raising Arizona (1987) \n",
+ "27563 437 5 Young Frankenstein (1974) \n",
+ "26524 437 5 Everyone Says I Love You (1996) \n",
+ "25618 437 5 Citizen Kane (1941) \n",
+ "23714 437 5 Casablanca (1942) \n",
+ "\n",
+ " genres \n",
+ "41281 Drama, Romance, War \n",
+ "28880 Animation, Children's \n",
+ "36888 Drama, Musical \n",
+ "36713 Drama, Mystery \n",
+ "36122 Drama, Thriller \n",
+ "32783 Comedy, Romance \n",
+ "30950 Drama \n",
+ "30386 Film-Noir, Thriller \n",
+ "29411 Horror, Romance, Thriller \n",
+ "27655 Mystery, Thriller \n",
+ "14735 Comedy \n",
+ "27563 Comedy, Horror \n",
+ "26524 Comedy, Musical, Romance \n",
+ "25618 Drama \n",
+ "23714 Drama, Romance, War "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Here is what we recommend:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " user | \n",
+ " rec_nb | \n",
+ " title | \n",
+ " genres | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 435 | \n",
+ " 437.0 | \n",
+ " 1 | \n",
+ " Great Day in Harlem, A (1994) | \n",
+ " Documentary | \n",
+ "
\n",
+ " \n",
+ " 1377 | \n",
+ " 437.0 | \n",
+ " 2 | \n",
+ " Tough and Deadly (1995) | \n",
+ " Action, Drama, Thriller | \n",
+ "
\n",
+ " \n",
+ " 2319 | \n",
+ " 437.0 | \n",
+ " 3 | \n",
+ " Aiqing wansui (1994) | \n",
+ " Drama | \n",
+ "
\n",
+ " \n",
+ " 3261 | \n",
+ " 437.0 | \n",
+ " 4 | \n",
+ " Delta of Venus (1994) | \n",
+ " Drama | \n",
+ "
\n",
+ " \n",
+ " 5145 | \n",
+ " 437.0 | \n",
+ " 5 | \n",
+ " Saint of Fort Washington, The (1993) | \n",
+ " Drama | \n",
+ "
\n",
+ " \n",
+ " 6087 | \n",
+ " 437.0 | \n",
+ " 6 | \n",
+ " Celestial Clockwork (1994) | \n",
+ " Comedy | \n",
+ "
\n",
+ " \n",
+ " 7030 | \n",
+ " 437.0 | \n",
+ " 7 | \n",
+ " Some Mother's Son (1996) | \n",
+ " Drama | \n",
+ "
\n",
+ " \n",
+ " 8924 | \n",
+ " 437.0 | \n",
+ " 8 | \n",
+ " Maya Lin: A Strong Clear Vision (1994) | \n",
+ " Documentary | \n",
+ "
\n",
+ " \n",
+ " 7970 | \n",
+ " 437.0 | \n",
+ " 9 | \n",
+ " Prefontaine (1997) | \n",
+ " Drama | \n",
+ "
\n",
+ " \n",
+ " 8485 | \n",
+ " 437.0 | \n",
+ " 10 | \n",
+ " Santa with Muscles (1996) | \n",
+ " Comedy | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " user rec_nb title \\\n",
+ "435 437.0 1 Great Day in Harlem, A (1994) \n",
+ "1377 437.0 2 Tough and Deadly (1995) \n",
+ "2319 437.0 3 Aiqing wansui (1994) \n",
+ "3261 437.0 4 Delta of Venus (1994) \n",
+ "5145 437.0 5 Saint of Fort Washington, The (1993) \n",
+ "6087 437.0 6 Celestial Clockwork (1994) \n",
+ "7030 437.0 7 Some Mother's Son (1996) \n",
+ "8924 437.0 8 Maya Lin: A Strong Clear Vision (1994) \n",
+ "7970 437.0 9 Prefontaine (1997) \n",
+ "8485 437.0 10 Santa with Muscles (1996) \n",
+ "\n",
+ " genres \n",
+ "435 Documentary \n",
+ "1377 Action, Drama, Thriller \n",
+ "2319 Drama \n",
+ "3261 Drama \n",
+ "5145 Drama \n",
+ "6087 Comedy \n",
+ "7030 Drama \n",
+ "8924 Documentary \n",
+ "7970 Drama \n",
+ "8485 Comedy "
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "train=pd.read_csv('./Datasets/ml-100k/train.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n",
+ "items=pd.read_csv('./Datasets/ml-100k/movies.csv')\n",
+ "\n",
+ "user=random.choice(list(set(train['user'])))\n",
+ "\n",
+ "train_content=pd.merge(train, items, left_on='item', right_on='id')\n",
+ "\n",
+ "print('Here is what user rated high:')\n",
+ "display(train_content[train_content['user']==user][['user', 'rating', 'title', 'genres']]\\\n",
+ " .sort_values(by='rating', ascending=False)[:15])\n",
+ "\n",
+ "reco = np.loadtxt('Recommendations generated/ml-100k/Self_BaselineUI_reco.csv', delimiter=',')\n",
+ "items=pd.read_csv('./Datasets/ml-100k/movies.csv')\n",
+ "\n",
+ "# Let's ignore scores - they are not used in evaluation: \n",
+ "reco_users=reco[:,:1]\n",
+ "reco_items=reco[:,1::2]\n",
+ "# Let's put them into one array\n",
+ "reco=np.concatenate((reco_users, reco_items), axis=1)\n",
+ "\n",
+ "# Let's rebuild it user-item dataframe\n",
+ "recommended=[]\n",
+ "for row in reco:\n",
+ " for rec_nb, entry in enumerate(row[1:]):\n",
+ " recommended.append((row[0], rec_nb+1, entry))\n",
+ "recommended=pd.DataFrame(recommended, columns=['user','rec_nb', 'item'])\n",
+ "\n",
+ "recommended_content=pd.merge(recommended, items, left_on='item', right_on='id')\n",
+ "\n",
+ "print('Here is what we recommend:')\n",
+ "recommended_content[recommended_content['user']==user][['user', 'rec_nb', 'title', 'genres']].sort_values(by='rec_nb')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# project task 3: implement some other evaluation measure"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# it may be your idea, modification of what we have already implemented \n",
+ "# (for example Hit2 rate which would count as a success users who received at least 2 relevant recommendations) \n",
+ "# or something well-known\n",
+ "# expected output: modification of evaluation_measures.py such that evaluate_all will also display your measure"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "943it [00:00, 5476.88it/s]\n",
+ "943it [00:00, 4421.14it/s]\n",
+ "943it [00:00, 5056.87it/s]\n",
+ "943it [00:00, 5642.22it/s]\n",
+ "943it [00:00, 2776.13it/s]\n",
+ "943it [00:00, 3004.22it/s]\n",
+ "943it [00:00, 3802.86it/s]\n",
+ "943it [00:00, 3421.26it/s]\n",
+ "943it [00:00, 5077.51it/s]\n",
+ "943it [00:00, 4927.51it/s]\n",
+ "943it [00:00, 4246.38it/s]\n",
+ "943it [00:00, 4295.31it/s]\n",
+ "943it [00:00, 4362.79it/s]\n",
+ "943it [00:00, 6241.10it/s]\n",
+ "943it [00:00, 4318.95it/s]\n",
+ "943it [00:00, 5054.75it/s]\n",
+ "943it [00:00, 3839.80it/s]\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Model | \n",
+ " RMSE | \n",
+ " MAE | \n",
+ " precision | \n",
+ " recall | \n",
+ " F_1 | \n",
+ " F_05 | \n",
+ " precision_super | \n",
+ " recall_super | \n",
+ " NDCG | \n",
+ " mAP | \n",
+ " MRR | \n",
+ " LAUC | \n",
+ " HR | \n",
+ " Reco in test | \n",
+ " Test coverage | \n",
+ " Shannon | \n",
+ " Gini | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Ready_LightFM | \n",
+ " 162.703697 | \n",
+ " 160.837311 | \n",
+ " 0.349523 | \n",
+ " 0.226193 | \n",
+ " 0.225202 | \n",
+ " 0.265538 | \n",
+ " 0.246459 | \n",
+ " 0.266934 | \n",
+ " 0.413969 | \n",
+ " 0.277036 | \n",
+ " 0.648029 | \n",
+ " 0.610845 | \n",
+ " 0.916225 | \n",
+ " 1.000000 | \n",
+ " 0.352814 | \n",
+ " 5.363070 | \n",
+ " 0.885116 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_LightFMpureMF | \n",
+ " 8.015665 | \n",
+ " 7.520402 | \n",
+ " 0.333934 | \n",
+ " 0.216047 | \n",
+ " 0.214731 | \n",
+ " 0.253177 | \n",
+ " 0.232725 | \n",
+ " 0.254485 | \n",
+ " 0.391316 | \n",
+ " 0.257793 | \n",
+ " 0.606204 | \n",
+ " 0.605708 | \n",
+ " 0.906681 | \n",
+ " 1.000000 | \n",
+ " 0.272006 | \n",
+ " 5.031437 | \n",
+ " 0.918177 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Self_P3 | \n",
+ " 3.702446 | \n",
+ " 3.527273 | \n",
+ " 0.282185 | \n",
+ " 0.192092 | \n",
+ " 0.186749 | \n",
+ " 0.216980 | \n",
+ " 0.204185 | \n",
+ " 0.240096 | \n",
+ " 0.339114 | \n",
+ " 0.204905 | \n",
+ " 0.572157 | \n",
+ " 0.593544 | \n",
+ " 0.875928 | \n",
+ " 1.000000 | \n",
+ " 0.077201 | \n",
+ " 3.875892 | \n",
+ " 0.974947 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_ImplicitALS | \n",
+ " 3.267237 | \n",
+ " 3.068493 | \n",
+ " 0.252068 | \n",
+ " 0.182639 | \n",
+ " 0.175182 | \n",
+ " 0.199457 | \n",
+ " 0.167167 | \n",
+ " 0.216308 | \n",
+ " 0.295331 | \n",
+ " 0.163847 | \n",
+ " 0.500282 | \n",
+ " 0.588672 | \n",
+ " 0.873807 | \n",
+ " 0.999894 | \n",
+ " 0.497835 | \n",
+ " 5.727745 | \n",
+ " 0.825683 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Self_TopPop | \n",
+ " 2.508258 | \n",
+ " 2.217909 | \n",
+ " 0.188865 | \n",
+ " 0.116919 | \n",
+ " 0.118732 | \n",
+ " 0.141584 | \n",
+ " 0.130472 | \n",
+ " 0.137473 | \n",
+ " 0.214651 | \n",
+ " 0.111707 | \n",
+ " 0.400939 | \n",
+ " 0.555546 | \n",
+ " 0.765642 | \n",
+ " 1.000000 | \n",
+ " 0.038961 | \n",
+ " 3.159079 | \n",
+ " 0.987317 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_LightFMcontent | \n",
+ " 182.840876 | \n",
+ " 180.771141 | \n",
+ " 0.161294 | \n",
+ " 0.100424 | \n",
+ " 0.101736 | \n",
+ " 0.121096 | \n",
+ " 0.101395 | \n",
+ " 0.110660 | \n",
+ " 0.184311 | \n",
+ " 0.091346 | \n",
+ " 0.352019 | \n",
+ " 0.547187 | \n",
+ " 0.705196 | \n",
+ " 0.979533 | \n",
+ " 0.269120 | \n",
+ " 4.940084 | \n",
+ " 0.924146 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_SVD | \n",
+ " 0.953076 | \n",
+ " 0.750219 | \n",
+ " 0.094804 | \n",
+ " 0.045302 | \n",
+ " 0.051519 | \n",
+ " 0.065833 | \n",
+ " 0.083691 | \n",
+ " 0.074336 | \n",
+ " 0.107620 | \n",
+ " 0.051155 | \n",
+ " 0.234251 | \n",
+ " 0.519361 | \n",
+ " 0.490986 | \n",
+ " 0.993425 | \n",
+ " 0.206349 | \n",
+ " 4.406898 | \n",
+ " 0.953781 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Self_SVD | \n",
+ " 0.913840 | \n",
+ " 0.717167 | \n",
+ " 0.105620 | \n",
+ " 0.044070 | \n",
+ " 0.053839 | \n",
+ " 0.071381 | \n",
+ " 0.096030 | \n",
+ " 0.074982 | \n",
+ " 0.109138 | \n",
+ " 0.051857 | \n",
+ " 0.202054 | \n",
+ " 0.518772 | \n",
+ " 0.478261 | \n",
+ " 0.872959 | \n",
+ " 0.144300 | \n",
+ " 3.912577 | \n",
+ " 0.971609 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_Baseline | \n",
+ " 0.949459 | \n",
+ " 0.752487 | \n",
+ " 0.091410 | \n",
+ " 0.037652 | \n",
+ " 0.046030 | \n",
+ " 0.061286 | \n",
+ " 0.079614 | \n",
+ " 0.056463 | \n",
+ " 0.095957 | \n",
+ " 0.043178 | \n",
+ " 0.198193 | \n",
+ " 0.515501 | \n",
+ " 0.437964 | \n",
+ " 1.000000 | \n",
+ " 0.033911 | \n",
+ " 2.836513 | \n",
+ " 0.991139 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_SVDBiased | \n",
+ " 0.941830 | \n",
+ " 0.742841 | \n",
+ " 0.083033 | \n",
+ " 0.034867 | \n",
+ " 0.041967 | \n",
+ " 0.055644 | \n",
+ " 0.072425 | \n",
+ " 0.054271 | \n",
+ " 0.090974 | \n",
+ " 0.041243 | \n",
+ " 0.195741 | \n",
+ " 0.514084 | \n",
+ " 0.418876 | \n",
+ " 0.998409 | \n",
+ " 0.168831 | \n",
+ " 4.152102 | \n",
+ " 0.964603 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Self_GlobalAvg | \n",
+ " 1.125760 | \n",
+ " 0.943534 | \n",
+ " 0.061188 | \n",
+ " 0.025968 | \n",
+ " 0.031383 | \n",
+ " 0.041343 | \n",
+ " 0.040558 | \n",
+ " 0.032107 | \n",
+ " 0.067695 | \n",
+ " 0.027470 | \n",
+ " 0.171187 | \n",
+ " 0.509546 | \n",
+ " 0.384942 | \n",
+ " 1.000000 | \n",
+ " 0.025974 | \n",
+ " 2.711772 | \n",
+ " 0.992003 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_Random | \n",
+ " 1.513348 | \n",
+ " 1.214309 | \n",
+ " 0.044221 | \n",
+ " 0.019366 | \n",
+ " 0.022599 | \n",
+ " 0.029593 | \n",
+ " 0.026288 | \n",
+ " 0.018226 | \n",
+ " 0.047273 | \n",
+ " 0.017729 | \n",
+ " 0.114687 | \n",
+ " 0.506181 | \n",
+ " 0.301166 | \n",
+ " 0.986002 | \n",
+ " 0.184704 | \n",
+ " 5.093324 | \n",
+ " 0.907405 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_I-KNN | \n",
+ " 1.030386 | \n",
+ " 0.813067 | \n",
+ " 0.026087 | \n",
+ " 0.006908 | \n",
+ " 0.010593 | \n",
+ " 0.016046 | \n",
+ " 0.021137 | \n",
+ " 0.009522 | \n",
+ " 0.024214 | \n",
+ " 0.008958 | \n",
+ " 0.048068 | \n",
+ " 0.499885 | \n",
+ " 0.154825 | \n",
+ " 0.402333 | \n",
+ " 0.434343 | \n",
+ " 5.133650 | \n",
+ " 0.877999 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_I-KNNBaseline | \n",
+ " 0.935327 | \n",
+ " 0.737424 | \n",
+ " 0.002545 | \n",
+ " 0.000755 | \n",
+ " 0.001105 | \n",
+ " 0.001602 | \n",
+ " 0.002253 | \n",
+ " 0.000930 | \n",
+ " 0.003444 | \n",
+ " 0.001362 | \n",
+ " 0.011760 | \n",
+ " 0.496724 | \n",
+ " 0.021209 | \n",
+ " 0.482821 | \n",
+ " 0.059885 | \n",
+ " 2.232578 | \n",
+ " 0.994487 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Ready_U-KNN | \n",
+ " 1.023495 | \n",
+ " 0.807913 | \n",
+ " 0.000742 | \n",
+ " 0.000205 | \n",
+ " 0.000305 | \n",
+ " 0.000449 | \n",
+ " 0.000536 | \n",
+ " 0.000198 | \n",
+ " 0.000845 | \n",
+ " 0.000274 | \n",
+ " 0.002744 | \n",
+ " 0.496441 | \n",
+ " 0.007423 | \n",
+ " 0.602121 | \n",
+ " 0.010823 | \n",
+ " 2.089186 | \n",
+ " 0.995706 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Self_BaselineUI | \n",
+ " 0.967585 | \n",
+ " 0.762740 | \n",
+ " 0.000954 | \n",
+ " 0.000170 | \n",
+ " 0.000278 | \n",
+ " 0.000463 | \n",
+ " 0.000644 | \n",
+ " 0.000189 | \n",
+ " 0.000752 | \n",
+ " 0.000168 | \n",
+ " 0.001677 | \n",
+ " 0.496424 | \n",
+ " 0.009544 | \n",
+ " 0.600530 | \n",
+ " 0.005051 | \n",
+ " 1.803126 | \n",
+ " 0.996380 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Self_IKNN | \n",
+ " 1.018363 | \n",
+ " 0.808793 | \n",
+ " 0.000318 | \n",
+ " 0.000108 | \n",
+ " 0.000140 | \n",
+ " 0.000189 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000214 | \n",
+ " 0.000037 | \n",
+ " 0.000368 | \n",
+ " 0.496391 | \n",
+ " 0.003181 | \n",
+ " 0.392153 | \n",
+ " 0.115440 | \n",
+ " 4.174741 | \n",
+ " 0.965327 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Model RMSE MAE precision recall \\\n",
+ "0 Ready_LightFM 162.703697 160.837311 0.349523 0.226193 \n",
+ "0 Ready_LightFMpureMF 8.015665 7.520402 0.333934 0.216047 \n",
+ "0 Self_P3 3.702446 3.527273 0.282185 0.192092 \n",
+ "0 Ready_ImplicitALS 3.267237 3.068493 0.252068 0.182639 \n",
+ "0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 \n",
+ "0 Ready_LightFMcontent 182.840876 180.771141 0.161294 0.100424 \n",
+ "0 Ready_SVD 0.953076 0.750219 0.094804 0.045302 \n",
+ "0 Self_SVD 0.913840 0.717167 0.105620 0.044070 \n",
+ "0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 \n",
+ "0 Ready_SVDBiased 0.941830 0.742841 0.083033 0.034867 \n",
+ "0 Self_GlobalAvg 1.125760 0.943534 0.061188 0.025968 \n",
+ "0 Ready_Random 1.513348 1.214309 0.044221 0.019366 \n",
+ "0 Ready_I-KNN 1.030386 0.813067 0.026087 0.006908 \n",
+ "0 Ready_I-KNNBaseline 0.935327 0.737424 0.002545 0.000755 \n",
+ "0 Ready_U-KNN 1.023495 0.807913 0.000742 0.000205 \n",
+ "0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 \n",
+ "0 Self_IKNN 1.018363 0.808793 0.000318 0.000108 \n",
+ "\n",
+ " F_1 F_05 precision_super recall_super NDCG mAP \\\n",
+ "0 0.225202 0.265538 0.246459 0.266934 0.413969 0.277036 \n",
+ "0 0.214731 0.253177 0.232725 0.254485 0.391316 0.257793 \n",
+ "0 0.186749 0.216980 0.204185 0.240096 0.339114 0.204905 \n",
+ "0 0.175182 0.199457 0.167167 0.216308 0.295331 0.163847 \n",
+ "0 0.118732 0.141584 0.130472 0.137473 0.214651 0.111707 \n",
+ "0 0.101736 0.121096 0.101395 0.110660 0.184311 0.091346 \n",
+ "0 0.051519 0.065833 0.083691 0.074336 0.107620 0.051155 \n",
+ "0 0.053839 0.071381 0.096030 0.074982 0.109138 0.051857 \n",
+ "0 0.046030 0.061286 0.079614 0.056463 0.095957 0.043178 \n",
+ "0 0.041967 0.055644 0.072425 0.054271 0.090974 0.041243 \n",
+ "0 0.031383 0.041343 0.040558 0.032107 0.067695 0.027470 \n",
+ "0 0.022599 0.029593 0.026288 0.018226 0.047273 0.017729 \n",
+ "0 0.010593 0.016046 0.021137 0.009522 0.024214 0.008958 \n",
+ "0 0.001105 0.001602 0.002253 0.000930 0.003444 0.001362 \n",
+ "0 0.000305 0.000449 0.000536 0.000198 0.000845 0.000274 \n",
+ "0 0.000278 0.000463 0.000644 0.000189 0.000752 0.000168 \n",
+ "0 0.000140 0.000189 0.000000 0.000000 0.000214 0.000037 \n",
+ "\n",
+ " MRR LAUC HR Reco in test Test coverage Shannon \\\n",
+ "0 0.648029 0.610845 0.916225 1.000000 0.352814 5.363070 \n",
+ "0 0.606204 0.605708 0.906681 1.000000 0.272006 5.031437 \n",
+ "0 0.572157 0.593544 0.875928 1.000000 0.077201 3.875892 \n",
+ "0 0.500282 0.588672 0.873807 0.999894 0.497835 5.727745 \n",
+ "0 0.400939 0.555546 0.765642 1.000000 0.038961 3.159079 \n",
+ "0 0.352019 0.547187 0.705196 0.979533 0.269120 4.940084 \n",
+ "0 0.234251 0.519361 0.490986 0.993425 0.206349 4.406898 \n",
+ "0 0.202054 0.518772 0.478261 0.872959 0.144300 3.912577 \n",
+ "0 0.198193 0.515501 0.437964 1.000000 0.033911 2.836513 \n",
+ "0 0.195741 0.514084 0.418876 0.998409 0.168831 4.152102 \n",
+ "0 0.171187 0.509546 0.384942 1.000000 0.025974 2.711772 \n",
+ "0 0.114687 0.506181 0.301166 0.986002 0.184704 5.093324 \n",
+ "0 0.048068 0.499885 0.154825 0.402333 0.434343 5.133650 \n",
+ "0 0.011760 0.496724 0.021209 0.482821 0.059885 2.232578 \n",
+ "0 0.002744 0.496441 0.007423 0.602121 0.010823 2.089186 \n",
+ "0 0.001677 0.496424 0.009544 0.600530 0.005051 1.803126 \n",
+ "0 0.000368 0.496391 0.003181 0.392153 0.115440 4.174741 \n",
+ "\n",
+ " Gini \n",
+ "0 0.885116 \n",
+ "0 0.918177 \n",
+ "0 0.974947 \n",
+ "0 0.825683 \n",
+ "0 0.987317 \n",
+ "0 0.924146 \n",
+ "0 0.953781 \n",
+ "0 0.971609 \n",
+ "0 0.991139 \n",
+ "0 0.964603 \n",
+ "0 0.992003 \n",
+ "0 0.907405 \n",
+ "0 0.877999 \n",
+ "0 0.994487 \n",
+ "0 0.995706 \n",
+ "0 0.996380 \n",
+ "0 0.965327 "
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dir_path=\"Recommendations generated/ml-100k/\"\n",
+ "super_reactions=[4,5]\n",
+ "test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n",
+ "\n",
+ "ev.evaluate_all(test, dir_path, super_reactions)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/P2. Evaluation.pdf b/P2. Evaluation.pdf
new file mode 100644
index 0000000..60e0c74
Binary files /dev/null and b/P2. Evaluation.pdf differ
diff --git a/evaluation_measures.py b/evaluation_measures.py
new file mode 100644
index 0000000..ced6c1b
--- /dev/null
+++ b/evaluation_measures.py
@@ -0,0 +1,214 @@
+import os
+import sys
+import numpy as np
+import pandas as pd
+import math
+from sklearn.preprocessing import normalize
+from tqdm import tqdm
+from datetime import datetime, date
+import random
+import scipy.sparse as sparse
+from os import listdir
+from os.path import isfile, join
+from collections import defaultdict
+
+
+def evaluate(test,
+            estimations_df,
+            reco,
+            super_reactions=[4,5],
+            topK=10):
+    """Compute estimation, ranking and diversity metrics for one model.
+
+    Parameters
+    ----------
+    test : pandas.DataFrame
+        Test interactions with exactly four columns, interpreted as
+        user, item, rating, timestamp (a copy is relabelled, the input is
+        left untouched).
+    estimations_df : pandas.DataFrame
+        Three columns interpreted as user, item, score. Every user and item
+        has to occur in ``test``; otherwise the id lookup raises ``KeyError``.
+    reco : numpy.ndarray
+        One row per user: column 0 is the user id, columns 1, 3, 5, ... are
+        recommended item ids; the remaining columns are ignored here.
+        A 1-D array is treated as a single row.
+    super_reactions : list
+        Ratings forwarded to ``ranking_metrics`` for the ``*_super`` metrics.
+        The default list is never modified.
+    topK : int
+        Cut-off forwarded to ``ranking_metrics`` and ``diversity_metrics``.
+
+    Returns
+    -------
+    pandas.DataFrame
+        Column-wise concatenation of the frames returned by
+        ``estimations_metrics``, ``ranking_metrics`` and ``diversity_metrics``.
+    """
+    estimations_df=estimations_df.copy()
+    # np.loadtxt returns a 1-D array for a one-line file; the column slicing below needs 2-D
+    reco=np.atleast_2d(reco.copy())
+    test_df=test.copy()
+
+    # prepare testset
+    test_df.columns=['user', 'item', 'rating', 'timestamp']
+    users_cat=test_df['user'].astype("category")
+    items_cat=test_df['item'].astype("category")
+    test_df['user_code'] = users_cat.cat.codes
+    test_df['item_code'] = items_cat.cat.codes
+
+    # two-way mappings between real ids and the inner (row/column) codes
+    user_code_id = dict(enumerate(users_cat.cat.categories))
+    user_id_code = dict((v, k) for k, v in user_code_id.items())
+    item_code_id = dict(enumerate(items_cat.cat.categories))
+    item_id_code = dict((v, k) for k, v in item_code_id.items())
+
+    test_ui = sparse.csr_matrix((test_df['rating'], (test_df['user_code'], test_df['item_code'])))
+
+    #prepare estimations
+    estimations_df.columns=['user', 'item' ,'score']
+    estimations_df['user_code']=[user_id_code[user] for user in estimations_df['user']]
+    estimations_df['item_code']=[item_id_code[item] for item in estimations_df['item']]
+    estimations=sparse.csr_matrix((estimations_df['score'], (estimations_df['user_code'], estimations_df['item_code'])), shape=test_ui.shape)
+
+    #compute_estimations
+    estimations_df=estimations_metrics(test_ui, estimations)
+
+    #prepare reco
+    users=reco[:,:1]
+    items=reco[:,1::2]
+    # Let's use inner ids instead of real ones; ids missing from the test set become -1.
+    # dict.get is used (not setdefault) so the lookups do not add entries to the mappings.
+    users=np.vectorize(lambda x: user_id_code.get(x, -1))(users) # maybe users we recommend are not in test set
+    items=np.vectorize(lambda x: item_id_code.get(x, -1))(items) # maybe items we recommend are not in test set
+    # Let's put them into one array
+    reco=np.concatenate((users, items), axis=1)
+
+    #compute ranking metrics
+    ranking_df=ranking_metrics(test_ui, reco, super_reactions=super_reactions, topK=topK)
+
+    #compute diversity metrics
+    diversity_df=diversity_metrics(test_ui, reco, topK)
+
+    result=pd.concat([estimations_df, ranking_df, diversity_df], axis=1)
+
+    return(result)
+
+
+def ranking_metrics(test_ui, reco, super_reactions=[], topK=10):
+    """Average top-K ranking metrics over the users listed in ``reco``.
+
+    Parameters
+    ----------
+    test_ui : scipy.sparse.csr_matrix
+        User-item matrix of test ratings; its ``indptr``/``indices``/``data``
+        arrays are read directly.
+    reco : numpy.ndarray
+        One row per user: column 0 is the row index of the user in
+        ``test_ui``, columns 1..topK are recommended column indices.
+        A user index of -1 selects an empty slice and is therefore skipped.
+    super_reactions : iterable
+        Ratings that make a test item count for the ``*_super`` metrics.
+    topK : int
+        Number of recommendations evaluated per user.
+
+    Returns
+    -------
+    pandas.DataFrame
+        One row with precision, recall, F_1, F_05, precision_super,
+        recall_super, NDCG, mAP, MRR, LAUC and HR. A metric whose user count
+        is zero is reported as NaN.
+    """
+    nb_items=test_ui.shape[1]
+    relevant_users, super_relevant_users, prec, rec, F_1, F_05, prec_super, rec_super, ndcg, mAP, MRR, LAUC, HR=\
+        0,0,0,0,0,0,0,0,0,0,0,0,0
+
+    # discount weights 1/log2(position+1) for positions 1..topK and their running sum;
+    # cg_sum[k-1] is the DCG of a list whose first k positions are all hits
+    cg = (1.0 / np.log2(np.arange(2, topK + 2)))
+    cg_sum = np.cumsum(cg)
+
+    # isin needs a sequence; a list also keeps sets/tuples passed by callers working
+    super_values=list(super_reactions)
+
+    for (nb_user, user) in tqdm(enumerate(reco[:,0])):
+        row_start, row_end=test_ui.indptr[user], test_ui.indptr[user+1]
+        u_rated_items=test_ui.indices[row_start:row_end]
+        nb_u_rated_items=len(u_rated_items)
+        if nb_u_rated_items>0: # skip users with no items in test set (still possible that there will be no super items)
+            relevant_users+=1
+
+            # read ratings straight from the csr data array:
+            # accessing test_ui[user,item] element by element is expensive
+            u_super_items=u_rated_items[np.isin(test_ui.data[row_start:row_end], super_values)]
+            if len(u_super_items)>0:
+                super_relevant_users+=1
+
+            user_successes=np.zeros(topK)
+            nb_user_successes=0
+            user_super_successes=np.zeros(topK)
+            nb_user_super_successes=0
+
+            # evaluation
+            for (item_position,item) in enumerate(reco[nb_user,1:topK+1]):
+                if item in u_rated_items:
+                    user_successes[item_position]=1
+                    nb_user_successes+=1
+                    if item in u_super_items:
+                        user_super_successes[item_position]=1
+                        nb_user_super_successes+=1
+
+            prec_u=nb_user_successes/topK
+            prec+=prec_u
+
+            rec_u=nb_user_successes/nb_u_rated_items
+            rec+=rec_u
+
+            F_1+=2*(prec_u*rec_u)/(prec_u+rec_u) if prec_u+rec_u>0 else 0
+            F_05+=(0.5**2+1)*(prec_u*rec_u)/(0.5**2*prec_u+rec_u) if prec_u+rec_u>0 else 0
+
+            prec_super+=nb_user_super_successes/topK
+            rec_super+=nb_user_super_successes/max(len(u_super_items),1)
+            # DCG of the hits divided by the DCG of the best achievable list for this user
+            ndcg+=np.dot(user_successes,cg)/cg_sum[min(topK, nb_u_rated_items)-1]
+
+            cumsum_successes=np.cumsum(user_successes)
+            mAP+=np.dot(cumsum_successes/np.arange(1,topK+1), user_successes)/min(topK, nb_u_rated_items)
+            MRR+=1/(user_successes.nonzero()[0][0]+1) if user_successes.nonzero()[0].size>0 else 0
+            LAUC+=(np.dot(cumsum_successes, 1-user_successes)+\
+                   (nb_user_successes+nb_u_rated_items)/2*((nb_items-nb_u_rated_items)-(topK-nb_user_successes)))/\
+                   ((nb_items-nb_u_rated_items)*nb_u_rated_items)
+
+            HR+=nb_user_successes>0
+
+    def average(total, nb_users):
+        # an average over zero users is undefined: report NaN instead of raising ZeroDivisionError
+        return total/nb_users if nb_users>0 else np.nan
+
+    result=[]
+    result.append(('precision', average(prec, relevant_users)))
+    result.append(('recall', average(rec, relevant_users)))
+    result.append(('F_1', average(F_1, relevant_users)))
+    result.append(('F_05', average(F_05, relevant_users)))
+    # the *_super sums run over all relevant users but are averaged over users having a super item
+    result.append(('precision_super', average(prec_super, super_relevant_users)))
+    result.append(('recall_super', average(rec_super, super_relevant_users)))
+    result.append(('NDCG', average(ndcg, relevant_users)))
+    result.append(('mAP', average(mAP, relevant_users)))
+    result.append(('MRR', average(MRR, relevant_users)))
+    result.append(('LAUC', average(LAUC, relevant_users)))
+    result.append(('HR', average(HR, relevant_users)))
+
+    # one-row frame: metric names become the columns
+    names, values=zip(*result)
+    df_result=(pd.DataFrame(list(values))).T
+    df_result.columns=list(names)
+    return df_result
+
+
+def estimations_metrics(test_ui, estimations):
+    """Compute RMSE and MAE between estimated and true test ratings.
+
+    Parameters
+    ----------
+    test_ui : scipy.sparse.csr_matrix
+        True ratings.
+    estimations : scipy.sparse.csr_matrix
+        Estimated scores; must store exactly the same cells as ``test_ui``.
+
+    Returns
+    -------
+    pandas.DataFrame
+        One row with the columns ``RMSE`` and ``MAE``.
+
+    Raises
+    ------
+    ValueError
+        If the two matrices do not share the same ``indptr`` and ``indices``.
+    """
+    # the metrics subtract the raw .data arrays, so both matrices have to store
+    # the same (user, item) cells in the same order; otherwise the numbers would be
+    # silently computed on mismatched pairs
+    if not (np.array_equal(estimations.indptr, test_ui.indptr) and
+            np.array_equal(estimations.indices, test_ui.indices)):
+        raise ValueError('estimations and test_ui must store exactly the same (user, item) entries')
+
+    errors=estimations.data-test_ui.data
+    result=[]
+
+    RMSE=(np.sum(errors**2)/estimations.nnz)**(1/2)
+    result.append(['RMSE', RMSE])
+
+    MAE=np.sum(abs(errors))/estimations.nnz
+    result.append(['MAE', MAE])
+
+    # one-row frame: metric names become the columns
+    names, values=zip(*result)
+    df_result=(pd.DataFrame(list(values))).T
+    df_result.columns=list(names)
+    return df_result
+
+def diversity_metrics(test_ui, reco, topK=10):
+    """Describe how the top-K recommendations are spread over the test items.
+
+    Parameters
+    ----------
+    test_ui : scipy.sparse.csr_matrix
+        User-item matrix of test ratings; its column indices define the items
+        that are counted.
+    reco : numpy.ndarray
+        One row per user: column 0 is the user, the following columns are
+        recommended item codes. Code -1 is counted as "outside the test set"
+        (``evaluate`` assigns it to items missing from the test set).
+    topK : int
+        Only the first ``topK`` recommendations of each row are counted.
+
+    Returns
+    -------
+    pandas.DataFrame
+        One row with ``Reco in test``, ``Test coverage``, ``Shannon`` and
+        ``Gini``.
+    """
+    frequencies=defaultdict(int)
+
+    # every item present in the test set starts at zero, so never-recommended items are counted too
+    for item in list(set(test_ui.indices)):
+        frequencies[item]=0
+
+    # honour topK (it used to be ignored), consistently with ranking_metrics
+    for item in reco[:,1:topK+1].flat:
+        frequencies[item]+=1
+
+    # the -1 bucket holds recommendations of items that are not in the test set
+    nb_reco_outside_test=frequencies[-1]
+    del frequencies[-1]
+
+    frequencies=np.array(list(frequencies.values()))
+
+    nb_rec_items=len(frequencies[frequencies>0])
+    nb_reco_inside_test=np.sum(frequencies)
+
+    # turn counts into shares and sort them ascending (the Gini formula below relies on the order)
+    frequencies=frequencies/np.sum(frequencies)
+    frequencies=np.sort(frequencies)
+
+    with np.errstate(divide='ignore'): # log(0) is -inf: replace it with 0 for items with 0 frequency and ignore the warning
+        log_frequencies=np.nan_to_num(np.log(frequencies), posinf=0, neginf=0)
+
+    result=[]
+    result.append(('Reco in test', nb_reco_inside_test/(nb_reco_inside_test+nb_reco_outside_test)))
+    result.append(('Test coverage', nb_rec_items/test_ui.shape[1]))
+    result.append(('Shannon', -np.dot(frequencies, log_frequencies)))
+    result.append(('Gini', np.dot(frequencies, np.arange(1-len(frequencies), len(frequencies), 2))/(len(frequencies)-1)))
+
+    # one-row frame: metric names become the columns
+    df_result=(pd.DataFrame(list(zip(*result))[1])).T
+    df_result.columns=list(zip(*result))[0]
+    return df_result
+
+
+
+def evaluate_all(test,
+            dir_path="Recommendations generated/ml-100k/",
+            super_reactions=[4,5],
+            topK=10):
+    """Evaluate every model that has result files in ``dir_path``.
+
+    A model name is the first two ``_``-separated parts of a file name, taken
+    from files ending in ``_reco.csv`` or ``_estimations.csv``. For each model
+    both ``<model>_estimations.csv`` and ``<model>_reco.csv`` are read and
+    passed to ``evaluate``.
+
+    Parameters
+    ----------
+    test : pandas.DataFrame
+        Test interactions, forwarded to ``evaluate``.
+    dir_path : str
+        Directory with the result files; a trailing separator is optional.
+    super_reactions : list
+        Forwarded to ``evaluate``.
+    topK : int
+        Forwarded to ``evaluate``.
+
+    Returns
+    -------
+    pandas.DataFrame
+        One row per model with a leading ``Model`` column, sorted by
+        ``recall`` in descending order; an empty frame when the directory
+        holds no result files.
+    """
+    # other files in the directory (notes, hidden files, ...) must not be turned into model names
+    result_suffixes=('_reco.csv', '_estimations.csv')
+    models = sorted(set('_'.join(f.split('_')[:2]) for f in listdir(dir_path)
+                        if isfile(join(dir_path, f)) and f.endswith(result_suffixes)))
+    result=[]
+    for model in models:
+        estimations_df=pd.read_csv(join(dir_path, '{}_estimations.csv'.format(model)), header=None)
+        reco=np.loadtxt(join(dir_path, '{}_reco.csv'.format(model)), delimiter=',')
+        to_append=evaluate(test, estimations_df, reco, super_reactions, topK)
+
+        to_append.insert(0, "Model", model)
+        result.append(to_append)
+    # pd.concat raises on an empty list
+    if not result:
+        return pd.DataFrame()
+    result=pd.concat(result)
+    result=result.sort_values(by='recall', ascending=False)
+    return result
\ No newline at end of file
diff --git a/helpers.py b/helpers.py
new file mode 100644
index 0000000..94e5f3b
--- /dev/null
+++ b/helpers.py
@@ -0,0 +1,90 @@
+import pandas as pd
+import numpy as np
+import scipy.sparse as sparse
+import surprise as sp
+import time
+from collections import defaultdict
+from itertools import chain
+from six.moves.urllib.request import urlretrieve
+import zipfile
+import os
+
def data_to_csr(train_read, test_read):
    """Convert train/test interaction tables into aligned sparse matrices.

    User and item ids are mapped to consecutive integer codes computed over
    the union of both tables, so both matrices share the same shape and the
    same row/column meaning.

    Parameters
    ----------
    train_read, test_read : pandas.DataFrame
        Four-column tables. Their columns are renamed in place to
        ``['user', 'item', 'rating', 'timestamp']``.

    Returns
    -------
    tuple
        ``(train_ui, test_ui, user_code_id, user_id_code, item_code_id,
        item_id_code)``: two CSR matrices of shape (users, items) holding the
        ratings, followed by the code->id and id->code dictionaries for
        users and for items.
    """
    column_names = ['user', 'item', 'rating', 'timestamp']
    train_read.columns = column_names
    test_read.columns = column_names

    # Categories (sorted unique ids) are taken over train and test together.
    all_users = pd.concat([train_read['user'], test_read['user']], ignore_index=True)
    all_items = pd.concat([train_read['item'], test_read['item']], ignore_index=True)
    user_categories = all_users.astype("category").cat.categories
    item_categories = all_items.astype("category").cat.categories

    user_code_id = dict(enumerate(user_categories))
    user_id_code = dict((v, k) for k, v in user_code_id.items())
    item_code_id = dict(enumerate(item_categories))
    item_id_code = dict((v, k) for k, v in item_code_id.items())

    # Number of users and items
    (U, I) = (len(user_categories), len(item_categories))

    def _to_csr(frame):
        # Encode each frame directly against the shared categories. Joining
        # back onto the concatenated table would multiply rows whenever an
        # identical interaction occurs more than once, inflating the ratings
        # that the CSR constructor sums.
        user_codes = pd.Categorical(frame['user'], categories=user_categories).codes
        item_codes = pd.Categorical(frame['item'], categories=item_categories).codes
        return sparse.csr_matrix(
            (frame['rating'].to_numpy(), (user_codes, item_codes)), shape=(U, I))

    train_ui = _to_csr(train_read)
    test_ui = _to_csr(test_read)

    return train_ui, test_ui, user_code_id, user_id_code, item_code_id, item_id_code
+
+
def get_top_n(predictions, n=10):
    """Return the ``n`` highest-scored items for every user.

    Parameters
    ----------
    predictions : iterable of 5-tuples
        Each element unpacks as ``(uid, iid, true_r, est, _)``; only the
        user id, the item id and the estimated score are used.
    n : int
        Maximum number of items kept per user.

    Returns
    -------
    list of lists
        One flat row per user: ``[uid, item1, score1, item2, score2, ...]``,
        items ordered by decreasing score. Users appear in the order in
        which they are first seen in ``predictions``.
    """
    # Group (item, score) pairs by user.
    per_user = defaultdict(list)
    for prediction in predictions:
        uid, iid, _true_r, est, _details = prediction
        per_user[uid].append((iid, est))

    rows = []
    for uid, scored_items in per_user.items():
        best = sorted(scored_items, key=lambda pair: pair[1], reverse=True)[:n]
        # Flatten the (item, score) pairs behind the user id.
        rows.append([uid, *chain.from_iterable(best)])
    return rows
+
+
def ready_made(algo, reco_path, estimations_path, *,
               train_path='./Datasets/ml-100k/train.csv',
               test_path='./Datasets/ml-100k/test.csv',
               n=10):
    """Fit a Surprise algorithm and store its recommendations and estimations.

    Parameters
    ----------
    algo :
        Object exposing ``fit(trainset)`` and ``test(testset)``.
    reco_path : str
        Output CSV (no header, no index) with one row per user as produced
        by ``get_top_n``: ``user, item1, score1, item2, score2, ...``.
    estimations_path : str
        Output CSV (no header, no index) with ``user, item, estimate`` rows
        for the pairs of the test file.
    train_path, test_path : str, keyword-only
        Tab-separated ``user item rating timestamp`` files. The defaults are
        the ml-100k split files this function used originally.
    n : int, keyword-only
        Number of recommended items kept per user (default 10).
    """
    reader = sp.Reader(line_format='user item rating timestamp', sep='\t')
    trainset = sp.Dataset.load_from_file(train_path, reader=reader)
    trainset = trainset.build_full_trainset()  # -> it is needed for using Surprise package

    testset = sp.Dataset.load_from_file(test_path, reader=reader)
    testset = sp.Trainset.build_testset(testset.build_full_trainset())

    algo.fit(trainset)

    # We want to predict ratings of pairs (user, item) which are not in train set
    antitrainset = trainset.build_anti_testset()
    print('Generating predictions...')
    predictions = algo.test(antitrainset)
    print('Generating top N recommendations...')
    recommendations = pd.DataFrame(get_top_n(predictions, n=n))
    recommendations.to_csv(reco_path, index=False, header=False)

    print('Generating predictions...')
    predictions = algo.test(testset)
    # Keep only (user, item, estimate) of every prediction tuple.
    estimations = pd.DataFrame(
        [[uid, iid, est] for uid, iid, true_r, est, _ in predictions])
    estimations.to_csv(estimations_path, index=False, header=False)
+
+
def download_movielens_100k_dataset(force=False):
    """Download and unpack the MovieLens 100k archive into ``Datasets/``.

    Nothing is downloaded when ``Datasets/ml-100k`` already exists, unless
    ``force`` is true.

    Parameters
    ----------
    force : bool
        Download and extract again even if the dataset directory exists.
    """
    os.makedirs('./Datasets/', exist_ok=True)
    if os.path.isdir('Datasets/ml-100k') and not force:
        return

    url = 'http://files.grouplens.org/datasets/movielens/ml-100k.zip'
    tmp_file_path = 'Datasets/ml-100k.zip'
    try:
        urlretrieve(url, tmp_file_path)

        with zipfile.ZipFile(tmp_file_path, 'r') as tmp_zip:
            tmp_zip.extractall('Datasets/')
    finally:
        # Remove the archive even when the download or the extraction
        # fails, so that no partial or corrupt zip is left behind.
        if os.path.exists(tmp_file_path):
            os.remove(tmp_file_path)
\ No newline at end of file