diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3c5d301 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +Datasets/ +*.csv +__pycache__/ \ No newline at end of file diff --git a/P0. Data preparation.ipynb b/P0. Data preparation.ipynb new file mode 100644 index 0000000..69f002c --- /dev/null +++ b/P0. Data preparation.ipynb @@ -0,0 +1,684 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Building train and test sets" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# if you don't have some library installed try using pip (or pip3) to install it - you can do it from the notebook\n", + "# example: !pip install tqdm\n", + "# also on labs it's better to use python3 kernel - ipython3 notebook\n", + "\n", + "import pandas as pd\n", + "import numpy as np\n", + "import scipy.sparse as sparse\n", + "import time\n", + "import random\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "import os\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "import helpers\n", + "\n", + "os.makedirs('./Datasets/', exist_ok = True)\n", + "\n", + "helpers.download_movielens_100k_dataset()\n", + "\n", + "df=pd.read_csv('./Datasets/ml-100k/u.data',delimiter='\\t', header=None)\n", + "df.columns=['user', 'item', 'rating', 'timestamp']\n", + "\n", + "train, test = train_test_split(df, test_size=0.2, random_state=30)\n", + "\n", + "train.to_csv('./Datasets/ml-100k/train.csv', sep='\\t', header=None, index=False)\n", + "test.to_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None, index=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Interaction properties" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### What does the data look like?" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
useritemratingtimestamp
01962423881250949
11863023891717742
2223771878887116
3244512880606923
41663461886397596
\n", + "
" + ], + "text/plain": [ + " user item rating timestamp\n", + "0 196 242 3 881250949\n", + "1 186 302 3 891717742\n", + "2 22 377 1 878887116\n", + "3 244 51 2 880606923\n", + "4 166 346 1 886397596" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Sample properties" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "We have 943 users, 1682 items and 100000 ratings.\n", + "\n", + "Average number of ratings per user is 106.0445. \n", + "\n", + "Average number of ratings per item is 59.453.\n", + "\n", + "Data sparsity (% of missing entries) is 6.3047%.\n" + ] + } + ], + "source": [ + "users, items, ratings=df['user'].nunique(), df['item'].nunique(), len(df)\n", + "\n", + "print(f'We have {users} users, {items} items and {ratings} ratings.\\n')\n", + "\n", + "print(f'Average number of ratings per user is {round(ratings/users,4)}. 
\\n')\n", + "print(f'Average number of ratings per item is {round(ratings/items,4)}.\\n')\n", + "print(f'Data sparsity (% of missing entries) is {round(100*ratings/(users*items),4)}%.')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA6UAAAHvCAYAAACsfXllAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nOzdeZgcZbn+8fvJAoKgLIaYBGQQWUJImCSNBBGMC8vJJohL+ImAeBL0wEEE0ShCAqIGgSPimkQQUFlUDpgFkUWQXZgJCQdDFIID2QghrNmAwPP7o2omXVM9PZ3MTL01qe/nuvqa7qrq6rufqUn66ap6y9xdAAAAAACE0CN0AAAAAABAcdGUAgAAAACCoSkFAAAAAARDUwoAAAAACIamFAAAAAAQDE0pAAAAACAYmlIAyIiZTTEzj28jQ+fpbsxsJzP7npk9amavmdnbcS1fDp2tq5jZVWXbTF3oPAAAdAWaUgCdruxDdPPtoBqeM75s+SkZxEQ3YmZ9JTVK+rakeknbSbKgoTaDmY2Mv5yYQpMJAECkV+gAAArh+5I+HjoEurVzJNXF9++X9FtJKyS5pDcDZdocIyVNju/fLakpVBAAAPKCphRAFj5mZp9w9ztCB0G3NSr++ZKkI9x9bcgwWXH3kySdFDgGAABdisN3AXSl8sbh+8FSYEuwW/zzn0VpSAEAKAqaUgBdabGkm+L7B5rZp0KGQbe2Vfzz9aApAABAp6MpBdDVviPp7fj+hWbWc3NXVDYQ0t0dXdbM7m5eJn7cw8xOjqc/b2ZrzOxxM/uOmW3f6rnvNbPvmtljZvaqmb1iZveY2Wc34z2NMrM/mdkSM3s9/nmdmR28Cet4j5mdY2b3mtlzZvaGma2MH3/DzLZr5/lNcS2a4sfvMLPTzew+M1sRj3J796a+t7L1b2tmXzOzu+J8r8c1vs/MvmVm727jeS2jFZdN/kiFgbRGbmKek8qee1I8rWRmvzKzp+LffWK9Fjk0Hv33r2a2LH4fa8zs32Z2vZmNrfKaU+L3Mbls8l0V3svdrZ5XdfTdeOCkxABhZvY+M7vUzBbG+V42swfM7L/MrKbTdszsGDObE//+18fbyG8tHrSsUg3bWM8nzez3Zva0ma2N17XUzOab2R/iTDvXkqmN9SfqZmZ9zOwCM/u/+G/zVTNrNLNJZrbNJqz3qLj2T1o00vNaM1sUT/twO8/d5O2rxkw1j8Rcy7Lx3/l/mdntZrY83p5Xx7/rR8zsCjP7jJltVen5Zes52Mx+YWYL4m1tvZk9a2Y3mNnodp5bafvdx8wuM7Mn4t9f1W0MwBbC3blx48atU2+KBp9xSQvjx1eVTTupjeeML1tmSjvrvXsTMlRcVtEgM83LbCfpjrLHrW9zJe0YP+9gSc9XWfaSKpmmlC03UtLPqqznLUmTa3ifJ0l6tcp6XNJzkg6uso6meLkmSXtIerzCOtqteRvrHiFpaTv5XlB0nmi1elW7jdzETCeVb4+SJknaUG29kn5dY5Y/S3pXB97L3a2ed1XZvLoK6x1ZNn+KpKMUnXfb1vpvk7R1ldr0lvT7Ks/fIOms1jWssJ5tJM2u8T2f0Qn/1twtaaikJVVe50lJe7Szvj6S7qwh868k9e6s7avG91p1W9jE7WbPuB61/H7q23iNd0q6vobnz5a0
fRvraL39nqDotI/W60htY9y4cduybgx0BCALUyQdp+gQzClmdq27vxE2UsKvFY0OfL+iD+TPSdpd0qnxz6GSLjOzyZL+ouh9/ErSfZLekHSopAmKBo87y8xu9fYHdfqqpKMVNWS/kvSYpG0VNRXHKjqSZYqZrXL3n1ZagZl9VdJl8cO1kv4o6QFJqyS9J17XOEl9Jd1hZge6+4IqmbaW9L+SBsXv7UZJyxR9UO/bzvuplG+opL8qalAk6VFJ10p6VtJ7JX1W0iGSdpY028yOcPe7y1ZxvaR58f3mw8D/oWjve7nHNzVbmc8pqtMrkq5WdNmZtyQdEE9rto2iQ4f/JulhSYskrVFUm70lfUHSTvG6rlH0uy3X/F7Gx68pSedWyP5CB95LvaSzFV0qZ5qkB+PMJUlfVtREHK5oJOPz2ljHdEmfie+vV9TcPKioJiVJX5J0iaJtrZrvS2reS7Zc0WjJ/5C0WtGXQB9Q9AXPYbW/varerWjbHaCo8b5Z0ouS9okzvy9+zTvNrN7dX229AjPbSdF73TOeND9e51OKjvbYX1GTOSBeZy+1PwhVrdtXZszMJP1BUT2kaLv8o6SnFY1kvaOkgZI+qmibqrSOrRV9kTcinrRI0g2SnojX8QFFDebeiraDm83scHd/u8Lqmh2iaNt8S9IViv49Xq/od/jcZrxVAN1J6K6YGzduW95NG7/dXlg27fKy6adXeE7IPaUu6dsVlumjjXv5Nij68Pa8pCEVlv1C2bpuaeM1p7R6zccl7VJhuaMVfbBzRY3P+yosUypb5tFKy8TLjVHUOLukh9pYpqlVrq91wjbQQ8k9rpdJ6lFhuXPLlnlW0js6+ruvIdtJrd7vE5L6t/OcQyXtUGX+O5Xcw/iRGraBkTVkvaps+boK80e2ei/PSNqrwnIfLNteXlSFvaWKvphpXs9KSftXWKauwvZyUqtlekp6OZ7XVGkbb/U3tm8Hfpet96idWmGZ7STdVbbMT9pY103x/LclfbWNZbZT9MVU87qO6oztq8b3WnVbqHVZRf92NM+bJalnlfXsJ2nnCtN/VLaOH0rqVWGZ3ooa8eblvlzD9rtc0n4drRU3bty6341zSgFk5XuKGixJOsfaOc8xY39x99TowO6+UlLzXsqeivZunObuj1VY9jeKDoeTokvgtHckygZJn3P35yus62ZJl8YPt5X0lQrPP0/RnprXJI1x92crvYi7z5Y0NX54kJl9qJ1cN7n7j9pZphZjFO1xlaSHFDW6qb0k7v5dSXPih7tJOr4TXntTuKTx7r6s6kLu97r7y1Xmr1G096x5G/9C50XcJMe7+5OtJ7r7w4r2ZEnRnrAPVnju18run+buqT3Q7t6k9vcO9lG051KS/lRpGy9b30p3X9jO+mp1vbv/rMJrrFb0pVfz3tEvmdkO5cuY2TBt3Lv9I3f/cRt5m9fVvJfzzHYy1bR9ZewDZfevdPe32lrQ3Re4+6ryaWbWT9J/xQ//192/4e4bKjz3TUn/qWgPrNR+rSTpFK9+NAeALRRNKYBMuPsKbTzUdBdJZwSM01rFw2Nj95fdX6Hqhy3eF//cWhsPAWzLX9z9H1XmX6boMDZJOqZ8hpntqI2HRl7n7kvbea3flt0/op1lf9LO/FqVj7R8sbt7lWWnlt3PeoTme919fmesyN1fk/R/8cODOmOdm+hRd7+3yvy/lt3fr3yGmb1DG7eNZYoO76zIo0OsU1/MlFnX1ut0sUvbmhH/+9P8d7CNokNqyzV/ieDV1hOv6yVJt8QPD4sPZW1Lp21fnaj8kkqD2lyqbZ/VxtGwL6m2YNyYNn8Zslc7AzQ9o2jPLYAC4pxSAFm6WNFev50kfd3Mfu7uLwbOJEl/rzJvRdn9xkp7+9pYdsd2XvPOajPd/Tkze0LReWx7m9m73b1578wh2vil4ltm1vr8xdZ6l90fWGW5txSdU9cZmvfEuaTb21n2AW081zDrZq5aE5cQNx+flfRJRXvN+yrK
bBUW37VT0m2ah9qZX/7lRevt8wBt3E7uaWc7l6LD34dUmuHur5jZw4q2gU+Y2U2Kvuy4N25SusIris7XrOav2riH70BF5/k2OzT++bKkD0anXVa1ddnP9ys6RLeSmrevDN2n6IuDbSRNjr/kurrSESBtOLTs/q41/PtTvq0NVHRId8Vc7Xx5BWALRlMKIDPxh9WLJF2k6PC+SZK+ETaVpGhgoLaUXxez2nKtl31HO8s+1c785mX2V9T0vFcbDxmsK1vmK6p8eG9bqjXLq9x9/Sasq5p+8c/n4j2IbXL3t81skaLGaCcz28qzGwirvb3MkiQzG6xo4Ke9alzvuzY70eZrb5Ckattn/7L7T6t97S1zqqIvXt6l6LDYoyWtMbO/K2qK7pB0fw3Nb60W1dDQlP/N9W81ry7+uaM2DqpVq2p/UzVtX1ly9xfN7GuSfqHoc+CZks40s+cVfUF0r6Q/u3tbjXZd2f3fb+LLd6taAcgOh+8CyNpPFB0eKEmnmVnrD4eZ24QPxp31AVpKHkLXljVl98vPwa14Xc8aVbvm4Loq8zZV87Vd11RdaqPVFZ6bhXbfczwq6x3a2JAuVvSB/quS/p+iQ46PiW/Nh2SH+P+1I9vnO8vub+q2meLuDYpGbr1GG2v8TkkfU3Q+9D2SFpnZ5zc9akUd+XuSusffVKdx92mKRte9Uxu3m10UfXlwqaQFZna/mVU697hQtQKQDfaUAsiUu68zs+8q+lC/jaIPqF/uzNcws+7whdu2NSxT3iisbuP+ye7+686J1Klek7SDku+hmvImoeqe1QBOU/SBXYpGE/3PSgO7SJKZnZNZqs5V3rBt6rZZkbv/W9KJZnaKosu/fEjShyV9RNHffp2k35rZ7pUGGttEHfl7an68g6Rn3X33DmbJg3b/DXT3v0n6m5ntrOiQ3IMV/W4OjJ//IUn3VbhUU3PtXNGou535ZR2AguoOH9wAbHmuUHRdOykaCfMD1RYu03xIZ7Vv26XoGp15V8t7bl7GlbxOX/lhbiHOXazF8vjne9sbaTm+bmLzwFCrMjx0t1afiH9ukHRGWw1prLs2NOWjw76/huVrWUaS5O7r3f0ud/+eu/+Hogb/m4q2a0k6L26MOmJPa/9E0PK/udaj4Tb/Te1iZr2VT+WHX3fav4Huvsrdb3b3b7r7CEXXdL02nt1b6cGMmmtliq7ZCgAdRlMKIHPxYCfnxQ97Sbqgxqc2X5KjvUN+Q4x8uqk+Vm2mmb1XGwcl+lfZIEdSdOhj8wf69kbTDeXh+KdpY1PXlg9p457Sh6stGEjf+OeqapeFMbOhii6HUk35XqV2R9PJ0HxF1zGVohFl2/t8MHJzX8jdV7v7DxWdoytFgwUduLnri71b0rB2lvlo2f1HWs37W/zzHZIO62CWrlK+7bX5b6CZ9VR0LdLNEo/mfaI2fhE23My2KVvkb2X38/rvD4BuhqYUQCjXaeNlJcYrGuSmPc3Xr9vdzKrtqTm9I8EycpSZVRsJ93RF10aVpP8tnxFf9/HW+OGHzSyPHwxvLLv/9Xb2Yn2zjeflRfP5iruYWbXzXc+rMq9Z+WGjtR7a3OXiAa5uix/2l/SZtpY1s5FqY+TdTdRUdr8zTidq8zqYZtZHG6+Bu1Yb/36aXVN2f3Lc2OVN+fU7q32pNV7tfzlSVXw0wJKySeW/n+u18aiVb5pZbrZjAN0XTSmAIOKRMpvPvzNJ/13D08o/SF5UqdExswvU/p65POgl6Yb4w3KCmY2V9PX44VpF59+29h1t3LN1vZm1vu5i63XubmaXmNku1ZbrRHO0cdCfQyRdXGnvm5l9W9LY+OFiSb/LJt4mad6rZpIubD3TIt9VNEhMe/5ddr+9PXtZ+1HZ/Z+a2f6tF4ivM3lVtZWY2VAzO9fM+lZZ5j3a2Pi6ql/3tFb/z8xS56fHTdN12jhAz5Wt93i7+9+18QuRQyX9zszaHEHZzHqZ2afM7NROyF2r27Xx2sWnmlnqUHEz
K6mdaw2b2efN7Iut9n62XmaEpKHxw6fLR9B298Vlr7GXpFnxkR1trauHmX3CzL5TLReAYmOgIwDBuPtsM3tA0eGbtXzbfqWiS8jsJOnTku41s98puhTG+xTtISgp+iZ/fJeE7jw3K2pi/mFmMyT9n6LBWo5U9GG9ueH+ZvwhMMHd55rZVyTNUHSZhT+b2f2S/qyo8XlTUZ32VTS4TPPhfJd12TtK5nvbzI5XdImJbSSdJemj8e9riaJDYj8bZ1Oc94ROvCRNZ/q5pJMV7bk+3czqFe29fk7SbopG4B2qaE/WOknDq6zrXkXvtbeks82suSFrPl/wRXcPcgizu99pZldJOknROYmPxI8fUHTYcUlRHd4l6Y+K/gal9Ki/71Z0SP7keJt8QNK/FA1gtZOkwYpqtlO8/O/c/dkOxp+naKCiX8TXzbxJ0eGue0v6kjae6/tvbfwyrLWT4+UHS/qcpCPN7PeSGiS9pGg7HqDod324or+7KzqYu2buvszMrpX0BUW1e8TMfq5ou9tO0SHVx0l6UdE1Wdvam7qXpMmSfmJmtyv60mWxom1wF0VN+dHaeKRGpUGovqVodOWPKzos+mkzu1HRdY5XKjrn9b2KjoA5PL5/pyp8qQMAEk0pgPC+peQ5Sm1y95Vm9gVFDcHWivbAHdJqsdmKPlzmvSn9saIBQ06V9O0K813SBe7+07ZW4O5XxNcWnKGoyatUj3KrJGXW9Ln7PDP7uKI9UP0U7RmstHfwRUn/r9UIn7kRv4//lvRTRUcYHab0eYdPSPqkpF+1s64XzOwSRdv9dkqfT/03deB8zU4wUVGuTys6v/LLSo6O/baivfivaGNT2nq05ObznXuqcq3K3RC/Zke9IumLiv7+j4xvrS2SdIS7v1ppBe7+qpl9WNJ0RU3pDnG2avlaD5jU1c5Q1DTXKzpEd3Kr+csVXZqo2rWLm38/79TGa8hW8qakc9091Xi7+5tmNkrR5WO+oqhhP14bD5GuhOuQAmgTh+8CCMrd71H6/K5qy9+i6Nv3X0t6VtG5TSsl3aVoD8I4d+8W17tz99MkjZY0S9GH2zfinzdIOsTdp9SwjlmS9lDUOMxUtMdjnTbW5UFFh9qNldTf3V/o9DdSPd+DivbMnKmo4Vqp6MPuqjjbOZL2dPe/ZJlrU7n7LxQ1/H9QtIf0TUnPK9oLeKakkrs/VeO6vq1oj9at8bpyM9qwu7/p7p+RdKyifCsV7UF7VtGh1Ye4+6WSykfLfbHVOv6mqHH6uqIvJBYqOpf27fjnAkVHPXzE3cd31t+ru89TtBfzQkmPK2qWV0t6VNEXP0Pc/el21vGqu49X9OXJZfFzVykaeXm1pCcVHeVwpqLttpbziDuNu7+o6MiSSXG21You57NA0vckHRAfilzN9xR98fFdSX9RdG7vOkXv8SVFg41dJGk/d7+oSpY33P2/FR2NMVXS3xVtLxsUnXbwb0m3aGPtT9zkNwygMCw6rQsAAKA28aGan4of7hw3SyFyNH+I+Zu7jwyRAQDQcewpBQAANYsHOxoTP5wfqiEFAGw5aEoBAIAkycz2NLNdq8wfoGgQoa3iSdMyCQYA2KIx0BEAAGh2sKRfm9k9ikYKXqTofMOdJY1QNGLytvGyDykaFAgAgA6hKQUAAOV6KbqcSFuXFJGkuyUd6+5vVVkGAICa0JQCAIBmsxRdAuVwSQMVXa90J0UjBK9QNMLq9fGozwAAdIpcjL77nve8x+vq6kLH6DIrV65Unz59Qsdokbc8RUHdAQAAUFSNjY0vuHvFD8O52FNaV1enhoaG0DEAAAAAAF3AzJ5pax6j72ZgypQpoSMk5C1PUVB3AAAAIC0Xh++WSiXfkveUmpnyUOdmectTFNQdAAAARWVmje5eqjSPPaUAAAAAgGBoSgEAAAAAwdCUZiBvhybnLU9RUHcAAAAgjaYUAAAAABAMAx1lIG8D3OQtT1FQdwAAABQVAx0BAAAAAHKp3abUzK40s+fN
7PGyaTeY2bz41mRm8+LpdWa2rmzeL7syPAAAAACge+tVwzJXSfqppGuaJ7j755rvm9mlkl4pW36Ru9d3VsAtweTJk0NHSMhbnqKg7gAAAEBaTeeUmlmdpNnuvn+r6SbpWUkfc/cn21quPVv6OaUAAAAAUGRdeU7poZJWuPuTZdP2MLNHzexvZnZolVATzazBzBpWrlzZwRj51r9//9AREvKWpyioOwAAAJDW0ab0OEnXlT1eLul97j5U0pmSrjWzd1V6ortPd/eSu5f69OnTwRj5tnz58pb7ixcv1kc/+lHtt99+GjRokH784x+3zJsyZYoGDBig+vp61dfX65ZbbpEk3X///RoyZIhKpZKefDLq/19++WUdccQRevvttzuUpyNuvvlmLViwoOXxeeedpzvuuEOSNHLkyM2+Luef/vQnDRkyRPX19SqVSrrvvvskSc8884yGDRum+vp6DRo0SL/8Zfc6Zbm9uv/zn/9s+d3X19frXe96ly677DJJ2W0bnaWrto2FCxfq4IMP1tZbb61LLrkkNf+tt97S0KFDNWbMmJZpd955Z8t28+EPf1hPPfXUZr02AAAAuoi7t3uTVCfp8VbTeklaIWnXKs+7W1KpvfUPHz7ct2RRmSPLli3zxsZGd3d/9dVXfa+99vJ//OMf7u4+efJkv/jii1PPP+aYY3zx4sV+7733+plnnunu7meddZbfddddHc7TESeeeKL/4Q9/qDjvIx/5iD/yyCObtd7XXnvN3377bXd3nz9/vu+zzz7u7v7666/7+vXrW5bZfffdfenSpZv1GiFsSt03bNjgffv29aamJnfPbtvoLF21baxYscIffvhh//a3v12xHpdeeqkfd9xxPnr06JZpe+21ly9YsMDd3X/2s5/5iSeeuFmvDQAAgM0nqcHb6Ac7sqf0E5IWuvuS5glm1sfMesb33y9pL0lPd+A1tgjDhg1rud+vX7+Wx9tvv70GDhyopUuXVn1+7969tXbtWq1du1a9e/fWokWLtHjxYo0cObLN59x6663ad999NWzYMJ1++ukte46a97g123///dXU1CRJOvroozV8+HANGjRI06dPb1lmu+220znnnKMDDjhAI0aM0IoVK/TAAw9o5syZOvvss1VfX69FixbppJNO0h//+MdUlttuu00HH3ywhg0bps985jNavXp11fe73XbbKTpdWVqzZk3L/a222kpbb721JOn1118Puidwc5RvB+258847teeee2r33Xevulxnbxvlex/zuG3ssssuOvDAA9W7d+/UvCVLlmjOnDn6z//8z8R0M9Orr74qSXrllVc4jBoAACBnarkkzHWSHpS0j5ktMbMvxbPGK3noriQdJumx+BIxf5T0ZXd/sTMDd0eNjY0Vpzc1NenRRx/VQQcd1DLtpz/9qYYMGaKTTz5ZL730kiTpW9/6lk444QT94Ac/0GmnnaZzzjlHF154YZuvt379ek2YMEGzZs1SY2OjnnvuucT8M844o+LzrrzySjU2NqqhoUGXX365Vq1aJSlqDEeMGKH58+frsMMO04wZM/ShD31I48aN08UXX6x58+Zpzz33rLjOF154QRdeeKHuuOMOzZ07V6VSSf/zP/8jKTqkc+bMmRWfd9NNN2nffffV6NGjdeWVV7ZMX7x4sYYMGaLddttN3/zmN7tVg9HWdlDJ9ddfr+OOOy4xLYttoy152jbacsYZZ+iHP/yhevRI/rP2q1/9SqNGjdKuu+6q3/zmN5o0adImrRcAAABdq92m1N2Pc/d+7t7b3Xd19yvi6Se5+y9bLXujuw9y93p3H+bus7oqeHcyceLE1LTVq1fr2GOP1WWXXaZ3vSs67fYrX/mKFi1apHnz5qlfv34666yzJEn19fV66KGHdNddd+npp59Wv3795O763Oc+p+OPP14rVqxIrHvhwoXaY489tNdee8nMdPzxxyfmV9pjJUmXX355yx6vxYsXt5yjuNVWW7XsTRs+fHjL3rNa
PPTQQ1qwYIEOOeQQ1dfX6+qrr9YzzzwjSbrgggs0bty4is875phjtHDhQt18880699xzW6bvtttueuyxx/TUU0/p6quvTr33PKu0HVTyxhtvaObMmfrMZz7TMi2rbaMtedo2Kpk9e7Z22WUXDR8+PDXvRz/6kW655RYtWbJEX/ziF3XmmWfWvF4AAAB0vY4OdIQazJgxI/H4zTff1LHHHqvPf/7z+tSnPtUyvW/fvurZs6d69OihCRMm6OGHH048z9114YUX6txzz9X555+vH/7wh5owYYIuv/zymrP06tVLf//731ser1+/XpJ0991364477tCDDz6o+fPna+jQoS3zevfu3XIIbc+ePbVhw4aaX8/ddfjhh2vevHmaN2+eFixYoCuuuKLm5x922GF6+umn9cILLySm9+/fX/vvv7/uvffemtcVWuvtoC1//vOfNWzYMPXt27dlWlbbRvkh0XnfNsrdf//9mjlzpurq6jR+/Hj99a9/1fHHH6+VK1dq/vz5LUcjfO5zn9MDDzywWa8BAACArkFTmjF315e+9CUNHDgwtcemfHTWm266Sfvvn7zc6zXXXKNRo0Zpp5120tq1a9WjRw/16NFDa9euTSy37777qqmpSYsWLZIkXXfdxqOs6+rqWu7PnTtX//73vyVF59rtuOOO2nbbbbVw4UI99NBD7b6X7bffXq+99lrVZUaMGKH777+/ZcTTNWvW6F//+lfV5zz11FPNA2Vp7ty5ev3117XzzjtryZIlWrdunSTppZde0n333ad99tmn3ZzdzXXXXZc6dDerbWPu3LmS8rtttOUHP/iBlixZoqamJl1//fX62Mc+pt/+9rfacccd9corr7Ss9/bbb9fAgQM36zUAAADQNXqFDtBd1E2aU3V+09TRNa3n/vvv129+8xsNHjxY9fX1kqTvf//7GjVqlL7xjW9o3rx5MjPV1dVp2rRpLc9bu3atrrrqKt12222SpDPPPFOjRo3SVlttpWuvvTbxGu94xzs0ffp0jR49Wttuu60OPfTQlgbh2GOP1Re+8AUNGjRIBx10kPbee29J0lFHHaVf/vKXGjhwoPbZZx+NGDGi3fcyfvz4lr1xbR0S3KdPH1111VU67rjj9Prrr0uSLrzwQu29994677zzVCqVUodp3njjjbrmmmvUu3dvbbPNNrrhhhtkZnriiSd01llnyczk7vr617+uwYMH11L2bmPNmjW6/fbbE797SZltG9dcc02ut43nnntOpVJJr776qnr06KHLLrtMCxYsaDkEvrVevXppxowZOvbYY9WjRw/tuOOOiXOUAQAAEJ4175EKqVQq+eZetzArHWlKly1bFnRAnrvvvluXXHKJZs+enYs8RZXHurfeNgAAAICuYGaN7l6qNI/DdzOwKaOuZiFveYqCugMAAABp7CmtUWwg/EQAACAASURBVEf2lDYfbpoXectTFNQdAAAARcWeUgAAAABALtGUAgAAAACCoSnNQOuRVEPLW56ioO4AAABAGueU1qizLgkDAAAAAEXDOaWBmVnoCAl5y1MU1B0AAABIoykFAAAAAARDUwoAAAAACIamNANjxowJHSEhb3mKgroDAAAAaTSlGZg1a1boCAl5y1MU1B0AAABIoynNwNixY0NHSMhbnqKg7gAAAEAaTWkGZs+eHTpCQt7yFAV1BwAAANJoSpFiZjr++ONbHm/YsEF9+vTZ5HMiR44cqebrz44aNUovv/xyp+aUpGeffVZHHHGEBg4cqP32209NTU2SpJNOOkl77LGH6uvrVV9fr3nz5nX6awMAAADouF6hAyB/3vnOd+rxxx/XunXrtM022+j222/XgAEDOrTOW265pZPSJZ1wwgk655xzdPjhh2v16tXq0WPj9ywXX3yxPv3pT3fJ6wIAAADoHOwpzYC7h46QUEueUaNGac6cOZKk6667Tscdd1zLvDVr1ujkk0/WBz/4QQ0dOlR/+tOfJEnr1q3T+PHjNXDgQB1z
zDFat25dy3Pq6ur0wgsvSJKOPvpoDR8+XIMGDdL06dNbltluu+10zjnn6IADDtCIESO0YsWKqhkXLFigDRs26PDDD295/rbbbltjFbKXt+0AAAAAyAOa0gyUN155UEue8ePH6/rrr9f69ev12GOP6aCDDmqZ973vfU8f+9jH9PDDD+uuu+7S2WefrTVr1ugXv/iFtt12Wz3xxBM6//zz1djYWHHdV155pRobG9XQ0KDLL79cq1atkhQ1uyNGjND8+fN12GGHacaMGZKkmTNn6rzzzkut51//+pd22GEHfepTn9LQoUN19tln66233mqZf84552jIkCH62te+ptdff32TatQV8rYdAAAAAHlAU5qBU045JXSEhFryDBkyRE1NTbruuus0atSoxLzbbrtNU6dOVX19vUaOHKn169fr2Wef1T333NNyLuqQIUM0ZMiQiuu+/PLLW/aGLl68WE8++aQkaauttmo5b3X48OEt54eOGzdOF1xwQWo9GzZs0L333qtLLrlEjzzyiJ5++mldddVVkqQf/OAHWrhwoR555BG9+OKLuuiii2qqTVfK23YAAAAA5AFNKdo0btw4ff3rX08cuitFh6HeeOONmjdvnubNm6dnn31WAwcOrGmdd999t+644w49+OCDmj9/voYOHar169dLknr37i0zkyT17NlTGzZsqLquXXfdVfX19Xr/+9+vXr166eijj9bcuXMlSf369ZOZaeutt9YXv/hFPfzww5v69gEAAABkgKYUbTr55JM1efJkDR48ODH9yCOP1E9+8pOWcyQfffRRSdJhhx2ma6+9VpL0+OOP67HHHkut85VXXtGOO+6obbfdVgsXLtRDDz202fkOPPBAvfzyy1q5cqUk6a9//av2228/SdLy5cslRQ30zTffrP3333+zXwcAAABA16EpzcDMmTNDR0ioNc+uu+6q008/PTX93HPP1ZtvvqkhQ4Zo0KBBOvfccyVJX/nKV7R69WoNHDhQ5513noYPH5567lFHHaUNGzZo4MCBmjRpkkaMGFFT3krnlPbs2VOXXHKJPv7xj2vw4MFyd02YMEGS9PnPf16DBw/W4MGD9cILL+g73/lOTe+5K+VtOwAAAADywPIwImipVPLm61nmVd2kOVXnN00d3ea8ZcuWqX///p0dabPlLU9RUHcAAAAUlZk1unup0jz2lGago9f47Gx5y1MU1B0AAABIoykFAAAAAARDUwoAAAAACIamNAPNg+/kRd7yFAV1BwAAANIY6KhGHRnoCAAAAACKjIGOAqt0aZSQ8panKKg7AAAAkEZTmoG5c+eGjpCQtzxFQd0BAACANJpSAAAAAEAwNKUZ6NevX+gICXnLUxTUHQAAAEijKc3AsmXLQkdIyFueoqDuAAAAQBpNaQamTJkSOkJC3vIUBXUHAAAA0rgkTI06ckkYM1Me6twsb3mKgroDAACgqLgkDAAAAAAgl2hKAQAAAADB0JRmIG+HJuctT1FQdwAAACCNphQAAAAAEAxNaQZKpYrn8waTtzxFQd0BAACANJpSAAAAAEAwNKUAAAAAgGBoSjMwefLk0BES8panKKg7AAAAkGbuHjqDSqWS531k0rpJc6rOb5o6OqMkAAAAANC9mFmju1ccZIU9pRno379/6AgJectTFNQdAAAASKMpzcDy5ctDR0jIW56ioO4AAABAGk0pAAAAACAYmtIMDBs2LHSEhLzlKQrqDgAAAKS125Sa2ZVm9ryZPV42bYqZLTWzefFtVNm8b5nZU2b2TzM7squCdyeNjY2hIyTkLU9RUHcAAAAgrZY9pVdJOqrC9B+5e318u0WSzGw/SeMlDYqf83Mz69lZYburiRMnho6QkLc8RUHdAQAAgLR2m1J3v0fSizWu75OSrnf3193935KekvTBDuTbIsyYMSN0hIS85SkK6g4AAACkdeSc0tPM7LH48N4d42kDJC0uW2ZJPA0AAAAAgJTNbUp/IWlPSfWSlku6dFNXYGYT
zazBzBpWrly5mTEAAAAAAN3ZZjWl7r7C3d9y97clzdDGQ3SXStqtbNFd42mV1jHd3UvuXurTp8/mxOg2li6tWIJg8panKKg7AAAAkLZZTamZ9St7eIyk5pF5Z0oab2Zbm9kekvaS9HDHInZ/eRt1NW95ioK6AwAAAGm92lvAzK6TNFLSe8xsiaTJkkaaWb0kl9Qk6RRJcvd/mNnvJS2QtEHSqe7+VtdE7z7GjRsndw8do0Xe8hQFdQcAAADS2m1K3f24CpOvqLL89yR9ryOhAAAAAADF0JHRdwEAAAAA6BCa0gxMmzYtdISEvOUpCuoOAAAApFkeznErlUre0NAQOkZVdZPmVJ3fNHV0RkkAAAAAoHsxs0Z3L1Wax57SDJhZ6AgJectTFNQdAAAASKMpBQAAAAAEQ1MKAAAAAAiGpjQDY8aMCR0hIW95ioK6AwAAAGk0pRmYNWtW6AgJectTFNQdAAAASKMpzcDYsWNDR0jIW56ioO4AAABAGk1pBmbPnh06QkLe8hQFdQcAAADSaEoBAAAAAMHQlAIAAAAAgqEpzYC7h46QkLc8RUHdAQAAgDSa0gxMnz49dISEvOUpCuoOAAAApFke9t6USiVvaGgIHaOquklzqs5vmjq6zXlmlqu9ZHnLUxTUHQAAAEVlZo3uXqo0jz2lAAAAAIBgaEoBAAAAAMHQlGZg5syZoSMk5C1PUVB3AAAAII2mNAPDhw8PHSEhb3mKgroDAAAAaTSlGRgwYEDoCAl5y1MU1B0AAABIoykFAAAAAARDUwoAAAAACIamNAMTJkwIHSEhb3mKgroDAAAAaebuoTOoVCp5Q0ND6BhV1U2aU3V+09TRGSUBAAAAgO7FzBrdvVRpHntKM5C3UVfzlqcoqDsAAACQRlOagblz54aOkJC3PEVB3QEAAIA0mlIAAAAAQDA0pRno169f6AgJectTFNQdAAAASKMpzcCyZctCR0jIW56ioO4AAABAGk1pBqZMmRI6QkLe8hQFdQcAAADSuCRMjTpySRgzUx7q3CxveYqCugMAAKCouCQMAAAAACCXaEoBAAAAAMHQlGYgb4cm5y1PUVB3AAAAII2mFAAAAAAQDE1pBkqliufzBpO3PEVB3QEAAIA0mlIAAAAAQDA0pQAAAACAYGhKMzB58uTQERLylqcoqDsAAACQZu4eOoNKpZLnfWTSuklzqs5vmjo6oyQAAAAA0L2YWaO7VxxkhT2lGejfv3/oCAl5y1MU1B0AAABIoynNwPLly0NHSMhbnqKg7gAAAEAaTSkAAAAAIBia0gwMGzYsdISEvOUpCuoOAAAApNGUZqCxsTF0hIS85SkK6g4AAACk0ZRmYOLEiaEjJOQtT1FQdwAAACCNS8LUqCOXhDEz5aHOzfKWpyioOwAAAIqKS8IAAAAAAHKJphQAAAAAEAxNaQaWLl0aOkJC3vIUBXUHAAAA0mhKM5C3UVfzlqcoqDsAAACQ1m5TamZXmtnzZvZ42bSLzWyhmT1mZjeZ2Q7x9DozW2dm8+LbL7syfHcxbty40BES8panKKg7AAAAkFbLntKrJB3VatrtkvZ39yGS/iXpW2XzFrl7fXz7cufEBAAAAABsidptSt39Hkkvtpp2m7tviB8+JGnXLsgGAAAAANjCdcY5pSdL+nPZ4z3M7FEz+5uZHdoJ6+/2pk2bFjpCQt7yFAV1BwAAANLM3dtfyKxO0mx337/V9HMklSR9yt3dzLaWtJ27rzKz4ZJuljTI3V+tsM6JkiZK0vve977hzzzzTEffS5eqmzSn6vymqaMzSgIAAAAA3YuZNbp7qdK8zd5TamYnSRoj6fMed7bu/rq7r4rvN0paJGnvSs939+nuXnL3Up8+fTY3RrdgZqEjJOQtT1FQdwAAACBts5pSMztK0jckjXP3tWXT+5hZz/j++yXtJenpzggKAAAAANjy9GpvATO7TtJISe8xsyWSJisabXdrSbfHe38eikfaPUzSBWb2pqS3JX3Z
3V+suGIAAAAAQOG125S6+3EVJl/RxrI3Srqxo6G2NGPGjAkdISFveYqCugMAAABpnTH6Ltoxa9as0BES8panKKg7AAAAkEZTmoGxY8eGjpCQtzxFQd0BAACANJrSDMyePTt0hIS85SkK6g4AAACk0ZQCAAAAAIKhKQUAAAAABENTmgF3Dx0hIW95ioK6AwAAAGk0pRmYPn166AgJectTFNQdAAAASLM87L0plUre0NAQOkZVdZPmVJ3fNHV0m/PMLFd7yfKWpyioOwAAAIrKzBrdvVRpHntKAQAAAADB0JQCAAAAAIKhKc3AzJkzQ0dIyFueoqDuAAAAQBpNaQaGDx8eOkJC3vIUBXUHAAAA0mhKMzBgwIDQERLylqcoqDsAAACQRlMKAAAAAAiGphQAAAAAEAxNaQYmTJgQOkJC3vIUBXUHAAAA0szdQ2dQqVTyhoaG0DGqqps0p+r8pqmjM0oCAAAAAN2LmTW6e6nSPPaUZiBvo67mLU9RUHcAAAAgjaY0A3Pnzg0dISFveYqCugMAAABpNKUAAAAAgGBoSjPQr1+/0BES8panKKg7AAAAkEZTmoFly5aFjpCQtzxFQd0BAACANJrSDEyZMiV0hIS85SkK6g4AAACkcUmYGnXkkjBmpjzUuVne8hQFdQcAAEBRcUkYAAAAAEAu0ZQCAAAAAIKhKc1A3g5NzlueoqDuAAAAQBpNKQAAAAAgGJrSDJRKFc/nDSZveYqCugMAAABpNKUAAAAAgGBoSgEAAAAAwdCUZmDy5MmhIyTkLU9RUHcAAAAgzdw9dAaVSiXP+8ikdZPmVJ3fNHV0RkkAAAAAoHsxs0Z3rzjICntKM9C/f//QERLylqcoqDsAAACQRlOageXLl4eOkJC3PEVB3QEAAIA0mlIAAAAAQDA0pRkYNmxY6AgJectTFNQdAAAASKMpzUBjY2PoCAl5y1MU1B0AAABIoynNwMSJE0NHSMhbnqKg7gAAAEAal4SpUUcuCWNmykOdm+UtT1FQdwAAABQVl4QBAAAAAOQSTSkAAAAAIBia0gwsXbo0dISEvOUpCuoOAAAApNGUZiBvo67mLU9RUHcAAAAgjaY0A+PGjQsdISFveYqCugMAAABpNKUAAAAAgGBoSgEAAAAAwdCUZmDatGmhIyTkLU9RUHcAAAAgzdw9dAaVSiVvaGgIHaOquklzqs5vmjo6oyQAAAAA0L2YWaO7lyrNY09pBswsdISEvOUpCuoOAAAApNGUAgAAAACCqakpNbMrzex5M3u8bNpOZna7mT0Z/9wxnm5mdrmZPWVmj5nZsK4KDwAAAADo3mrdU3qVpKNaTZsk6U5330vSnfFjSfoPSXvFt4mSftHxmN3bmDFjQkdIyFueoqDuAAAAQFpNTam73yPpxVaTPynp6vj+1ZKOLpt+jUcekrSDmfXrjLDd1axZs0JHSMhbnqKg7gAAAEBaR84p7evuy+P7z0nqG98fIGlx2XJL4mmFNXbs2NAREvKWpyioOwAAAJDWKQMdeXRdmU26toyZTTSzBjNrWLlyZWfEyK3Zs2eHjpCQtzxFQd0BAACAtI40pSuaD8uNfz4fT18qabey5XaNpyW4+3R3L7l7qU+fPh2IAQAAAADorjrSlM6UdGJ8/0RJfyqbfkI8Cu8ISa+UHeYLAAAAAECLXrUsZGbXSRop6T1mtkTSZElTJf3ezL4k6RlJn40Xv0XSKElPSVor6YudnLnbiY5uzo+85SkK6g4AAACk1Tr67nHu3s/de7v7ru5+hbuvcvePu/te7v4Jd38xXtbd/VR339PdB7t7Q9e+hfybPn166AgJectTFNQdAAAASLM87L0plUre0JDv3rVu0pyq85umjm5znpnlai9Z3vIUBXUHAABAUZlZo7uXKs3rlNF3AQAAAADYHDSlAAAAAIBgaEozMHPmzNAREvKWpyioOwAAAJBGU5qB4cOHh46QkLc8RUHdAQAAgDSa0gwMGDAgdISEvOUpCuoO
AAAApNGUAgAAAACCoSkFAAAAAARDU5qBCRMmhI6QkLc8RUHdAQAAgDRz99AZVCqVvKGhIXSMquomzak6v2nq6IySAAAAAED3YmaN7l6qNI89pRnI26irectTFNQdAAAASKMpzcDcuXNDR0jIW56ioO4AAABAGk0pAAAAACAYmtIM9OvXL3SEhLzlKQrqDgAAAKTRlGZg2bJloSMk5C1PUVB3AAAAII2mNANTpkwJHSEhb3mKgroDAAAAaVwSpkYduSSMmSkPdW6WtzxFQd0BAABQVFwSBgAAAACQSzSlAAAAAIBgaEozkLdDk/OWpyioOwAAAJBGUwoAAAAACIamNAOlUsXzeYPJW56ioO4AAABAGk0pAAAAACAYmlIAAAAAQDA0pRmYPHly6AgJectTFNQdAAAASDN3D51BpVLJ8z4yad2kOVXnN00dnVESAAAAAOhezKzR3SsOssKe0gz0798/dISEvOUpCuoOAAAApNGUZmD58uWhIyTkLU9RUHcAAAAgjaYUAAAAABAMTWkGhg0bFjpCQt7yFAV1BwAAANJoSjPQ2NgYOkJC3vIUBXUHAAAA0mhKMzBx4sTQERLylqcoqDsAAACQxiVhatSRS8KYmfJQ52Z5y1MU1B0AAABFxSVhAAAAAAC5RFMKAAAAAAiGpjQDS5cuDR0hIW95ioK6AwAAAGk0pRnI26irectTFNQdAAAASKMpzcC4ceNCR0jIW56ioO4AAABAWq/QAbYU7Y3OCwAAAABIY08pAAAAACAYmtIM7HTkaaEjJEybNi10hEKi7gAAAEAaTWkGtq8/KnSEhIkTJ4aOUEjUHQAAAEijKc3AMxeNCR0hwcxCRygk6g4AAACk0ZQCAAAAAIKhKQUAAAAABENTmoFt9jwwdISEMWPydThxUVB3AAAAII2mNAO7fHpy6AgJs2bNCh2hkKg7AAAAkEZTmoHn/3h+6AgJY8eODR2hkKg7AAAAkEZTmoF1ix4JHSFh9uzZoSMUEnUHAAAA0mhKAQAAAADB0JQCAAAAAILptblPNLN9JN1QNun9ks6TtIOkCZJWxtO/7e63bHbCLcDu38zXYZvuHjpCIVF3AAAAIG2z95S6+z/dvd7d6yUNl7RW0k3x7B81zyt6QypJr827NXSEhOnTp4eOUEjUHQAAAEjrrMN3Py5pkbs/00nr26K8+Jefho6QcMopp4SOUEjUHQAAAEjrrKZ0vKTryh6fZmaPmdmVZrZjJ70GAAAAAGAL0+Gm1My2kjRO0h/iSb+QtKekeknLJV3axvMmmlmDmTWsXLmy0iIAAAAAgC1cZ+wp/Q9Jc919hSS5+wp3f8vd35Y0Q9IHKz3J3ae7e8ndS3369OmEGPnV59hzQ0dImDlzZugIhUTdAQAAgLTOaEqPU9mhu2bWr2zeMZIe74TX6Na26vuB0BEShg8fHjpCIVF3AAAAIK1DTamZvVPS4ZL+t2zyD83s/8zsMUkflfS1jrzGlmDpz08MHSFhwIABoSMUEnUHAAAA0jb7OqWS5O5rJO3catoXOpQIAAAAAFAYnTX6LgAAAAAAm4ymNAPbHXBk6AgJEyZMCB2hkKg7AAAAkEZTmoGdj/rv0BESpk+fHjpCIVF3AAAAII2mNAPLr/pq6AgJjAIbBnUHAAAA0mhKM/DGikWhIyTMnTs3dIRCou4AAABAGk0pAAAAACAYmtIM9Nxup9AREvr16xc6QiFRdwAAACCNpjQDu556TegICcuWLQsdoZCoOwAAAJBGU5qBl+/7XegICVOmTAkdoZCoOwAAAJBm7h46g0qlkjc0NISOUVXdpDmb/dxnLhqjPNS5mZnlKk9RUHcAAAAUlZk1unup0jz2lAIAAAAAgqEpBQAAAAAEQ1OagfeeeFnoCAl5P1R6S0XdAQAAgDSaUgAAAABAMDSlGXju6jNCR0golSqeX4wuRt0BAACANJpSAAAAAEAwNKUAAAAAgGBoSjPw7kOOCx0hYfLkyaEjFBJ1BwAA
ANLM3UNnUKlU8ryPTFo3aU6Hnt80dXQnJQEAAACA7sXMGt294iAr7CnNwJKfnRA6QkL//v1DRygk6g4AAACk0ZRm4K3VL4aOkLB8+fLQEQqJugMAAABpNKUAAAAAgGBoSjOwVd89Q0dIGDZsWOgIhUTdAQAAgDSa0gz0O+nHoSMkNDY2ho5QSNQdAAAASKMpzcCqW38SOkLCxIkTQ0coJOoOAAAApNGUZmD1/L+EjpAwY8aM0BEKiboDAAAAaTSlAAAAAIBgaEoBAAAAAMHQlGZgwH9dHTpCwtKlS0NHKCTqDgAAAKTRlGbgjRVPhY6QwCiwYVB3AAAAII2mNAMrb/xu6AgJ48aNCx2hkKg7AAAAkEZTCgAAAAAIhqYUAAAAABAMTWkGdjrytNAREqZNmxY6QiFRdwAAACDN3D10BpVKJW9oaAgdo6q6SXO6dP1NU0d36foBAAAAIBQza3T3UqV57CnNwDMXjQkdIcHMQkcoJOoOAAAApNGUAgAAAACCoSkFAAAAAARDU5qBbfY8MHSEhDFj8nU4cVFQdwAAACCNpjQDu3x6cugICbNmzQodoZCoOwAAAJBGU5qB5/94fugICWPHjg0doZCoOwAAAJDWK3SAIli36JF2l2nvkjOdecmY2bNnd9q6UDvqDgAAAKSxpxQAAAAAEAxNKQAAAAAgGJrSDOz+zXwdtunuoSMUEnUHAAAA0mhKM/DavFtDR0iYPn166AiFRN0BAACANJrSDLz4l5+GjpBwyimnhI5QSNQdAAAASKMpBQAAAAAEQ1MKAAAAAAiGpjQDfY49N3SEhJkzZ4aOUEjUHQAAAEijKc3AVn0/EDpCwvDhw0NHKCTqDgAAAKTRlGZg6c9PDB0hYcCAAaEjFBJ1BwAAANJ6dXQFZtYk6TVJb0na4O4lM9tJ0g2S6iQ1Sfqsu7/U0dcCAAAAAGxZOmtP6Ufdvd7dS/HjSZLudPe9JN0ZPwYAAAAAIKGrDt/9pKSr4/tXSzq6i16nW9jugCNDR0iYMGFC6AiFRN0BAACANHP3jq3A7N+SXpLkkqa5+3Qze9ndd4jnm6SXmh+XPW+ipImS9L73vW/4M88806EcXa1u0pygr980dXTQ1wcAAACAzWVmjWVH1iZ0xp7SD7v7MEn/IelUMzusfKZHXW+q83X36e5ecvdSnz59OiFGfi2/6quhIyQwCmwY1B0AAABI63BT6u5L45/PS7pJ0gclrTCzfpIU/3y+o6/Tnb2xYlHoCAlz584NHaGQqDsAAACQ1qGm1MzeaWbbN9+XdISkxyXNlNR8HZQTJf2pI68DAAAAANgydfSSMH0l3RSdNqpekq5191vN7BFJvzezL0l6RtJnO/g63VrP7XYKHSGhX79+oSMUEnUHAAAA0jrUlLr705IOqDB9laSPd2TdW5JdT72my1+jloGYmgdLWrZsWVfHQQXUHQAAAEjrqkvCoMzL9/0udISEKVOmhI5QSNQdAAAASKMpzcAr918XOkLC+eefHzpCIVF3AAAAIK2j55QiI6GvkwoAAAAAXYE9pQAAAACAYGhKM/DeEy8LHSGhoaEhdIRCou4AAABAGk0pAAAAACAYmtIMPHf1GaEjJJRKpdARCom6AwAAAGk0pQAAAACAYGhKAQAAAADB0JRm4N2HHBc6QsLkyZNDRygk6g4AAACkmbuHzqBSqeR5H5l0S7hOaNPU0aEjAAAAACggM2t094qDrLCnNANLfnZC6AgJ/fv3Dx2hkKg7AAAAkEZTmoG3Vr8YOkLC8uXLQ0coJOoOAAAApNGUAgAAAACCoSnNwFZ99wwdIWHYsGGhIxQSdQcAAADSaEoz0O+kH4eOkNDY2Bg6QiFRdwAAACCNpjQDq279SegICRMnTgwdoZCoOwAAAJBGU5qB1fP/EjpCwowZM0JHKCTqDgAAAKTRlAIAAAAAgqEpBQAAAAAEQ1OagQH/dXXoCAlLly4NHaGQqDsAAACQRlOagTdWPBU6QgKjwIZB
3QEAAIA0mtIMrLzxu6EjJIwbNy50hEKi7gAAAEAaTSkAAAAAIBiaUgAAAABAMDSlGdjpyNNCR0iYNm1a6AiFRN0BAACANJrSDGxff1ToCAkTJ04MHaGQqDsAAACQRlOagWcuGhM6QoKZhY5QSNQdAAAASKMpBQAAAAAEQ1MKAAAAAAiGpjQD2+x5YOgICWPG5Otw4qKg7gAAAEAaTWkGdvn05NAREmbNmhU6QiFRdwAAACCNpjQDz//x/NAREsaOHRs6QiFRdwAAACCNpjQD6xY9EjpCwuzZs0NHKCTqDgAAAKT9//buP9bq+r7j+OsVlLUpRgtllICCrcbGPyYqczaaptN03lZTt9Qsmq6yrhOToYGkgLOz7gAADZhJREFUy2RNGmBNkzbZql21Jqw6cXF2HbYbsQZnWpPNJnMipfUHMwODE0TopCKuSwn2vT/Oh8v5ci6C3Hs+n8/3fJ+P5Oae7/fce86Lz/vec/Ph/fl+DpNSAAAAAEAxp5QOgHwWrvz+hLcP2/GVq3PGAQAAAAA6pTksuK2uZZu15emKiCgdAQAAAKgOk9IMDmzZWDpCQ215umLt2rWlIwAAAADVYVKawb5H7ywdoaG2PF1x8803l44AAAAAVIdJKQAAAACgGDY6wriJNj/qx0ZIAAAAAKYandIMZn/qi6UjNNSWpys2bNhQOgIAAABQHSalGUyfc07pCA215emKiy++uHQEAAAAoDpMSjPY9c0lpSM01JanK+bNm1c6AgAAAFAdJqUAAAAAgGKYlAIAAAAAimFSmsGMC64qHaGhtjxdcdNNN5WOAAAAAFSHSWkGs8ZuLR2hobY8XbF27drSEQAAAIDqMCnNYPd9y0tHaKgtT1ew+y4AAAAwiElpBgf3bC8doaG2PF2xefPm0hEAAACA6jApBQAAAAAUc9KTUttn2n7c9vO2n7O9PJ1fbXuX7S3p4xNTF7edps2YWTpCQ215umLu3LmlIwAAAADVOWUS33tI0ucjYrPt0yQ9bfuxdN/tEfGXk483GuYvu790hIba8nTFK6+8UjoCAAAAUJ2T7pRGxO6I2JxuH5C0VdK8qQo2Sl5/4oHSERpqy9MVq1evLh0BAAAAqM6UXFNqe6GkCyU9mU7dYvuntu+1/d6peI422/+jB0tHaKgtT1esWbOmdAQAAACgOpNZvitJsj1D0kOSVkTEG7bvlvQlSZE+/5WkP5rg+5ZKWipJZ5111mRjYAQsXPn9t71/x1euzpQEAAAAQC6T6pTaPlW9CekDEfFdSYqIPRHxVkT8StLfSLpkou+NiLURsTgiFs+ePXsyMQAAAAAALTWZ3Xct6R5JWyPia33n+7cY/T1Jz558vNHw/iV3lI7QUFuerti0aVPpCAAAAEB1JrN89zJJn5H0jO0t6dwXJN1ge5F6y3d3SLp5UgkBAAAAACPrpCelEfGEJE9w1yMnH2c0vbpuhRbc9nDpGONqy3Oi2n7N6eLFixURpWMAAAAAVZmS3XcBAAAAADgZTEoBAAAAAMUwKc3g9MtuKB2hobY8XbFq1arSEQAAAIDqMCnN4IzLP106QkNtebpi9erVpSMAAAAA1ZnM7rs4QTvvulHzl91fOsa42vKMkrfbjGnnXTfq0IHXMqYBAAAA6kenNIO33txXOkJDbXm6gnEHAAAABjEpBQAAAAAUw/LdDKbP+WDpCA0nm6ft7xNaWm0/BwAAAEAN6JRmMPcPv146QkNtebqCcQcAAAAG0SnN4LWN39CssVtLxxhXKs/xOq01ZBhmt/e1jd+Q6CYDAAAADXRKM3jzJ4+WjtBQW56uYNwBAACAQXRKgXeghm4vAAAAMErolAIAAAAAimFSmsG8P1lXOkJDbXm6gnEHAAAABjEpzeDgnm2lIzTUlqcrGHcAAABgEJPSDH720JdKR2ioLU9XMO4AAADAIDY6AjIq+ZY0AAAAQI3olAIAAAAAimFSmsHMq24pHaGhtjxdwbgDAAAAg5iUZnDaorHS
ERpqy9MVjDsAAAAwiElpBi999ZrSERpqy9MVjDsAAAAwiI2OMDKOt4lQrscY5vOzERIAAABGDZ1SAAAAAEAxdEozePcHf7N0hIba8nRFDeN+Ip1gurEAAADIiU5pBr9+3arSERpqy9MVjDsAAAAwiElpBnvXrykdoaG2PF3BuAMAAACDWL6bwf9tf6p0hIZh5Sm9SVDtavs5OBY2WwIAAEBOdEoBAAAAAMXQKQWAytCtBgAAXUKnNIMFtz1cOkJDbXm6gnEHAAAABjEpzeDAlo2lIzTUlqcrGHcAAABgEMt3M9j36J06bdFY6RjjasvTFV0Z92EvPZ3shlosfQUAAKgLnVIAAAAAQDF0SoEW4W13Ju9ExpBuKgAAQD50SjOY/akvlo7QUFuermDcAQAAgEFMSjOYPuec0hEaasvTFYw7AAAAMIjluxns+uaSqt4OpLY8XTEq4z7sJcQsUQYAAOgWOqUAAAAAgGLolALIik4oAAAA+tEpzWDGBVeVjtBQW56uYNwBAACAQXRKM5g1dmvpCA215ekKxr07jtcNnuxbzgz78QEAAHKiU5rB7vuWl47QUFuermDcAQAAgEFMSjM4uGd76QgNteXpCsYdAAAAGMTyXQDomKnYbGrUlyDXng8AgFFCpzSDaTNmlo7QUFuermDcAQAAgEF0SjOYv+z+0hEaasvTFYx7e5TeqGgUdOHfOGx0awEAXUGnNIPXn3igdISG2vJ0BeMOAAAADGJSmsH+Hz1YOkJDbXm6gnEHAAAABrF8FwDeIZamDn8MWLp6fIwRAGBU0CkFAAAAABRDpzSD9y+5o3SEhtrydAXjjlzo5A7fiYzx8TqVXeg215BhmGp4eyUAGAVD65TaHrP9gu1ttlcO63kAAAAAAO01lE6p7WmS7pL0MUk7JT1le0NEPD+M56vdq+tWaMFtD5eOMa62PF3BuANTZ7IduBzd5No71m3oYrYhY+1Kv8UVNapDF+o06v/GqVihU7NhdUovkbQtIl6MiIOSvi3p2iE9FwAAAACgpYY1KZ0n6eW+453pHAAAAAAA4xwRU/+g9nWSxiLij9PxZyT9VkTc0vc1SyUtTYfnSXphCp76fZL+ZwoeB+VQw9FAHduPGrYfNWw/ath+1HA0UMepsSAiZk90x7B2390l6cy+4/np3LiIWCtp7VQ+qe1NEbF4Kh8TeVHD0UAd248ath81bD9q2H7UcDRQx+Eb1vLdpySda/ts29MlXS9pw5CeCwAAAADQUkPplEbEIdu3SHpU0jRJ90bEc8N4LgAAAABAew1r+a4i4hFJjwzr8Y9hSpcDowhqOBqoY/tRw/ajhu1HDduPGo4G6jhkQ9noCAAAAACAEzGsa0oBAAAAADiukZmU2h6z/YLtbbZXls6Didm+1/Ze28/2nZtp+zHb/5U+vzedt+2/TjX9qe2LyiXHYbbPtP247edtP2d7eTpPHVvC9rts/4ftn6Qarknnz7b9ZKrVP6SN6mT719LxtnT/wpL5cYTtabZ/bPvhdEwNW8b2DtvP2N5ie1M6x+tpi9g+w/Z62/9pe6vtD1PD9rB9Xvr9O/zxhu0V1DCvkZiU2p4m6S5JH5d0vqQbbJ9fNhWO4T5JY0edWynpBxFxrqQfpGOpV89z08dSSXdnyoi3d0jS5yPifEmXSlqWft+oY3v8UtIVEXGBpEWSxmxfKumrkm6PiHMk/VzS59LXf07Sz9P529PXoQ7LJW3tO6aG7fTbEbGo7y0neD1tl69L2hgRH5J0gXq/k9SwJSLihfT7t0jSxZJ+Iel7ooZZjcSkVNIlkrZFxIsRcVDStyVdWzgTJhAR/ypp31Gnr5W0Lt1eJ+l3+87fHz3/LukM23PzJMWxRMTuiNicbh9Q74/vPFHH1ki1eDMdnpo+QtIVktan80fX8HBt10u60rYzxcUx2J4v6WpJ30rHFjUcFbyetoTt0yV9RNI9khQRByPidVHDtrpS0vaIeEnUMKtRmZTOk/Ry3/HO
dA7tMCcidqfbr0qak25T18qlJYAXSnpS1LFV0rLPLZL2SnpM0nZJr0fEofQl/XUar2G6f7+kWXkTYwJ3SPozSb9Kx7NEDdsoJP2L7adtL03neD1tj7Ml/UzS36al9N+y/R5Rw7a6XtKD6TY1zGhUJqUYEdHbDpotoVvA9gxJD0laERFv9N9HHesXEW+lpUrz1Vtt8qHCkfAO2L5G0t6IeLp0Fkza5RFxkXpLApfZ/kj/nbyeVu8USRdJujsiLpT0vzqyzFMSNWyLdA3+JyX949H3UcPhG5VJ6S5JZ/Ydz0/n0A57Di97SJ/3pvPUtVK2T1VvQvpARHw3naaOLZSWmT0u6cPqLUE6/P7V/XUar2G6/3RJr2WOiqbLJH3S9g71Llm5Qr3r2qhhy0TErvR5r3rXsV0iXk/bZKeknRHxZDper94klRq2z8clbY6IPemYGmY0KpPSpySdm3YdnK5e631D4Uw4cRskLUm3l0j6577zN6Zdzi6VtL9vGQUKSdeh3SNpa0R8re8u6tgStmfbPiPdfrekj6l3bfDjkq5LX3Z0DQ/X9jpJPwze5LqoiPjziJgfEQvV+5v3w4j4tKhhq9h+j+3TDt+W9DuSnhWvp60REa9Ketn2eenUlZKeFzVsoxt0ZOmuRA2z8qj8TbL9CfWur5km6d6I+HLhSJiA7QclfVTS+yTtkbRK0j9J+o6ksyS9JOn3I2Jfmvzcqd5uvb+Q9NmI2FQiN46wfbmkf5P0jI5cy/YF9a4rpY4tYPs31Nu0YZp6/zn5nYj4C9sfUK/rNlPSjyX9QUT80va7JP2detcP75N0fUS8WCY9jmb7o5L+NCKuoYbtkur1vXR4iqS/j4gv254lXk9bw/Yi9TYcmy7pRUmfVXptFTVshfSfQv8t6QMRsT+d4/cwo5GZlAIAAAAA2mdUlu8CAAAAAFqISSkAAAAAoBgmpQAAAACAYpiUAgAAAACKYVIKAAAAACiGSSkAAAAAoBgmpQAAAACAYpiUAgAAAACK+X+PmfEKu+ADjwAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "items_per_user=df.groupby(['user']).count()['rating']\n", + "\n", + "plt.figure(figsize=(16,8))\n", + "plt.hist(items_per_user, bins=100)\n", + "\n", + "# Let's add median\n", + "t=items_per_user.median()\n", + "plt.axvline(t, color='k', linestyle='dashed', linewidth=1)\n", + "plt.text(t*1.1, plt.ylim()[1]*0.9, 'Median: {:.0f}'.format(t))\n", + "\n", + "# Let's add also some percentiles\n", + "t=items_per_user.quantile(0.25)\n", + "plt.axvline(t, color='k', linestyle='dashed', linewidth=1)\n", + "plt.text(t*1.1, plt.ylim()[1]*0.95, '25% quantile: {:.0f}'.format(t))\n", + "\n", + "t=items_per_user.quantile(0.75)\n", + "plt.axvline(t, color='k', linestyle='dashed', linewidth=1)\n", + "plt.text(t*1.05, plt.ylim()[1]*0.95, '75% quantile: {:.0f}'.format(t))\n", + "\n", + "plt.title('Number of ratings per user', fontsize=30)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA6UAAAHvCAYAAACsfXllAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nOzde5wU5ZX/8e8BREW8oYgwqBhEgyiO0CrG1RCzBhYB7wluvLC6DJto1JgYSfwp4CWriSZeYxxWBXa9xNXVABrwHqPxkhkEY7xCgkFARBTlohL0/P6oGmiGnu4Giqeq8PN+vfpFd9VTdZ4+XTPM6afqKXN3AQAAAACQhlZpdwAAAAAA8MVFUQoAAAAASA1FKQAAAAAgNRSlAAAAAIDUUJQCAAAAAFJDUQoAAAAASA1FKQAEYGZjzMzjR/+0+5M3ZtbBzK4wsxfNbKmZfR7ncknafdtUzGx80THTLe3+ID1m1r/oWBiTdn8AIGlt0u4AgM2LmTW/+XE/d3++wjbDJN0Vvxzr7mM2Rd+QT2bWSdJzkrql3JWNEn8Z0T9+Od7d56TWGWx2zOw8STtIWuLu16bdHwBYHxSlADa1n0r6etqdQK5dpDUF6TOS/kfSQkku6R8p9WlD9Jc0On7+pKQ5aXUEm6XzJO0h6S1JFKUAcoWiFMCmdqSZ/bO7P5p2R5Bbg+J/P5D0DXdfkWZnQnH34ZKGp9wNZIC7PynJ0u4HAGwqXFMKYFMpLhx+mlovsDnYLf739S9KQQoAwBcJRSmATWWupPvj5weZ2fFpdga51jb+99NUewEAADYJilIAm9L/k/R5/PxyM2u9oTsqmnnyyY1ta2ZPNrWJX7cyszPi5e+a2XIze9nM/p+Zbdts213N7DIze8nMPjKzD83sKTP75ga8p0Fm9lsze9vMPo3/vcvMDl2PfexsZheZ2R/M7B0zW2lmi+LXPzKz9hW2nxPnYk78eiszO8fMnjazhfEst0+u73sr2n87M/u+mT0R9+/TOMdPm9mPzWz7FrZbPVtx0eKvFn22GzSTsZkNL9p2eLysYGb/ZWaz4s9+rf1a5PB49t/HzWx+/D6Wm9nfzOxuMxtSJuaY+H2MLlr8RIn38mSz7crOvltqRlYz293MrjGz1+L+LTGzP5rZd82sqkt2zOw4M3sw/vw/iY+R/zGzQ1rKYQv7OcbM7jGzv5rZinhf88xsppn9b9ynnarpUwv7XytvZtbRzC41sz/HP5sfmVmjmY0ys63XY78D49y/adFMzyvMbHa87J8qbLvex1eVfWpx9t2mn2FF15NK0h4ljq0WPysza2tmZ5rZJDObG39OSyz6HXdNqWOv2fbrHKdmdoKZ/S7+Wfk4Ph5/bmYdm227vZldEH9OH5jZMjP7k5n9h5nxNyrwReLuPHjw4JHYQ9HkMy7ptfj1+KJlw1vYZlhRmzEV9vvkevShZFtFk8w0tWkv6dGi180f0yXtGG93qKR3y7S9ukyfxhS16y/ppjL7+UzS6Cre53BJH5XZj0t6R9KhZfYxJ243R9Kekl4usY+KOW9h3/0kzavQv/cUXSdaLl/lHv3Xs0/Di49HSaMkrSq3X0m3V9mX30nabiPey5PNthtftK5bif32L1o/RtJARdfdtrT/hyVtWSY3W0i6p8z2qyT9oHkOS+xna0lTqnzP5yXwu+ZJSQdKertMnDcl7Vlhfx0lPVZFn/9L0hZJHV9Vvte1PusWfoYrPUp9VgVJf62w3aeSRpbpW/Fx2l3RRGQt7WuOpD3i7faRNKtM23sk2YYeHzx48MjXg4mOAGxqYySdrOgUzDFmdqe7r0y3S2u5XdHswM8o+iPoHUUjDmfF/x4o6VozGy1pmqL38V+Snpa0UtLhkkYomjjuB2Y21StP6nSupGMVFWT/JeklSe0UFRUnKDqLZYyZLXb3G0vtwMzO1ZoZNldIulfSHyUtlrRzvK+hkjpJetTMDnL3V8r0aUtJ/ye
pV/ze7pM0X9Ef6p0qvJ9S/TtQ0uOKChRJelHSnZL+LmlXSd+UdJiknSRNMbNveDSZS5O7Jc2InzedBv4XRaPvxV5e374V+ZaiPH0oaYKkRkVfCBwQL2uytaI/zH8v6QVJsyUtV5SbvSWdKqlDvK+Jij7bYk3vZVgcU5IuLtH39zbivdRKukDRZDi3SHo27nNB0n9I2kbSUYpmMr6khX3USzopfv6JomLjWUU5KUg6U9LVio61cn4q6ej4+QJFRcpfJC1T9CXQXoq+4Dmi+rdX1vaKjt0aRYX3A5LeV1T0nClp9zjmY2ZW6+4fNd+BmXVQ9F67x4tmxvucpehsj/0UFZk18T7bqPIkVNUeXxurTtHvj3pFx+SieFlz04tfWHRGxqPxtq7o99vDir5I2lrRZ3RqvP7XZvapu4+v0JcrJZ2o6Hfa/yiaCXhXRb8j91P0O3WimR0j6RFF+fxN3I+lkvoo+t27jaJj8WFFvyMBbO7Srop58OCxeT205lvu14qWXV+0/JwS26Q5UuqSflKiTUetGeVbpaioeFdS7xJtTy3a10MtxBzTLObLknYp0e5YRbc5cUWFz+4l2hSK2rxYqk3cbrCiwtklPddCmznN+vX9BI6BVlp7xPVaSa1KtLu4qM3fJW21sZ99FX0b3uz9viqpS4VtDpe0Q5n122jtEcavVnEM9K+ir+OL2ncrsb5/s/fylqQeJdodXHS8vK8So6WKvphp2s8iSfuVaNOtxPEyvFmb1pKWaM2o2DrHeLOfsS9vxGfZfGTtrBJt2kt6oqjNDS3s6/54/eeSzm2hTXtFhVvTvgYmcXxV+V6LP+sxLbRp+mzmVLG/beOfOVc0ut7SMbtXfFy5oi8Vdq5wnLqkX6vZz7uiIndmUZsGRV+mHVlif0fEn4NLemVjc8eDB498PDhfH0AIVygqsCTpIqtwnWNg09x9ndmB3X2RpKZRytaKRjfOdveXSrT9b0WnB0rRLXAqnYWyStK33P3dEvt6QNI18ct2kr5TYvtLFI3ULJU02N3/XiqIu09RNHIhSYeY2Vcq9Ot+d/9lhTbVGKxoxFWSnlNU6H7evJG7XybpwfjlbpJOSSD2+nBJw9x9ftlG7n9w9yVl1i9XNHrWdIyfmlwX18sp7v5m84Xu/oKi0ShJ2lFRkdrc94uen+3u64xAu/scVR4d7Kho5FKSflvqGC/a3yJ3f63C/qp1t7vfVCLGMkVfejWNjp5pZjsUtzGzPlozuv1Ld7+uhf427atplPP8Cn2q6vhKyQitmdX6NHf/falG7j5L0r/FL7dR6RHYYi8rOn7W+nl394+15neRJPVVVFw/XiLmU4pGTiWpp5nt1rwNgM0PRSmATc7dF2rNqaa7KLrJe1aUPD029kzR84Uqf9ri0/G/W2rNKYAtmebufymz/lpFp/lJ0nHFK8xsR605NfIud59XIdb/FD3/RoW2N1RYX63imZZ/7u5epm3xH6qhZ2j+g7vPTGJH7r5U0p/jl4cksc/19KK7/6HM+uI//vctXmFmW2nNsTFf0v+2tBOPTrFe54uZIh+3FGcTu6alFfHvn6afg60VnVJbrOlLBC+3n3hfH0h6KH55hJltWaZ5YsfXJtD0nt9w98nlGsaFY1NhXel3yC3uvqqFdcW/Tz9TdJp5S54ueh7yOAKQEq4pBRDKzxWN+nWQ9EMz+5W7v59ynyTp+TLrFhY9byw12tdC2x0rxHys3Ep3f8fMXlV0DdbeZra9uzeNzhymNV8ofmZmza9fbG6Louc9y7T7TNE1dUloGolzRdeNlfNHrbnWMHQxV66IW0tcfHxT0jGKRs07KeqzlWjeNZHerZ/nKqwv/vKi+fF5gNYcJ09VOM6l6PT33qVWuPuHZvaComPgn83sfkVfdvzB3f9RYb8b6kNF12uW87ik78bPD1J0nW+Tw+N/l0g62KzUR7qWLYv+/ZKiU3RLqfr4CsmiGa+bPr+FVfwOkaKfUan87xCp+t+nrxf9Tqv
UttLvUwCbAYpSAEHEf6xeJekqRaf3jZL0o3R7JSmaGKglxffFLNeuedutKrSdVWF9U5v9FBU9u2rNKYPditp8R6VP721JuT/uFrv7J+uxr3I6x/++E48gtsjdPzez2YoKow5m1tbDTYRVaZRZkmRm+yua+KlHlfvdboN7tOEqTZJU7vjsUvT8r1XEqtTmLEVfvGyn6LTYYyUtN7PnFY2APSrpmSqK32rNrjAaL639M9el2bpu8b87as2kWtUq9zNV1fGVgt205outw7WmKK9GpQKxxd+T7v5pUcGf5O9TAJsBTt8FENINWnMa2Nlm1vyPw+DW4w/jpP6AlqIJPipZXvS8+Brckvf1rFLbMus+LrNufTXd23V52VZrLCt6vm2LrZJX8T3Hs7I+qjUF6VxJNyuaQflfFZ1yfFz8aDolO43/Wzfm+Nym6Pn6HpvrcPcGRbMBT9SaHG8j6UhF10M/JWm2mX17/bta0sb8PEn5+JlK0sa83y0qrE/j9ymAzQAjpQCCcfePzewyRX/Ub63oD9T/SDJGTm643q6KNsWFwrIWnp/h7rcn06VELZW0g9Z+D+UUFwllR1ZTcLai66Cl6LYe/97SNXNmdlGwXiWruGBb32OzJHf/m6TTzWykoluLfEXSP0n6qqKf/W6S/sfM9ig10dh62pifp6bXO0j6u7vvsZF9yYPi9z/R3U9PrScAEMvDH28ANi+3KrrPoxTNhLlXlds1ndJZbmRCiu7RmXXVvOemNq7o3qlNik8JTOPaxWosiP/dtdJMyxadz9c0MdTigKfuVuuf439XSTqvzCQuUnQPxjwqnh32S1W0r6aNJMndP3H3J9z9Cnf/F0UF/oWKjmtJusTMdqq+qyV1t8oXghb/zDWfDbfpZ2oXM6s0Erg5yMPvEABfMBSlAIKKJzu5JH7ZRtKlVW7adEuOSqf8pjHz6fo6stxKM9tVayYUeaPZhCBPac0f9JVmwkzLC/G/pjVFXUu+ojUjpS+Ua5iSTvG/i8vdFsbMDlR0O5Ryik9ZrDibTkAzFd3HVIpmlK30t0H/DQ3k7svc/WeKrtGVosmCDtrQ/cW2l9SnQpuvFT3/U7N1TbdD2UrRPTLzqun4Kntsuft7kl6JX/YzszSugQaAtVCUAkjDXVpzW4lhiia5qaTpj6g9zKzcSM05G9OxQAaaWblZLM9RdG9USfq/4hXxfR+nxi//ycyyWJjeV/T8hxVGsS5sYbusaLpecRczK3e96yVl1jUpPm2y2lObN7l4gquH45ddJJ3UUlsz668WZt5dT3OKnidxKVGL9ww1s45acw/cFVrz89NkYtHz0WbWWvnUdHxVc2xNiP9tp2jSOQBIFUUpgODimTKbrr8zSd+rYrPiPySvKlXomNmlqjwylwVtJP0m/mN5LWY2RNIP45crFF1/29z/05qRrbvNrPl9F5vvcw8zu9rMdinXLkEPas2kP4dJ+nmp0Tcz+4mkIfHLuZLuCNO99dI0qmaSLm++0iKXKZphtpK/FT2vNLIX2i+Lnt9oZvs1b2Bm3SSNL7cTMzvQzC42s05l2uysNYWvq/x9T6v1r2a2zvXpZraNoi/Bmib3ua35iLe7P681X4gcLumOcqOHZtbGzI43s7MS6HeSmo6vncxs9wptb5L0Vvx8lJldUG6E3My2N7NzzCwPv18B5BATHQFIhbtPMbM/Kjp9s5pv9m9TdAuZDpJOlPQHM7tD0a0wdlc04lpQdP/BYZuk08l5QFER8xczGyfpz4pGLAYo+mO9qeC+0N3nNt/Y3aeb2XckjVN0i4bfmdkzkn6n6A/TfyjK05cVTS5TiDe9dpO9o7X797mZnaLoHqRbS/qBpK/Fn9fbik6J/WbcN8X9PS3BW9Ik6VeSzlA0cn2OmdUqGr1+R9GtNf5V0oGKRvI/ltS3zL7+oOi9biHpAjNrKsiabn/xvruncgqzuz9mZuMlDVd0Xfaf4td/VHRaaEFRHraTdK+in0Fp3VlUt1d0Sv7
o+Jj8o6Q3FE1g1UHS/opy1iFuf4e7/30juz9D0URFN8f33Lxf0en+e0s6U2uu9f2b1nwZ1twZcfv9JX1L0gAzu0dSg6QPFB3HNYo+66MU/dzdupH9TtpjkobGz//PzG5WdH1302f0Z3efJ0nuvjzO1e8VfaY/kzTSzO5TdCwvi5d/SdE9Z/srup7/1DBvBcAXDUUpgDT9WGuu5yrL3ReZ2amKCoItFY3AHdas2RRFf1xmvSi9TtFkI2dJ+kmJ9S7pUne/saUduPutZvauosK0k0rno9hiScGKPnefYWZfVzQC1VnRyGCp0cH3Jf2ruz8Zqm/rI34f35N0o6Kzi47QutcdvirpGEn/VWFf75nZ1YqO+/Za93rq32sjrtdMQJ2ifp2o6PrK/9Das2N/rmgU/0OtKUqbz5bcdL1za5XOVbHfxDE31oeS/k3Rz/+A+NHcbEnfcPePSu3A3T8ys3+SVK+oKN0h7lu5/jWfMClttyn6nbK3oi9Hmh+P/6aike742D5Y0UjygYomHCt37+hPVfl+uACwQTh9F0Bq3P0prXt9V7n2Dym6/vR2SX9XNCPvIklPKPoGf6i7Z/XegGtx97MlHS1psqI/blfG//5G0mHuPqaKfUyWtKeiwmGSolNgP9aavDyr6N6wQyR1iSc4Ccbdn1V0f8/zFRVcixSNFC6O+3aRpO7uPi1kv9aXu9+sqOD/X0UjpP+Q9K6iUcDzJRXcfVaV+/qJpJMVHffvaM2s0qlz93+4+0mSTlDUv0WKCpG/Kzq1+jB3v0ZS8Wy57zfbx+8VjTb+UNEXEq8pGnX7PP73FUXF01fdfVhSP6/uPkNRYXW5pJcVFcvLJL2o6Iuf3u7+1wr7+Mjdhyn68uTaeNvFimZeXibpTUVnOZyv6Lit5jriYNx9maR+kq6QNF1RsV72fqDu/rqiAvYYRdeZviHpI0mfKRptnqnomtvhkjq7e9W/rwFgfVh0aRcAAEBl8Smex8cvd3L398u134T9aPoD5vfu3j+NPgAAksFIKQAAqEo82dHg+OXMtApSAMDmhaIUAADIzLqbWdcy62sUTSLUNl50S5COAQA2e0x0BAAAJOlQSbeb2VOKZgqerega5Z0UXav4TUWzREvSc4omBQIAYKNRlAIAgCZtJB0ZP1rypKQT3P2zID0CAGz2KEoBAIAUzQRdp+g+nD0V3a+0g6IZghdKel7S3fGszwAAJCYTs+/uvPPO3q1bt7S7scEWLVqkjh07bvYx84g8AQAAAOlrbGx8z91L/mGeiZHSbt26qaGhIe1uAAAAAAA2ATN7q6V1zL6bgDFjxnwhYuYReQIAAACyLROn7xYKBc/zSKmZKXQe04iZR+QJAAAASJ+ZNbp7odQ6RkoBAAAAAKmhKAUAAAAApIaiNAFpnHqc59OdQyJPAAAAQLZRlAIAAAAAUsNERwlgoqPsIk8AAABA+pjoCAAAAACQSRSlAAAAAIDUUJQmYPTo0V+ImHlEngAAAIBs45pSAAAAAMAmxTWlm1iXLl2+EDHziDwBAAAA2UZRmoAFCxZo7ty5+trXvqZ9991XvXr10nXXXbd6/ZgxY1RTU6Pa2lrV1tbqoYcekiQ988wz6t27twqFgt58801J0pIlS/SNb3xDn3/+ecWYm8oDDzygV155ZfXrSy65RI8++qgkqX///ht1788nn3xStbW16tWrl7761a9udF8raSlPr7/++urPo7a2Vtttt52uvfZaSZvm89qUNtXn9eGHH2rIkCE64IAD1KtXL91+++2r102YMEE9evRQjx49NGHChI17AwAAAPhCa5N2BzYXbdq00TXXXKM+ffpo6dKl6tu3r4466ijtu+++kqTvf//7+uEPf7jWNtdcc40eeughzZkzR7/+9a91zTXX6PLLL9dPfvITtWqV3vcFDzzwgAYPHry675deemki+12yZIm++93vaurUqdp999317rvvJrLfDbHPPvtoxowZkqTPPvtMNTU1Ou6441a
v5/OSbrrpJu27776aPHmyFi1apH322Uff/va3tWzZMo0dO1YNDQ0yM/Xt21dDhw7VjjvumEhcAAAAfLEwUpqAPn36qHPnzurTp48kadttt1XPnj01b968stttscUWWrFihVasWKEttthCs2fP1ty5c9W/f/8Wt5k6daq+/OUva+utt9Y555yjwYMHS4pG966++urV7fbbbz/NmTNHknTssceqb9++6tWrl+rr61e3ad++vS666CIdcMAB6tevnxYuXKg//vGPmjRpki644ALV1tZq9uzZGj58uO699951+vLwww/r0EMPVZ8+fXTSSSdp2bJlZd/vnXfeqeOPP1677767JGmXXXYp2z4JTZ9JOY899pi6d++uPfbYo2y7jfm8+vTpk7vPy8y0dOlSubuWLVumDh06qE2bNpo2bZqOOuoodejQQTvuuKOOOuooTZ06tey+AAAAgJZQlCagsbFxrddz5szRiy++qEMOOWT1shtvvFG9e/fWGWecoQ8++ECS9OMf/1innXaa/vM//1Nnn322LrroIl1++eUtxvnkk080YsQITZ48WcuXL9c777xTVf9uu+02NTY2qqGhQddff70WL14sSVq+fLn69eunmTNn6ogjjtC4ceP0la98RUOHDtXPf/5zzZgxQ927dy+5z/fee0+XX365Hn30UU2fPl2FQkG/+MUvJEWnj06aNGmdbd544w198MEH6t+/v/r27auJEydW1f+N0fyzKeXuu+/WySefvNaypD+vxsbG3H1eZ599tl599VV16dJF+++/v6677jq1atVK8+bN02677ba6XdeuXSt+AQMAAAC0hKI0AXV1daufL1u2TCeccIKuvfZabbfddpKk73znO5o9e7ZmzJihzp076wc/+IEkqba2Vs8995yeeOIJ/fWvf1Xnzp3l7vrWt76lU045RQsXLlwrzmuvvaY999xTPXr00MiRI3XKKadU1b/rr79+9eja3LlzV18P2bZt29Ujd3379l09UleN5557Tq+88ooOO+ww1dbWasKECXrrrbckRaePDh06dJ1tVq1apcbGRj344IOaNm2aLrvsMr3xxhtVx9wQxZ9NKStXrtSkSZN00kknrV62KT4vM8vd5zVt2jTV1tZq/vz5mjFjhs4++2x99NFHVccEAAAAqkFRmoBx48ZJkv7xj3/ohBNO0Le//W0df/zxq9d36tRJrVu3VqtWrTRixAi98MILa23v7rr88st18cUXa+zYsfrZz36mESNG6Prrr68Ys0mbNm3Wmmznk08+kRRNLPToo4/q2Wef1cyZM3XggQeuXrfFFlvIzCRJrVu31qpVq6p+z+6uo446SjNmzNCMGTP0yiuv6NZbby27TdeuXTVgwABts8022nnnnXXEEUdo5syZVcfcEM3z1Nzvfvc79enTR506dVq9bFN8Xs3l4fO6/fbbdfzxx8vMtNdee2nPPffUa6+9ppqaGs2dO3d1u7fffls1NTVV9wUAAAAoRlGaEHfXmWeeqZ49e+r8889fa13xDLD333+/9ttvv7XWT5w4UYMGDVKHDh20YsUKtWrVSq1atdKKFSvWavflL39Zc+bM0ezZsyVJd9111+p13bp10/Tp0yVJ06dP19/+9jdJ0QyqO+64o9q1a6fXXntNzz33XMX3su2222rp0qVl2/Tr10/PPPOMZs2aJSk6tbTSqOcxxxyjp59+WqtWrdKKFSv0/PPPq2fPnhX7synddddd65y6y+cV2X333fXYY49JkhYuXKjXX39dX/rSlzRgwAA9/PDD+uCDD/TBBx/o4Ycf1oABAyr2EwAAACiF2Xer1G3Ug2XXP/PMM/rv//5v7b///qqtrZUk/fSnP9WgQYP0ox/9SDNmzJCZqVu3brrllltWb7dixQqNHz9eDz/8sCTp/PPP16BBg9S2bVvdeeeda8XYaqutVF9fr6OPPlpSNFFQUzFywgknaOLEierVq5cOOeQQ7b333pKkgQMH6te//rV69uy
pffbZR/369av4XocNG7Z65K/UhDmS1LFjR40fP14nn3yyPv30U0nS5Zdfrr333luXXHKJCoXCOqeE9uzZUwMHDlTv3r3VqlUr/fu///s6BV9Iy5cv1yOPPLLW5yFpk3xe7dq10+GHH56rz+viiy/W8OHDtf/++8vdddVVV2nnnXdeve6ggw6SFF2T2qFDh4r9BAAAAEoxd0+7DyoUCr4x974MoVxRumrpYr1902kBeyPNnz9fb7zxhq6++mpNmTIlaOw8mT9/vrp06ZJ2NyRFp+byeQEAAOCLyMwa3b1Qah2n7yZg5cJZwWNWM6ssyBMAAACQdYyUVqncSOlbVw1W6DyaWfCYeUSeAAAAgPQxUgoAAAAAyCSKUgAAAABAaihKE9BhwNnBYzafMRalkScAAAAg2yhKE7Bt7cDgMevq6oLHzCPyBAAAAGQbRWkC3rpqcPCYZhY8Zh6RJwAAACDbKEoBAAAAAKmhKAUAAAAApIaiNAFbdz8oeMzBg8OfMpxH5AkAAADINorSBOxy4ujgMSdPnhw8Zh6RJwAAACDbKEoT8O69Y4PHHDJkSPCYeUSeAAAAgGyjKE3Ax7P/FDzmlClTgsfMI/IEAAAAZBtF6WbEzHTKKaesfr1q1Sp17Nhxva+r7N+/vxoaGiRJgwYN0pIlSxLt54wZM3TooYeqV69e6t27t37zm9+sXnf44YertrZWtbW16tKli4499thEYwMAAADIljZpdwDJ2WabbfTyyy/r448/1tZbb61HHnlENTU1G7XPhx56KKHerdGuXTtNnDhRPXr00Pz589W3b18NGDBAO+ywg/7whz+sbnfCCSfomGOOSTw+AAAAgOxgpDQBe1wY/hRRdy+5fNCgQXrwwQclSXfddZdOPvnk1euWL1+uM844QwcffLAOPPBA/fa3v5Ukffzxxxo2bJh69uyp4447Th9//PHqbbp166b33ntPknTssceqb9++6tWrl+rr61e3ad++vS666CIdcMAB6tevnxYuXFi273vvvbd69OghSerSpYt22WUXLVq0aK02H330kR5//PGNHiltKU8AAAAAsqHqotTMWpvZi2Y2JX69p5k9b2azzOw3ZtY2Xr5l/HpWvABhjiIAACAASURBVL7bpul6diydMTV4zOKisNiwYcN0991365NPPtFLL72kQw45ZPW6K664QkceeaReeOEFPfHEE7rgggu0fPly3XzzzWrXrp1effVVjR07Vo2NjSX3fdttt6mxsVENDQ26/vrrtXjxYklRsduvXz/NnDlTRxxxhMaNGydJmjRpki655JKy7+OFF17QypUr1b1797WWP/DAA/r617+u7bbbruqclNJSngAAAABkw/qMlJ4r6dWi11dJ+qW77yXpA0lnxsvPlPRBvPyXcbvN2vvTbgwec+TIkSWX9+7dW3PmzNFdd92lQYMGrbXu4Ycf1pVXXqna2lr1799fn3zyif7+97/rqaeeWn0tau/evdW7d++S+77++utXj4bOnTtXb775piSpbdu2q69b7du3r+bMmSNJGjp0qC699NIW38OCBQt06qmn6vbbb1erVmsfis1HeTdUS3kCAAAAkA1VXVNqZl0lHS3pCknnm5lJOlLSv8ZNJkgaI+lmScfEzyXpXkk3mpk551EGM3ToUP3whz/Uk08+uXo0U4pOZb3vvvu0zz77rPc+n3zyST366KN69tln1a5du9VFrSRtscUWig4JqXXr1lq1alXF/X300Uc6+uijdcUVV6hfv35rrXvvvff0wgsv6P7771/vfgIAAADIl2pHSq+V9CNJn8evd5K0xN2bqo+3JTXNqFMjaa4kxes/jNsjkDPOOEOjR4/W/vvvv9byAQMG6IYbblh9neWLL74oSTriiCN05513SpJefvllvfTSS+vs88MPP9SOO+6odu3a6bXXXtNzzz23wf1buXKljjvuOJ122mk68cQT11l/7733avDgwdpqq602OAYAAACAfKhYlJrZYEn
vunvpCw03kJnVmVmDmTU0n+QmbzqecHHwmJMmTWpxXdeuXXXOOeess/ziiy/WP/7xD/Xu3Vu9evXSxRdH/f7Od76jZcuWqWfPnrrkkkvUt2/fdbYdOHCgVq1apZ49e2rUqFHrjG621MdS15Tec889euqppzR+/PjVt3+ZMWPG6vV33313IqfuNvUBAAAAQHZZpbNqzew/JZ0qaZWkrSRtJ+l+SQMk7eruq8zsUElj3H2AmU2Lnz9rZm0kvSOpY7nTdwuFgjfdFzOruo16sMV1q5Yu1ts3nRawN9L8+fPVpUuXoDHziDwBAAAA6TOzRncvlFpXcaTU3X/s7l3dvZukYZIed/dvS3pCUtO5l6dL+m38fFL8WvH6xzf360nn/er0yo0StrH3H/2iIE8AAABAtm3MfUovVDTp0SxF14zeGi+/VdJO8fLzJY3auC4CAAAAADZXVc2+28Tdn5T0ZPz8r5IOLtHmE0knJdA3AAAAAMBmbmNGShFrf8CA4DFHjBgRPGYekScAAAAg2yhKE7DTwO8Fj1lfXx88Zh6RJwAAACDbKEoTsGD8ucFjlrptC9ZFngAAAIBsoyhNwMqFs4PHnD59evCYeUSeAAAAgGyjKAUAAAAApIaiNAGt23cIHrNz587BY+YReQIAAACyjaI0AV3Pmhg85vz584PHzCPyBAAAAGQbRWkCljx9R/CYY8aMCR4zj8gTAAAAkG3m7mn3QYVCwRsaGtLuRlndRj3Y4rq3rhqs0Hk0s+Ax84g8AQAAAOkzs0Z3L5Rax0gpAAAAACA1FKUAAAAAgNRQlCZg19OvDR4z66c7ZwV5AgAAALKNohQAAAAAkBqK0gS8M+G84DELhZLXCKMZ8gQAAABkG0UpAAAAACA1FKUAAAAAgNRQlCZg+8NODh5z9OjRwWPmEXkCAAAAss3cPe0+qFAoeNZnSe026sGy6+dceXSgngAAAABAvphZo7uXnPCFkdIEvH3TacFjdunSJXjMPCJPAAAAQLZRlCbgs2XvB4+5YMGC4DHziDwBAAAA2UZRCgAAAABIDUVpAtp26h48Zp8+fYLHzCPyBAAAAGQbRWkCOg+/LnjMxsbG4DHziDwBAAAA2UZRmoDFU28IHrOuri54zDwiTwAAAEC2UZQmYNnMacFjjhs3LnjMPCJPAAAAQLZRlAIAAAAAUkNRCgAAAABIDUVpAmq+OyF4zHnz5gWPmUfkCQAAAMg2itIErFw4K3hMZpWtDnkCAAAAso2iNAGL7rsseMyhQ4cGj5lH5AkAAADINopSAAAAAEBqKEoBAAAAAKmhKE1AhwFnB495yy23BI+ZR+QJAAAAyDaK0gRsWzsweMy6urrgMfOIPAEAAADZRlGagLeuGhw8ppkFj5lH5AkAAADINopSAAAAAEBqKEoBAAAAAKmhKE3A1t0PCh5z8ODwpwznEXkCAAAAso2iNAG7nDg6eMzJkycHj5lH5AkAAADINorSBLx779jgMYcMGRI8Zh6RJwAAACDbKhalZraVmb1gZjPN7C9mNjZePt7M/mZmM+JHbbzczOx6M5tlZi+ZWZ9N/SbS9vHsPwWPOWXKlOAx84g8AQAAANnWpoo2n0o60t2XmdkWkp42s9/F6y5w93ubtf8XST3ixyGSbo7/BQAAAABgLRVHSj2yLH65RfzwMpscI2livN1zknYws84b31UAAAAAwOamqmtKzay1mc2Q9K6kR9z9+XjVFfEpur80sy3jZTWS5hZt/na8bLO1x4XhTxF1L/e9AJqQJwAAACDbqipK3f0zd6+V1FXSwWa2n6QfS/qypIMkdZB04foENrM6M2sws4ZFixatZ7ezZemMqcFj1tfXB4+ZR+QJAAAAyLb1mn3X3ZdIekLSQHdfEJ+i+6mk2yUdHDebJ2m3os26xsua76ve3QvuXujYseOG9T4j3p92Y/CYI0eODB4zj8gTAAAAkG3VzL7b0cx2iJ9vLekoSa81XSdqZibpWEkvx5tMknRaPAt
vP0kfuvuCTdJ7AAAAAECuVTP7bmdJE8ystaIi9h53n2Jmj5tZR0kmaYak/4jbPyRpkKRZklZI+rfkuw0AAAAA2BxULErd/SVJB5ZYfmQL7V3SWRvftfzoeMLFwWNOmjQpeMw8Ik8AAABAtq3XNaUorW2nvYLH7Nu3b/CYeUSeAAAAgGyjKE3AvF+dHjxmTc1mfZedxJAnAAAAINsoSgEAAAAAqaEoBQAAAACkhqI0Ae0PGBA85ogRI4LHzCPyBAAAAGQbRWkCdhr4veAx6+vrg8fMI/IEAAAAZBtFaQIWjD83eExmla0OeQIAAACyjaI0ASsXzg4ec/r06cFj5hF5AgAAALKNohQAAAAAkBqK0gS0bt8heMzOnTsHj5lH5AkAAADINorSBHQ9a2LwmPPnzw8eM4/IEwAAAJBtFKUJWPL0HcFjjhkzJnjMPCJPAAAAQLaZu6fdBxUKBW9oaEi7G2V1G/Vgi+veumqwQufRzILHzCPyBAAAAKTPzBrdvVBqHSOlAAAAAIDUUJQCAAAAAFJDUZqAXU+/NnjMrJ/unBXkCQAAAMg2ilIAAAAAQGooShPwzoTzgscsFEpeI4xmyBMAAACQbRSlAAAAAIDUUJQCAAAAAFJDUZqA7Q87OXjM0aNHB4+ZR+QJAAAAyDZz97T7oEKh4FmfJbXbqAfLrp9z5dGBegIAAAAA+WJmje5ecsIXRkoT8PZNpwWP2aVLl+Ax84g8AQAAANlGUZqAz5a9HzzmggULgsfMI/IEAAAAZBtFKQAAAAAgNRSlCWjbqXvwmH369AkeM4/IEwAAAJBtFKUJ6Dz8uuAxGxsbg8fMI/IEAAAAZBtFaQIWT70heMy6urrgMfOIPAEAAADZRlGagGUzpwWPOW7cuOAx84g8AQAAANlGUQoAAAAASA1FKQAAAAAgNRSlCaj57oTgMefNmxc8Zh6RJwAAACDbKEoTsHLhrOAxmVW2OuQJAAAAyDaK0gQsuu+y4DGHDh0aPGYekScAAAAg2yhKAQAAAACpoSgFAAAAAKSGojQBHQacHTzmLbfcEjxmHpEnAAAAINsoShOwbe3A4DHr6uqCx8wj8gQAAABkG0VpAt66anDwmGYWPGYekScAAAAg2yhKAQAAAACpqViUmtlWZvaCmc00s7+Y2dh4+Z5m9ryZzTKz35hZ23j5lvHrWfH6bpv2LQAAAAAA8qqakdJPJR3p7gdIqpU00Mz6SbpK0i/dfS9JH0g6M25/pqQP4uW/jNtt1rbuflDwmIMHhz9lOI/IEwAAAJBtFYtSjyyLX24RP1zSkZLujZdPkHRs/PyY+LXi9V+3zfzCvl1OHB085uTJk4PHzCPyBAAAAGRbVdeUmllrM5sh6V1Jj0iaLWmJu6+Km7wtqSZ+XiNpriTF6z+UtFOSnc6ad+8dGzzmkCFDgsfMI/IEAAAAZFtVRam7f+butZK6SjpY0pc3NrCZ1ZlZg5k1LFq0aGN3l6qPZ/8peMwpU6YEj5lH5AkAAADItvWafdfdl0h6QtKhknYwszbxqq6S5sXP50naTZLi9dtLWlxiX/XuXnD3QseOHTew+wAAAACAPKtm9t2OZrZD/HxrSUdJelVRcXpi3Ox0Sb+Nn0+KXyte/7i7e5KdBgAAAABsHtpUbqLOkiaYWWtFRew97j7FzF6RdLeZXS7pRUm3xu1vlfTfZjZL0vuShm2CfmfKHheGP0WUOr865AkAAADItmpm333J3Q90997uvp+7Xxov/6u7H+zue7n7Se7+abz8k/j1XvH6v27qN5G2pTOmBo9ZX18fPGYekScAAAAg29brmlKU9v60G4PHHDlyZPCYeUSeAAAAgGyjKAUAAAAApIaiFAAAAACQGorSBHQ84eLgMSdNmhQ8Zh6RJwAAACDbKEoT0LbTXsFj9u3bN3jMPCJPAAAAQLZRlCZg3q9Or9woYTU1NcFj5hF5AgAAALKNohQAAAAAkBqKUgAAAABAaihKE9D+gAHBY44YMSJ
4zDwiTwAAAEC2UZQmYKeB3wses76+PnjMPCJPAAAAQLZRlCZgwfhzg8dkVtnqkCcAAAAg2yhKE7By4ezgMadPnx48Zh6RJwAAACDbKEoBAAAAAKmhKE1A6/Ydgsfs3Llz8Jh5RJ4AAACAbKMoTUDXsyYGjzl//vzgMfOIPAEAAADZRlGagCVP3xE85pgxY4LHzCPyBAAAAGSbuXvafVChUPCGhoa0u1FWt1EPtrjurasGK3QezSx4zDwiTwAAAED6zKzR3Qul1jFSCgAAAABIDUUpAAAAACA1FKUJ2PX0a4PHzPrpzllBngAAAIBsoygFAAAAAKSGojQB70w4L3jMQqHkNcJohjwBAAAA2UZRCgAAAABIDUUpAAAAACA1FKUJ2P6wk4PHHD16dPCYeUSeAAAAgGwzd0+7DyoUCp71WVK7jXqw7Po5Vx4dqCcAAAAAkC9m1ujuJSd8YaQ0AW/fdFrwmF26dAkeM4/IEwAAAJBtFKUJ+GzZ+8FjLliwIHjMPCJPAAAAQLZRlAIAAAAAUkNRmoC2nboHj9mnT5/gMfOIPAEAAADZRlGagM7Drwses7GxMXjMPCJPAAAAQLZRlCZg8dQbgsesq6sLHjOPyBMAAACQbRSlCVg2c1rwmOPGjQseM4/IEwAAAJBtFKUAAAAAgNRQlAIAAAAAUkNRmoCa704IHnPevHnBY+YReQIAAACyjaI0ASsXzgoek1llq0OeAAAAgGyjKE3AovsuCx5z6NChwWPmEXkCAAAAso2iFAAAAACQGopSAAAAAEBqKhalZrabmT1hZq+Y2V/M7Nx4+Rgzm2dmM+LHoKJtfmxms8zsdTMbsCnfQBZ0GHB28Ji33HJL8Jh5RJ4AAACAbGtTRZtVkn7g7tPNbFtJjWb2SLzul+5+dXFjM9tX0jBJvSR1kfSome3t7p8l2fEs2bZ2YPCYdXV1wWPmEXkCAAAAsq3iSKm7L3D36fHzpZJelVRTZpNjJN3t7p+6+98kzZJ0cBKdzaq3rhocPKaZBY+ZR+QJAAAAyLb1uqbUzLpJOlDS8/Gis83sJTO7zcx2jJfVSJpbtNnbKl/EAgAAAAC+oKouSs2svaT7JJ3n7h9JullSd0m1khZIumZ9AptZnZk1mFnDokWL1mdTAAAAAMBmoqqi1My2UFSQ3uHu/ydJ7r7Q3T9z988ljdOaU3TnSdqtaPOu8bK1uHu9uxfcvdCxY8eNeQ+p27r7QcFjDh4c/pThPCJPAAAAQLZVM/uuSbpV0qvu/oui5Z2Lmh0n6eX4+SRJw8xsSzPbU1IPSS8k1+Xs2eXE0cFjTp48OXjMPCJPAAAAQLZVM1J6mKRTJR3Z7PYvPzOzP5vZS5K+Jun7kuTuf5F0j6RXJE2VdNbmPPOuJL1779jgMYcMGRI8Zh6RJwAAACDbzN3T7oMKhYI3NDSk3Y2yuo16sMV1b101WKHzaGbBY+YReQIAAADSZ2aN7l4otW69Zt8FAAAAACBJFKUAAAAAgNRQlCZgjwunBI/JKanVIU8AAABAtlGUJmDpjKnBY9bX1wePmUfkCQAAAMg2itIEvD/txuAxR44cGTxmHpEnAAAAINsoSgEAAAAAqaEoBQAAAACkhqI0AR1PuDh4zEmTJgWPmUfkCQAAAMg2itIEtO20V/CYffv2DR4zj8gTAAAAkG0UpQmY96vTg8esqakJHjOPyBMAAACQbRSlAAAAAIDUUJQCAAAAAFJDUZqA9gcMCB5zxIgRwWPmEXkCAAAAso2iNAE7Dfxe8Jj19fXBY+YReQIAAACyjaI0AQvGnxs8JrPKVoc8AQAAANlGUZqAlQtnB485ffr04DHziDwBAAAA2UZRCgAAAABIDUVpAlq37xA8ZufOnYPHzCPyBAAAAGQbRWkCup41MXjM+fPnB4+ZR+QJAAAAyDaK0gQsefqO4DHHjBkTPGYekScAAAAg28zd0+6DCoWCNzQ0pN2NsrqNerDFdW9dNVih82hmwWPmEXkCAAA
A0mdmje5eKLWOkVIAAAAAQGooSgEAAAAAqaEoTcCup18bPGbWT3fOCvIEAAAAZBtFKQAAAAAgNRSlCXhnwnnBYxYKJa8RRjPkCQAAAMg2ilIAAAAAQGooSgEAAAAAqaEoTcD2h50cPObo0aODx8wj8gQAAABkm7l72n1QoVDwrM+S2m3Ug2XXz7ny6EA9AQAAAIB8MbNGdy854QsjpQl4+6bTgsfs0qVL8Jh5RJ4AAACAbKMoTcBny94PHnPBggXBY+YReQIAAACyjaIUAAAAAJAaitIEtO3UPXjMPn36BI+ZR+QJAAAAyDaK0gR0Hn5d8JiNjY3BY+YReQIAAACyjaI0AYun3hA8Zl1dXfCYeUSeAAAAgGyjKE3AspnTgsccN25c8Jh5RJ4AAACAbKMoBQAAAACkhqIUAAAAAJAaitIE1Hx3QvCY8+bNCx4zj8gTAAAAkG0Vi1Iz283MnjCzV8zsL2Z2bry8g5k9YmZvxv/uGC83M7vezGaZ2Utmttnfk2PlwlnBYzKrbHXIEwAAAJBt1YyUrpL0A3ffV1I/SWeZ2b6SRkl6zN17SHosfi1J/yKpR/yok3Rz4r3OmEX3XRY85tChQ4PHzCPyBAAAAGRbxaLU3Re4+/T4+VJJr0qqkXSMpKbzVidIOjZ+foykiR55TtIOZtY58Z4DAAAAAHJvva4pNbNukg6U9LykTu6+IF71jqRO8fMaSXOLNns7XgYAAAAAwFqqLkrNrL2k+ySd5+4fFa9zd5fk6xPYzOrMrMHMGhYtWrQ+m2ZOhwFnB495yy23BI+ZR+QJAAAAyLaqilIz20JRQXqHu/9fvHhh02m58b/vxsvnSdqtaPOu8bK1uHu9uxfcvdCxY8cN7X8mbFs7MHjMurq64DHziDwBAAAA2VbN7Lsm6VZJr7r7L4pWTZJ0evz8dEm/LVp+WjwLbz9JHxad5rtZeuuqwcFjRh8LKiFPAAAAQLa1qaLNYZJOlfRnM5sRL/uJpCsl3WNmZ0p6S9I343UPSRokaZakFZL+LdEeAwAAAAA2GxWLUnd/WlJLw01fL9HeJZ21kf0CAAAAAHwBrNfsuyht6+4HBY85eHD4U4bziDwBAAAA2UZRmoBdThwdPObkyZODx8wj8gQAAABkG0VpAt69d2zwmEOGDAkeM4/IEwAAAJBtFKUJ+Hj2n4LHnDJlSvCYeUSeAAAAgGyjKAUAAAAApIaiFAAAAACQGorSBOxxYfhTRKM776AS8gQAAABkW8X7lKKypTOmqtuo8m3mXHl0ojHr6+tVV1eX6D43R+QJAAAAyDZGShPw/rQbg8ccOXJk8Jh5RJ4AAACAbKMoBQAAAACkhqIUAAAAAJAaitIEdDzh4uAxJ02aFDxmHpEnAAAAINsoShPQttNewWP27ds3eMw8Ik8AAABAtlGUJmDer04PHrOmpiZ4zDwiTwAAAEC2UZQCAAAAAFJDUQoAAAAASA1FaQLaHzAgeMwRI0YEj5lH5AkAAADINorSBOw08HvBY9bX1wePmUfkCQAAAMg2itIELBh/bvCYzCpbHfIEAAAAZBtFaQJWLpwdPOb06dODx8wj8gQAAABkG0UpAAAAACA1FKUJaN2+Q/CYnTt3Dh4zj8gTAAAAkG0UpQnoetbE4DHnz58fPGYekScAAAAg2yhKE7Dk6TuCxxwzZkzwmHlEngAAAIBsoyhNwIfP3BU85tixY4PHzCPyBAAAAGQbRSkAAAAAIDUUpQAAAACA1FCUJmDX068NHrOhoSF4zDwiTwAAAEC2UZQCAAAAAFJDUZqAdyacFzxmoVAIHjOPyBMAAACQbRSlAAAAAIDUUJQCAAAAAFJDUZqA7Q87OXjM0aNHB4+ZR+QJAAAAyDZz97T7oEKh4FmfJbXbqAc3avs5Vx6dUE8AAAAAIF/MrNHdS074wkhpAt6+6bTgMbt06RI8Zh6RJwAAACDbKEoT8Nmy94PHXLBgQfCYeUSeAAAAgGyjKAUAAAAApIaiNAF
tO3UPHrNPnz7BY+YReQIAAACyjaI0AZ2HXxc8ZmNjY/CYeUSeAAAAgGyjKE3A4qk3BI9ZV1cXPGYekScAAAAg2yoWpWZ2m5m9a2YvFy0bY2bzzGxG/BhUtO7HZjbLzF43swGbquNZsmzmtOAxx40bFzxmHpEnAAAAINuqGSkdL2lgieW/dPfa+PGQJJnZvpKGSeoVb/MrM2udVGcBAAAAAJuXikWpuz8lqdp7nhwj6W53/9Td/yZplqSDN6J/AAAAAIDN2MZcU3q2mb0Un967Y7ysRtLcojZvx8s2azXfnRA85rx584LHzCPyBAAAAGTbhhalN0vqLqlW0gJJ16zvDsyszswazKxh0aJFG9iNbFi5cFbwmMwqWx3yBAAAAGTbBhWl7r7Q3T9z988ljdOaU3TnSdqtqGnXeFmpfdS7e8HdCx07dtyQbmTGovsuCx5z6NChwWPmEXkCAAAAsm2DilIz61z08jhJTTPzTpI0zMy2NLM9JfWQ9MLGdREAAAAAsLlqU6mBmd0lqb+knc3sbUmjJfU3s1pJLmmOpJGS5O5/MbN7JL0iaZWks9z9s03TdQAAAABA3lUsSt395BKLby3T/gpJV2xMp/Kmw4Czg8e85ZZbgsfMI/IEAAAAZNvGzL6L2La1pW7jumnV1dUFj5lH5AkAAADINorSBLx11eDgMc0seMw8Ik8AAABAtlGUAgAAAABSQ1EKAAAAAEgNRWkCtu5+UPCYgweHP2U4j8gTAAAAkG0UpQnY5cTRwWNOnjw5eMw8Ik8AAABAtlGUJuDde8cGjzlkyJDgMfOIPAEAAADZRlGagI9n/yl4zClTpgSPmUfkCQAAAMg2ilIAAAAAQGooSgEAAAAAqaEoTcAeF4Y/RdTdg8fMI/IEAAAAZBtFaQKWzpgaPGZ9fX3wmHlEngAAAIBsoyhNwPvTbgwec+TIkcFj5hF5AgAAALKNohQAAAAAkBqKUgAAAABAaihKE9DxhIuDx5w0aVLwmHlEngAAAIBsoyhNQNtOewWP2bdv3+Ax84g8AQAAANlGUZqAeb86PXjMmpqa4DHziDwBAAAA2UZRCgAAAABIDUUpAAAAACA1FKUJaH/AgOAxR4wYETxmHpEnAAAAINsoShOw08DvBY9ZX18fPGYekScAAAAg2yhKE7Bg/LnBYzKrbHXIEwAAAJBtFKUJWLlwdvCY06dPDx4zj8gTAAAAkG0UpQAAAACA1FCUJqB1+w7BY3bu3Dl4zDwiTwAAAEC2UZQmoOtZE4PHnD9/fvCYeUSeAAAAgGyjKE3AkqfvCB5zzJgxwWPmEXkCAAAAso2iNAEfPnNX8Jhjx44NHjOPyBMAAACQbRSlAAAAAIDUtEm7A18U3UY9WHb9nCuPDtQTAAAAAMgORkoTsOvp1waP2dDQEDxmHpEnAAAAINsoSgEAAAAAqaEoTcA7E84LHrNQKASPmUfkCQAAAMg2ilIAAAAAQGooSgEAAAAAqaEoTcD2h50cPObo0aODx8wj8gQAAABkm7l72n1QoVDwrM+SWumWLhuLW8IAAAAA2FyZWaO7l5zwhZHSBLx902nBY3bp0iV4zDwiTwAAAEC2UZQm4LNl7wePuWDBguAx84g8AQAAANlWsSg1s9vM7F0ze7loWQcze8TM3oz/3TFebmZ2vZnNMrOXzKzPpuw8AAAAACDfqhkpHS9pYLNloyQ95u49JD0Wv5akf5HUI37USbo5mW5mW9tO3YPH7NOHer8a5AkAAADItopFqbs/Jan5+anHSJoQP58g6dii5RM98pykHcysc1KdzarOw68LHrOxsTF4zDwiTwAAAEC2beg1pZ3cvelivXckdYqf10iaW9Tu7XjZ/2/v/mP1LMs7gH+vFZlGiAysFVp+OCUaTQZiVYzEqKh0UsFEYjRudsZ5XASDyZbJlhhgxgT/mTp/heKvujjU4JwVDGjQZNNkSoswf0cgJVJLy/g1mEaCu/fHeYqH2p6ect73fd7nvJ9PcnL
e536fnuvi7pXzcvW+n+dZ0e6+9sMTjzk3NzfxmENkngAAYLot+0ZHbf6ZgU01CwAADlhJREFUMof8XJmqmquqbVW17a677lpuGr168ObrJh7ziiuumHjMITJPAAAw3R5rU7p777bc7vuebnxnkuMXnLeuG/s9rbXNrbX1rbX1q1evfoxpAAAAMGSPtSndmmRT93pTkq8sGH9zdxfe05Pcv2CbLwAAADzKYQc7oaquTPLSJE+uqjuSXJzksiRfrKq3Jrk9yeu707+W5NVJbknyqyRvGUPOU2ftO7Yc/KQR27lzvwvQ7MM8AQDAdDtoU9pae+MB3jpzP+e2JOcvN6mheWj3LTnsyGMmGnP79u057rjjJhpziMwTAABMt2Xf6Ijkri+9d+IxzznnnInHHCLzBAAA001TCgAAQG80pQAAAPRGUzoCR591wcRjXn755ROPOUTmCQAAppumdASOPHXDxGPOzc1NPOYQmScAAJhumtIRuP39Gyces6omHnOIzBMAAEw3TSkAAAC90ZQCAADQG03pCDzh6c+feMyNGye/ZXiIzBMAAEw3TekIPOW8iyce86tf/erEYw6ReQIAgOl2WN8JrAR7rrp02Y3pSRdds+j7Oy47+1HHr3nNazRcS2CeAABgulkpHYFf33rDxGNeffXVE485ROYJAACmm6YUAACA3mhKAQAA6I2mdAROfPfkt4i21iYec4jMEwAATDdN6Qg8cNO1E4+5efPmicccIvMEAADTTVM6Avdc95GJx3z7298+8ZhDZJ4AAGC6aUoBAADojaYUAACA3mhKR2D1694z8Zhbt26deMwhMk8AADDdDus7gZXg8DXPGHuMky665lHHDz9wb975nd+N7bjs7LHnMETPe97z+k4BAABYhJXSEdj5sU0zEXOI1q5d23cKAADAIjSlAAAA9EZTCgAAQG80pSNwxClnzUTMIXrb297WdwoAAMAiNKUjcMyGd85EzCHavHlz3ykAAACL0JSOwK7PXDgTMYfI3XcBAGC6aUpH4KHdt85EzCG68cYb+04BAABYhKYUAACA3hzWdwIrwaojjl4RMU+66JpF399x2dkjjzluxx57bN8pAAAAi7BSOgLrzv/sTMQcol/+8pd9pwAAACxCUzoC9337czMRc4guueSSvlMAAAAWoSkdgfu/c+VMxByiSy+9tO8UAACARWhKAQAA6I2mFAAAgN5oSkfgqZs+OBMxh2jbtm19pwAAACzCI2FmxMEe9wIAANAHK6UjcOeWd81EzCFav3593ykAAACL0JQCAADQG9t3VwjbcwEAgCFaVlNaVTuSPJDkt0kebq2tr6qjk3whyUlJdiR5fWvt3uWlOd2e9OI3zkTMIbr44ov7TgEAAFjEKLbvvqy1dmprbe/Fexclub61dnKS67vjFe2oM940EzGH6JJLLuk7BQAAYBHjuKb03CRbutdbkrx2DDGmyh0fffNMxByi4447ru8UAACARSy3KW1Jvl5V26tqrhtb01rb1b2+M8maZcaYer998J6ZiDlEu3btOvhJAABAb5Z7o6MzWms7q+opSb5RVT9d+GZrrVVV298f7JrYuSQ54YQTlpkGAAAAQ7SsldLW2s7u+54kX07ygiS7q+rYJOm+7znAn93cWlvfWlu/evXq5aTRu8PXPH0mYg7Raaed1ncKAADAIh5zU1pVT6yqI/e+TvKqJD9MsjXJpu60TUm+stwkp92xf/GhmYg5RNu3b+87BQAAYBHLWSldk+TbVXVzku8luaa1dm2Sy5K8sqp+nuQV3fGKdve1H56JmEM0Nzd38JMAAIDePOamtLV2W2vtlO7rOa2193Xjd7fWzmytndxae0VrbcXfkefBm6+biZhDdMUVV/SdAgAAsIhxPBIGAAAAlmS5d99lhpx00TWLvr/jsrMnlAkAALBSWCkdgbXv2DITMYdo586dfacAAAAsQlM6Ag/tvmUmYg6Ru+8CAMB005SOwF1feu9MxByic845p+8
UAACARbimlJFxzSkAAHCorJQCAADQGyulI3D0WRfMRMxxG8dK6+WXX/5Y0wEAACZAUzoCR566YSZiLtfBms5xmJubm3hMAABg6WzfHYHb379xJmIOUVX1nQIAALAITSkAAAC90ZQCAADQG03pCDzh6c+fiZhDtHGjbc4AADDN3OhoBJ5y3sUzEXPa7fdGSs/5q0fGPScVAACmj6Z0BPZcdenEm8Q+Yg7RrM3TOB6rAwAA42T77gj8+tYbZiLmEJknAACYblZKGYxJPOfUSiMAAEyWlVIAAAB6oykdgRPfffVMxBwi8wQAANNNUzoCD9x07UzEHCLzBAAA001TOgL3XPeRmYg5ROYJAACmm6YUAACA3rj7LoyQu/cCAMCh0ZSOwOrXvWcmYg7RwnmaxCNlAACAQ2P77ggcvuYZMxFziMwTAABMNyulI7DzY5sm/uiRPmIO0dDmyfbf5TOHAADDYqUUAACA3mhKAQAA6I2mdASOOOWsmYg5ROYJAACmm2tKR+CYDe+ciZhDNOp56vsOvn3HXwrXdAIAcCislI7Ars9cOBMxh8g8AQDAdLNSOgIP7b51JmIOkXk6NEtZibXSCQDAKGlKAVYYW6gBgCHRlI7AqiOOnomYQ2Se2JeGDQBgumhKR2Dd+Z+diZhDNG3z1PeNivqOPy05AAAwPdzoaATu+/bnZiLmEJknAACYblZKR+D+71yZo85404qPOUTmiSGyxRgAmCWaUuCQ2H7LUiy3Tmah8faPDwAwT1MKsMAkmm7NBkuhaQVgVmhKR+Cpmz44EzGHyDwxjZbb+FqtHj8NIQBMztia0qrakORDSVYl+URr7bJxxQJg6YawtXa5TaGmEgCGYyxNaVWtSvLRJK9MckeSG6pqa2vtx+OI17c7t7wrJ7776hUfc4jME4zeNKzUTkMOQ6dxB2BajGul9AVJbmmt3ZYkVfX5JOcmWZFNKQCzZRaa4mloWsedwzT8Nx7MEHJczNDzZ56/R8ZtXE3p2iS/WHB8R5IXjikWABwSTeX0//whWAlzMITt/MvVd0M1hBvorYRaXun6ruNxq9ba6H9o1XlJNrTW/rI7/vMkL2ytXbDgnLkkc93hM5P8bOSJjMaTk/x330kwGOqFpVIrHAr1wlKpFQ6FemGpRlErJ7bWVu/vjXGtlO5McvyC43Xd2CNaa5uTbB5T/JGpqm2ttfV958EwqBeWSq1wKNQLS6VWOBTqhaUad638wZh+7g1JTq6qp1XV4UnekGTrmGIBAAAwUGNZKW2tPVxVFyS5LvOPhPlUa+1H44gFAADAcI3tOaWtta8l+dq4fv4ETf0WY6aKemGp1AqHQr2wVGqFQ6FeWKqx1spYbnQEAAAASzGua0oBAADgoDSli6iqDVX1s6q6paou6jsf+ldVn6qqPVX1wwVjR1fVN6rq5933P+rGq6r+qauf/6qq0/rLnEmrquOr6ltV9eOq+lFVXdiNqxcepaoeX1Xfq6qbu1q5tBt/WlV9t6uJL3Q3DkxV/WF3fEv3/kl95s/kVdWqqvp+VV3dHasV9quqdlTVD6rqpqra1o35HOL3VNVRVXVVVf20qn5SVS+aZK1oSg+gqlYl+WiSP03y7CRvrKpn95sVU+AzSTbsM3ZRkutbaycnub47TuZr5+Tuay7JxyeUI9Ph4SR/3Vp7dpLTk5zf/Q5RL+zrN0le3lo7JcmpSTZU1elJ3p/kA621ZyS5N8lbu/PfmuTebvwD3XnMlguT/GTBsVphMS9rrZ264HEePofYnw8luba19qwkp2T+d8zEakVTemAvSHJLa+221tpDST6f5Nyec6JnrbV/T3LPPsPnJtnSvd6S5LULxj/b5v1nkqOq6tjJZErfWmu7Wms3dq8fyPwv97VRL+yj+zt/sDt8XPfVkrw8yVXd+L61sreGrkpyZlXVhNKlZ1W1LsnZST7RHVfUCofG5xCPUlVPSvKSJJ9MktbaQ621+zLBWtGUHtjaJL9YcHxHNwb7WtN
a29W9vjPJmu61GiJJ0m2Ze26S70a9sB/ddsybkuxJ8o0ktya5r7X2cHfKwnp4pFa69+9PcsxkM6ZHH0zyt0n+rzs+JmqFA2tJvl5V26tqrhvzOcS+npbkriSf7i4N+ERVPTETrBVNKYxQm7+dtVta84iqOiLJl5K8q7X2PwvfUy/s1Vr7bWvt1CTrMr9T51k9p8QUqqqNSfa01rb3nQuDcUZr7bTMb7c8v6pesvBNn0N0DktyWpKPt9aem+R/87utuknGXyua0gPbmeT4BcfrujHY1+69Wxa673u6cTU046rqcZlvSD/XWvvXbli9cEDddqlvJXlR5rdD7X2e+MJ6eKRWuveflOTuCadKP16c5Jyq2pH5y4penvnrwNQK+9Va29l935Pky5n/Ry+fQ+zrjiR3tNa+2x1flfkmdWK1oik9sBuSnNzd0e7wJG9IsrXnnJhOW5Ns6l5vSvKVBeNv7u5QdnqS+xdsgWCF667b+mSSn7TW/nHBW+qFR6mq1VV1VPf6CUlemflrkL+V5LzutH1rZW8NnZfkm81Dx2dCa+3vWmvrWmsnZf7/S77ZWntT1Ar7UVVPrKoj975O8qokP4zPIfbRWrszyS+q6pnd0JlJfpwJ1kr53XRgVfXqzF+7sSrJp1pr7+s5JXpWVVcmeWmSJyfZneTiJP+W5ItJTkhye5LXt9bu6ZqSj2T+br2/SvKW1tq2PvJm8qrqjCT/keQH+d21X3+f+etK1QuPqKo/yfwNJFZl/h+Lv9ha+4eq+uPMr4YdneT7Sf6stfabqnp8kn/O/HXK9yR5Q2vttn6ypy9V9dIkf9Na26hW2J+uLr7cHR6W5F9aa++rqmPic4h9VNWpmb+B2uFJbkvylnSfSZlArWhKAQAA6I3tuwAAAPRGUwoAAEBvNKUAAAD0RlMKAABAbzSlAAAA9EZTCgAAQG80pQAAAPRGUwoAAEBv/h8FqDPMJfOR7wAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "users_per_item=df.groupby(['item']).count()['rating']\n", + "\n", + "plt.figure(figsize=(16,8))\n", + "plt.hist(users_per_item, bins=100)\n", + "\n", + "# Let's add median\n", + "t=users_per_item.median()\n", + "plt.axvline(t, color='k', linestyle='dashed', linewidth=1)\n", + "plt.text(t*1.1, plt.ylim()[1]*0.9, 'Median: {:.0f}'.format(t))\n", + "\n", + "# Let's add also some percentiles\n", + "t=users_per_item.quantile(0.25)\n", + "plt.axvline(t, color='k', linestyle='dashed', linewidth=1)\n", + "plt.text(t*1.1, plt.ylim()[1]*0.95, '25% quantile: {:.0f}'.format(t))\n", + "\n", + "t=users_per_item.quantile(0.75)\n", + "plt.axvline(t, color='k', linestyle='dashed', linewidth=1)\n", + "plt.text(t*1.05, plt.ylim()[1]*0.95, '75% quantile: {:.0f}'.format(t))\n", + "\n", + "plt.title('Number of ratings per item', fontsize=30)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "rating\n", + "1 0.06110\n", + "2 0.11370\n", + "3 0.27145\n", + "4 0.34174\n", + "5 0.21201\n", + "Name: user, dtype: float64" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.groupby(['rating']).count()['user']/len(df)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Item attributes" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "genres = pd.read_csv('./Datasets/ml-100k/u.genre', sep='|', header=None,\n", + " encoding='latin-1')\n", + "genres=dict(zip(genres[1], genres[0]))" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{0: 'unknown',\n", + " 1: 'Action',\n", + " 2: 'Adventure',\n", + " 3: 'Animation',\n", + " 4: \"Children's\",\n", + " 5: 
'Comedy',\n", + " 6: 'Crime',\n", + " 7: 'Documentary',\n", + " 8: 'Drama',\n", + " 9: 'Fantasy',\n", + " 10: 'Film-Noir',\n", + " 11: 'Horror',\n", + " 12: 'Musical',\n", + " 13: 'Mystery',\n", + " 14: 'Romance',\n", + " 15: 'Sci-Fi',\n", + " 16: 'Thriller',\n", + " 17: 'War',\n", + " 18: 'Western'}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "genres" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "movies = pd.read_csv('./Datasets/ml-100k/u.item', sep='|', encoding='latin-1', header=None)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789...14151617181920212223
01Toy Story (1995)01-Jan-1995NaNhttp://us.imdb.com/M/title-exact?Toy%20Story%2...00011...0000000000
12GoldenEye (1995)01-Jan-1995NaNhttp://us.imdb.com/M/title-exact?GoldenEye%20(...01100...0000000100
23Four Rooms (1995)01-Jan-1995NaNhttp://us.imdb.com/M/title-exact?Four%20Rooms%...00000...0000000100
\n", + "

3 rows × 24 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 \\\n", + "0 1 Toy Story (1995) 01-Jan-1995 NaN \n", + "1 2 GoldenEye (1995) 01-Jan-1995 NaN \n", + "2 3 Four Rooms (1995) 01-Jan-1995 NaN \n", + "\n", + " 4 5 6 7 8 9 ... \\\n", + "0 http://us.imdb.com/M/title-exact?Toy%20Story%2... 0 0 0 1 1 ... \n", + "1 http://us.imdb.com/M/title-exact?GoldenEye%20(... 0 1 1 0 0 ... \n", + "2 http://us.imdb.com/M/title-exact?Four%20Rooms%... 0 0 0 0 0 ... \n", + "\n", + " 14 15 16 17 18 19 20 21 22 23 \n", + "0 0 0 0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 1 0 0 \n", + "2 0 0 0 0 0 0 0 1 0 0 \n", + "\n", + "[3 rows x 24 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "movies[:3]" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "for i in range(19):\n", + " movies[i+5]=movies[i+5].apply(lambda x: genres[i] if x==1 else '')" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "movies['genre']=movies.iloc[:, 5:].apply(lambda x: ', '.join(x[x!='']), axis = 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "movies=movies[[0,1,'genre']]\n", + "movies.columns=['id', 'title', 'genres']" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idtitlegenres
01Toy Story (1995)Animation, Children's, Comedy
12GoldenEye (1995)Action, Adventure, Thriller
23Four Rooms (1995)Thriller
34Get Shorty (1995)Action, Comedy, Drama
45Copycat (1995)Crime, Drama, Thriller
\n", + "
" + ], + "text/plain": [ + " id title genres\n", + "0 1 Toy Story (1995) Animation, Children's, Comedy\n", + "1 2 GoldenEye (1995) Action, Adventure, Thriller\n", + "2 3 Four Rooms (1995) Thriller\n", + "3 4 Get Shorty (1995) Action, Comedy, Drama\n", + "4 5 Copycat (1995) Crime, Drama, Thriller" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "movies.to_csv('./Datasets/ml-100k/movies.csv', index=False)\n", + "movies[:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Toy example" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "os.makedirs('./Datasets/toy-example/', exist_ok = True)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "toy_train=pd.DataFrame([[0,0,3,0], [0,10,4,0], [0,40,5,0], [0,70,4,0],\n", + " [10,10,1,0], [10,20,2,0], [10,30,3,0],\n", + " [20,30,5,0], [20,50,3,0], [20,60,4,0]])\n", + "toy_test=pd.DataFrame([[0,60,3,0],\n", + " [10,40,5,0],\n", + " [20,0,5,0], [20,20,4,0], [20,70,2,0]])\n", + "\n", + "toy_train.to_csv('./Datasets/toy-example/train.csv', sep='\\t', header=None, index=False)\n", + "toy_test.to_csv('./Datasets/toy-example/test.csv', sep='\\t', header=None, index=False)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/P1. Baseline.ipynb b/P1. Baseline.ipynb new file mode 100644 index 0000000..58d6b8a --- /dev/null +++ b/P1. 
Baseline.ipynb @@ -0,0 +1,1253 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Preparing dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import scipy.sparse as sparse\n", + "from collections import defaultdict\n", + "from itertools import chain\n", + "import random\n", + "\n", + "train_read=pd.read_csv('./Datasets/ml-100k/train.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n", + "test_read=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Let's prepare dataset\n", + "train_and_test=pd.concat([train_read, test_read], axis=0, ignore_index=True)\n", + "train_and_test['user_code'] = train_and_test['user'].astype(\"category\").cat.codes\n", + "train_and_test['item_code'] = train_and_test['item'].astype(\"category\").cat.codes\n", + "\n", + "user_code_id = dict(enumerate(train_and_test['user'].astype(\"category\").cat.categories))\n", + "user_id_code = dict((v, k) for k, v in user_code_id.items())\n", + "item_code_id = dict(enumerate(train_and_test['item'].astype(\"category\").cat.categories))\n", + "item_id_code = dict((v, k) for k, v in item_code_id.items())" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
useritemratingtimestampuser_codeitem_code
06645254876526580663524
14912888068651480
23522732884290328351272
361896389130774961795
456024287997677255923
\n", + "
" + ], + "text/plain": [ + " user item rating timestamp user_code item_code\n", + "0 664 525 4 876526580 663 524\n", + "1 49 1 2 888068651 48 0\n", + "2 352 273 2 884290328 351 272\n", + "3 618 96 3 891307749 617 95\n", + "4 560 24 2 879976772 559 23" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_and_test[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "train_df=pd.merge(train_read, train_and_test, on=list(train_read.columns))\n", + "test_df=pd.merge(test_read, train_and_test, on=list(train_read.columns))" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# Take number of users and items\n", + "(U,I)=(train_and_test['user_code'].max()+1, train_and_test['item_code'].max()+1)\n", + "\n", + "# Create sparse csr matrices\n", + "train_ui = sparse.csr_matrix((train_df['rating'], (train_df['user_code'], train_df['item_code'])), shape=(U, I))\n", + "test_ui = sparse.csr_matrix((test_df['rating'], (test_df['user_code'], test_df['item_code'])), shape=(U, I))" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# Above steps are the same for many algorithms, so I put the code in separate file:\n", + "import helpers\n", + "train_read=pd.read_csv('./Datasets/ml-100k/train.csv', sep='\\t', header=None)\n", + "test_read=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n", + "train_ui, test_ui, user_code_id, user_id_code, item_code_id, item_id_code = helpers.data_to_csr(train_read, test_read)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### CSR matrices - what is it?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<3x4 sparse matrix of type ''\n", + "\twith 8 stored elements in Compressed Sparse Row format>" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "row = np.array([0, 0, 0, 1, 1, 2, 2, 2])\n", + "col = np.array([0, 1, 2, 1, 3, 2, 0, 3])\n", + "data = np.array([4, 1, 3, 2,1, 5, 2, 4])\n", + "sample_csr=sparse.csr_matrix((data, (row, col)))\n", + "sample_csr" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Ratings matrix with missing entries replaced by zeros:\n" + ] + }, + { + "data": { + "text/plain": [ + "matrix([[4, 1, 3, 0],\n", + " [0, 2, 0, 1],\n", + " [2, 0, 5, 4]])" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of ratings: 8\n", + "Number of users: 3\n", + "Number of items: 4\n" + ] + } + ], + "source": [ + "print('Ratings matrix with missing entries replaced by zeros:')\n", + "display(sample_csr.todense())\n", + "\n", + "print(f'Number of ratings: {sample_csr.nnz}')\n", + "print(f'Number of users: {sample_csr.shape[0]}')\n", + "print(f'Number of items: {sample_csr.shape[1]}')" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Ratings data: [4 1 3 2 1 2 5 4]\n", + "Regarding items: [0 1 2 1 3 0 2 3]\n", + "Where ratings from 0 to 2 belongs to user 0.\n", + "Where ratings from 3 to 4 belongs to user 1.\n", + "Where ratings from 5 to 7 belongs to user 2.\n" + ] + } + ], + "source": [ + "print('Ratings data:', sample_csr.data)\n", + "\n", + "print('Regarding items:', sample_csr.indices)\n", + "\n", + "for i in range(sample_csr.shape[0]):\n", + " print(f'Where ratings 
from {sample_csr.indptr[i]} to {sample_csr.indptr[i+1]-1} belongs to user {i}.')" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Efficient way to access items rated by user:\n" + ] + }, + { + "data": { + "text/plain": [ + "array([ 0, 6, 10, 27, 49, 78, 95, 97, 116, 143, 153, 156, 167,\n", + " 171, 172, 173, 194, 208, 225, 473, 495, 549, 615], dtype=int32)" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1.13 µs ± 79.9 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n", + "Inefficient way to access items rated by user:\n" + ] + }, + { + "data": { + "text/plain": [ + "array([ 0, 6, 10, 27, 49, 78, 95, 97, 116, 143, 153, 156, 167,\n", + " 171, 172, 173, 194, 208, 225, 473, 495, 549, 615], dtype=int32)" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "149 µs ± 11.5 µs per loop (mean ± std. dev. 
of 7 runs, 10000 loops each)\n" + ] + } + ], + "source": [ + "user=123\n", + "\n", + "print('Efficient way to access items rated by user:')\n", + "display(train_ui.indices[train_ui.indptr[user]:train_ui.indptr[user+1]])\n", + "%timeit train_ui.indices[train_ui.indptr[user]:train_ui.indptr[user+1]]\n", + "\n", + "print('Inefficient way to access items rated by user:')\n", + "display(train_ui[user].indices)\n", + "%timeit train_ui[user].indices" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "###### Example: subtracting row means" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Our matrix:\n" + ] + }, + { + "data": { + "text/plain": [ + "matrix([[4, 1, 3, 0],\n", + " [0, 2, 0, 1],\n", + " [2, 0, 5, 4]])" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "List of row sums:\n" + ] + }, + { + "data": { + "text/plain": [ + "matrix([[ 8, 3, 11]])" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print('Our matrix:')\n", + "display(sample_csr.todense())\n", + "print('List of row sums:')\n", + "sample_csr.sum(axis=1).ravel()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Array with row means:\n" + ] + }, + { + "data": { + "text/plain": [ + "array([2.66666667, 1.5 , 3.66666667])" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Diagonal csr matrix with inverse of row sums on diagonal:\n" + ] + }, + { + "data": { + "text/plain": [ + "matrix([[2.66666667, 0. , 0. ],\n", + " [0. , 1.5 , 0. ],\n", + " [0. , 0. 
, 3.66666667]])" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Let's apply them in nonzero entries:\n" + ] + }, + { + "data": { + "text/plain": [ + "matrix([[2.66666667, 2.66666667, 2.66666667, 0. ],\n", + " [0. , 1.5 , 0. , 1.5 ],\n", + " [3.66666667, 0. , 3.66666667, 3.66666667]])" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Finally after subtraction:\n" + ] + }, + { + "data": { + "text/plain": [ + "matrix([[ 1.33333333, -1.66666667, 0.33333333, 0. ],\n", + " [ 0. , 0.5 , 0. , -0.5 ],\n", + " [-1.66666667, 0. , 1.33333333, 0.33333333]])" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print('Array with row means:')\n", + "row_means=np.asarray(sample_csr.sum(axis=1).ravel())[0]/np.diff(sample_csr.indptr)\n", + "display(row_means)\n", + "\n", + "print('Diagonal csr matrix with inverse of row sums on diagonal:')\n", + "display(sparse.diags(row_means).todense())\n", + "\n", + "print(\"\"\"Let's apply them in nonzero entries:\"\"\")\n", + "to_subtract=sparse.diags(row_means)*(sample_csr>0)\n", + "display(to_subtract.todense())\n", + "\n", + "print(\"Finally after subtraction:\")\n", + "sample_csr-to_subtract.todense()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "###### Transposing" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sample matrix: \n", + " [[4 1 3 0]\n", + " [0 2 0 1]\n", + " [2 0 5 4]]\n", + "\n", + "Indices: \n", + " [0 1 2 1 3 0 2 3]\n", + "\n", + "Transposed matrix: \n", + " [[4 0 2]\n", + " [1 2 0]\n", + " [3 0 5]\n", + " [0 1 4]]\n", + "\n", + "Indices of transposed matrix: \n", + " [0 1 2 1 3 0 2 3]\n", + "\n", + "Reason: \n", + "\n", + "After converting to csr: \n", + " [0 2 0 
1 0 2 1 2]\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "from scipy import sparse\n", + "row = np.array([0, 0, 0, 1, 1, 2, 2, 2])\n", + "col = np.array([0, 1, 2, 1, 3, 2, 0, 3])\n", + "data = np.array([4, 1, 3, 2,1, 5, 2, 4])\n", + "sample=sparse.csr_matrix((data, (row, col)))\n", + "print('Sample matrix: \\n', sample.A)\n", + "print('\\nIndices: \\n', sample.indices)\n", + "transposed=sample.transpose()\n", + "print('\\nTransposed matrix: \\n', transposed.A)\n", + "print('\\nIndices of transposed matrix: \\n', transposed.indices)\n", + "\n", + "print('\\nReason: ', type(transposed))\n", + "\n", + "print('\\nAfter converting to csr: \\n', transposed.tocsr().indices)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Self made top popular" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "if not os.path.exists('./Recommendations generated/'):\n", + " os.mkdir('./Recommendations generated/')\n", + " os.mkdir('./Recommendations generated/ml-100k/')\n", + " os.mkdir('./Recommendations generated/toy-example/')" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "top_pop = []\n", + "train_iu = train_ui.transpose().tocsr()\n", + "scaling_factor = train_ui.max()/max(np.diff(train_iu.indptr))\n", + "\n", + "for i in range(train_iu.shape[0]):\n", + " top_pop.append((i, (train_iu.indptr[i+1]-train_iu.indptr[i])*scaling_factor))\n", + " \n", + "top_pop.sort(key=lambda x: x[1], reverse=True)\n", + "#top_pop is an array of pairs (item, rescaled_popularity) sorted descending from the most popular\n", + "\n", + "k = 10\n", + "result = []\n", + "\n", + "for u in range(train_ui.shape[0]):\n", + " user_rated = train_ui.indices[train_ui.indptr[u]:train_ui.indptr[u+1]]\n", + " rec_user = []\n", + " item_pos = 0\n", + " while len(rec_user)<10:\n", + " if top_pop[item_pos][0] not in user_rated:\n", + " 
rec_user.append((item_code_id[top_pop[item_pos][0]], top_pop[item_pos][1]))\n", + " item_pos+=1\n", + " result.append([user_code_id[u]]+list(chain(*rec_user)))\n", + "\n", + "(pd.DataFrame(result)).to_csv('Recommendations generated/ml-100k/Self_TopPop_reco.csv', index=False, header=False)\n", + "\n", + "\n", + "# estimations - score is a bit artificial since that method is not designed for scoring, but for ranking\n", + "\n", + "estimations=[]\n", + "\n", + "for user, item in zip(*test_ui.nonzero()):\n", + " estimations.append([user_code_id[user], item_code_id[item],\n", + " (train_iu.indptr[item+1]-train_iu.indptr[item])*scaling_factor])\n", + "(pd.DataFrame(estimations)).to_csv('Recommendations generated/ml-100k/Self_TopPop_estimations.csv', index=False, header=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Self made top rated" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "top_rated = []\n", + "global_avg = sum(train_iu.data)/train_ui.nnz\n", + "\n", + "for i in range(train_iu.shape[0]):\n", + " ratings = train_iu.data[train_iu.indptr[i]: train_iu.indptr[i+1]]\n", + " avg = np.mean(ratings) if len(ratings)>0 else global_avg\n", + " top_rated.append((i, avg))\n", + " \n", + "top_rated.sort(key=lambda x: x[1], reverse=True)\n", + " \n", + "k=10\n", + "result=[]\n", + "\n", + "for u in range(train_ui.shape[0]):\n", + " user_rated=train_ui.indices[train_ui.indptr[u]:train_ui.indptr[u+1]]\n", + " rec_user=[]\n", + " item_pos=0\n", + " while len(rec_user)<10:\n", + " if top_rated[item_pos][0] not in user_rated:\n", + " rec_user.append((item_code_id[top_rated[item_pos][0]], top_rated[item_pos][1]))\n", + " item_pos+=1\n", + " result.append([user_code_id[u]]+list(chain(*rec_user)))\n", + "\n", + "(pd.DataFrame(result)).to_csv('Recommendations generated/ml-100k/Self_TopRated_reco.csv', index=False, header=False)\n", + "\n", + "\n", + "\n", + "estimations=[]\n", + "d = 
dict(top_rated)\n", + "\n", + "for user, item in zip(*test_ui.nonzero()):\n", + " estimations.append([user_code_id[user], item_code_id[item], d[item]])\n", + "(pd.DataFrame(estimations)).to_csv('Recommendations generated/ml-100k/Self_TopRated_estimations.csv', index=False, header=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789...11121314151617181920
018145.011225.011895.012015.01293...13065.014675.014915.015005.015365.0
121195.08145.011225.011895.01201...12935.013065.014675.014915.015005.0
\n", + "

2 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 7 8 9 ... 11 12 13 \\\n", + "0 1 814 5.0 1122 5.0 1189 5.0 1201 5.0 1293 ... 1306 5.0 1467 \n", + "1 2 119 5.0 814 5.0 1122 5.0 1189 5.0 1201 ... 1293 5.0 1306 \n", + "\n", + " 14 15 16 17 18 19 20 \n", + "0 5.0 1491 5.0 1500 5.0 1536 5.0 \n", + "1 5.0 1467 5.0 1491 5.0 1500 5.0 \n", + "\n", + "[2 rows x 21 columns]" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DataFrame(result)[:2]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Self-made baseline" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "class selfBaselineUI():\n", + " \n", + " def fit(self, train_ui):\n", + " self.train_ui=train_ui.copy()\n", + " self.train_iu=train_ui.transpose().tocsr()\n", + " \n", + " result=self.train_ui.copy()\n", + " \n", + " self.row_means=np.asarray(result.sum(axis=1).ravel())[0]/np.diff(result.indptr)\n", + " \n", + " # in csr format after addition or multiplication 0 entries \"disappear\" - so some workaraunds are needed \n", + " # (other option is to define addition/multiplication in a desired way)\n", + " row_means=self.row_means.copy()\n", + " \n", + " max_row_mean=np.max(row_means)\n", + " row_means[row_means==0]=max_row_mean+1\n", + " to_subtract_rows=sparse.diags(row_means)*(result>0)\n", + " to_subtract_rows.sort_indices() # needed to have valid .data\n", + " \n", + " subtract=to_subtract_rows.data\n", + " subtract[subtract==max_row_mean+1]=0\n", + " \n", + " result.data=result.data-subtract\n", + "# we can't do result=train_ui-to_subtract_rows since then 0 entries will \"disappear\" in csr format\n", + " self.col_means=np.divide(np.asarray(result.sum(axis=0).ravel())[0], np.diff(self.train_iu.indptr),\\\n", + " out=np.zeros(self.train_iu.shape[0]), where=np.diff(self.train_iu.indptr)!=0) # handling items without ratings\n", + " \n", + " # again - it is possible that 
some mean will be zero, so let's use the same workaround\n", + " col_means=self.col_means.copy()\n", + " \n", + " max_col_mean=np.max(col_means)\n", + " col_means[col_means==0]=max_col_mean+1\n", + " to_subtract_cols=result.power(0)*sparse.diags(col_means)\n", + " to_subtract_cols.sort_indices() # needed to have valid .data\n", + " \n", + " subtract=to_subtract_cols.data\n", + " subtract[subtract==max_col_mean+1]=0\n", + " \n", + " result.data=result.data-subtract\n", + "\n", + " return result\n", + " \n", + " \n", + " def recommend(self, user_code_id, item_code_id, topK=10):\n", + " estimations=np.tile(self.row_means[:,None], [1, self.train_ui.shape[1]]) +np.tile(self.col_means, [self.train_ui.shape[0], 1])\n", + " \n", + " top_k = defaultdict(list)\n", + " for nb_user, user in enumerate(estimations):\n", + " \n", + " user_rated=self.train_ui.indices[self.train_ui.indptr[nb_user]:self.train_ui.indptr[nb_user+1]]\n", + " for item, score in enumerate(user):\n", + " if item not in user_rated:\n", + " top_k[user_code_id[nb_user]].append((item_code_id[item], score))\n", + " result=[]\n", + " # Let's choose k best items in the format: (user, item1, score1, item2, score2, ...)\n", + " for uid, item_scores in top_k.items():\n", + " item_scores.sort(key=lambda x: x[1], reverse=True)\n", + " result.append([uid]+list(chain(*item_scores[:topK])))\n", + " return result\n", + " \n", + " def estimate(self, user_code_id, item_code_id, test_ui):\n", + " result=[]\n", + " for user, item in zip(*test_ui.nonzero()):\n", + " result.append([user_code_id[user], item_code_id[item], self.row_means[user]+self.col_means[item]])\n", + " return result" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training data:\n" + ] + }, + { + "data": { + "text/plain": [ + "matrix([[3, 4, 0, 0, 5, 0, 0, 4],\n", + " [0, 1, 2, 3, 0, 0, 0, 0],\n", + " [0, 0, 0, 5, 0, 3, 4, 0]])" + ] + }, + 
"metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "After subtracting rows and columns:\n" + ] + }, + { + "data": { + "text/plain": [ + "matrix([[ 0. , 0.5, 0. , 0. , 0. , 0. , 0. , 0. ],\n", + " [ 0. , -0.5, 0. , 0. , 0. , 0. , 0. , 0. ],\n", + " [ 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ]])" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recommend best unseen item:\n" + ] + }, + { + "data": { + "text/plain": [ + "[[0, 30, 5.0], [10, 40, 3.0], [20, 40, 5.0]]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Print estimations on unseen items:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
useritemest_score
00604.0
110403.0
22003.0
320204.0
420704.0
\n", + "
" + ], + "text/plain": [ + " user item est_score\n", + "0 0 60 4.0\n", + "1 10 40 3.0\n", + "2 20 0 3.0\n", + "3 20 20 4.0\n", + "4 20 70 4.0" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "toy_train_read=pd.read_csv('./Datasets/toy-example/train.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n", + "toy_test_read=pd.read_csv('./Datasets/toy-example/test.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n", + "\n", + "toy_train_ui, toy_test_ui, toy_user_code_id, toy_user_id_code, \\\n", + "toy_item_code_id, toy_item_id_code = helpers.data_to_csr(toy_train_read, toy_test_read)\n", + "\n", + "print('Training data:')\n", + "display(toy_train_ui.todense())\n", + "\n", + "model=selfBaselineUI()\n", + "print('After subtracting rows and columns:')\n", + "display(model.fit(toy_train_ui).todense())\n", + "\n", + "print('Recommend best unseen item:')\n", + "display(model.recommend(toy_user_code_id, toy_item_code_id, topK=1))\n", + "\n", + "print('Print estimations on unseen items:')\n", + "estimations=pd.DataFrame(model.estimate(toy_user_code_id, toy_item_code_id, toy_test_ui))\n", + "estimations.columns=['user', 'item', 'est_score']\n", + "display(estimations)\n", + "\n", + "top_n=pd.DataFrame(model.recommend(toy_user_code_id, toy_item_code_id, topK=3))\n", + "\n", + "top_n.to_csv('Recommendations generated/toy-example/Self_BaselineUI_reco.csv', index=False, header=False)\n", + "\n", + "estimations=pd.DataFrame(model.estimate(toy_user_code_id, toy_item_code_id, toy_test_ui))\n", + "estimations.to_csv('Recommendations generated/toy-example/Self_BaselineUI_estimations.csv', index=False, header=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "model=selfBaselineUI()\n", + "model.fit(train_ui)\n", + "\n", + "top_n=pd.DataFrame(model.recommend(user_code_id, item_code_id, topK=10))\n", + "\n", + 
"top_n.to_csv('Recommendations generated/ml-100k/Self_BaselineUI_reco.csv', index=False, header=False)\n", + "\n", + "estimations=pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))\n", + "estimations.to_csv('Recommendations generated/ml-100k/Self_BaselineUI_estimations.csv', index=False, header=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# project task 1: implement self-made BaselineIU" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "# Implement recommender system which will recommend movies (which user hasn't seen) which is similar to BaselineUI\n", + "# but first subtract column means then row means\n", + "# The output should be saved in 'Recommendations generated/ml-100k/Self_BaselineIU_reco.csv'\n", + "# and 'Recommendations generated/ml-100k/Self_BaselineIU_estimations.csv'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Ready-made baseline - Surprise implementation" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Estimating biases using als...\n" + ] + } + ], + "source": [ + "import surprise as sp\n", + "import time\n", + "\n", + "# Based on surprise.readthedocs.io\n", + "def get_top_n(predictions, n=10):\n", + " \n", + " # Here we create a dictionary which items are lists of pairs (item, score)\n", + " top_n = defaultdict(list)\n", + " for uid, iid, true_r, est, _ in predictions:\n", + " top_n[uid].append((iid, est))\n", + " \n", + " result=[]\n", + " # Let's choose k best items in the format: (user, item1, score1, item2, score2, ...)\n", + " for uid, user_ratings in top_n.items():\n", + " user_ratings.sort(key=lambda x: x[1], reverse=True)\n", + " result.append([uid]+list(chain(*user_ratings[:n]))) \n", + " return result\n", + "\n", + "\n", + "reader = sp.Reader(line_format='user item rating 
timestamp', sep='\\t')\n", + "trainset = sp.Dataset.load_from_file('./Datasets/ml-100k/train.csv', reader=reader)\n", + "trainset = trainset.build_full_trainset() # -> it is needed for using Surprise package\n", + "\n", + "testset = sp.Dataset.load_from_file('./Datasets/ml-100k/test.csv', reader=reader)\n", + "testset = sp.Trainset.build_testset(testset.build_full_trainset())\n", + "\n", + "algo = sp.BaselineOnly()\n", + "# algo = sp.BaselineOnly(bsl_options={'method':'sgd', 'reg':0, 'n_epochs':2000})\n", + "# observe how bad the results given by the above algorithm are\n", + "# more details http://courses.ischool.berkeley.edu/i290-dm/s11/SECURE/a1-koren.pdf - chapter 2.1\n", + "\n", + "algo.fit(trainset)\n", + "\n", + "antitrainset = trainset.build_anti_testset() # We want to predict ratings of pairs (user, item) which are not in train set\n", + "predictions = algo.test(antitrainset)\n", + "\n", + "top_n = get_top_n(predictions, n=10)\n", + "\n", + "top_n=pd.DataFrame(top_n)\n", + "\n", + "top_n.to_csv('Recommendations generated/ml-100k/Ready_Baseline_reco.csv', index=False, header=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "RMSE: 0.9495\n", + "MAE: 0.7525\n" + ] + }, + { + "data": { + "text/plain": [ + "0.7524871012820799" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Compute RMSE on testset using built-in functions\n", + "predictions = algo.test(testset)\n", + "sp.accuracy.rmse(predictions, verbose=True)\n", + "\n", + "# Let's also save the results in a file\n", + "predictions_df=[]\n", + "for uid, iid, true_r, est, _ in predictions:\n", + " predictions_df.append([uid, iid, est])\n", + " \n", + "predictions_df=pd.DataFrame(predictions_df)\n", + "predictions_df.to_csv('Recommendations generated/ml-100k/Ready_Baseline_estimations.csv', index=False, header=False)\n", + 
"sp.accuracy.mae(predictions, verbose=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### Let's compare with random" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "RMSE: 1.5239\n", + "MAE: 1.2268\n" + ] + }, + { + "data": { + "text/plain": [ + "1.2267993503843746" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# in surprise random is an algorithm predicting random value regarding to normal distribution estimated from train set\n", + "algo = sp.NormalPredictor()\n", + "algo.fit(trainset)\n", + "\n", + "antitrainset = trainset.build_anti_testset() # We want to predict ratings of pairs (user, item) which are not in train set\n", + "predictions = algo.test(antitrainset)\n", + "\n", + "top_n = get_top_n(predictions, n=10)\n", + "\n", + "top_n=pd.DataFrame(top_n)\n", + "\n", + "top_n.to_csv('Recommendations generated/ml-100k/Ready_Random_reco.csv', index=False, header=False)\n", + "\n", + "# Compute RMSE on testset using buildin functions\n", + "predictions = algo.test(testset)\n", + "sp.accuracy.rmse(predictions, verbose=True)\n", + "\n", + "# Let's also save the results in file\n", + "predictions_df=[]\n", + "for uid, iid, true_r, est, _ in predictions:\n", + " predictions_df.append([uid, iid, est])\n", + " \n", + "predictions_df=pd.DataFrame(predictions_df)\n", + "predictions_df.to_csv('Recommendations generated/ml-100k/Ready_Random_estimations.csv', index=False, header=False)\n", + "\n", + "sp.accuracy.mae(predictions, verbose=True)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + 
"pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/P1. Introduction and baseline.pdf b/P1. Introduction and baseline.pdf new file mode 100644 index 0000000..2bff70b Binary files /dev/null and b/P1. Introduction and baseline.pdf differ diff --git a/P2. Evaluation.ipynb b/P2. Evaluation.ipynb new file mode 100644 index 0000000..1bc216d --- /dev/null +++ b/P2. Evaluation.ipynb @@ -0,0 +1,1814 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Prepare test set" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import scipy.sparse as sparse\n", + "from collections import defaultdict\n", + "from itertools import chain\n", + "import random\n", + "from tqdm import tqdm\n", + "\n", + "# In evaluation we do not load train set - it is not needed\n", + "test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n", + "test.columns=['user', 'item', 'rating', 'timestamp']\n", + "\n", + "test['user_code'] = test['user'].astype(\"category\").cat.codes\n", + "test['item_code'] = test['item'].astype(\"category\").cat.codes\n", + "\n", + "user_code_id = dict(enumerate(test['user'].astype(\"category\").cat.categories))\n", + "user_id_code = dict((v, k) for k, v in user_code_id.items())\n", + "item_code_id = dict(enumerate(test['item'].astype(\"category\").cat.categories))\n", + "item_id_code = dict((v, k) for k, v in item_code_id.items())\n", + "\n", + "test_ui = sparse.csr_matrix((test['rating'], (test['user_code'], test['item_code'])))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Estimations metrics" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "estimations_df=pd.read_csv('Recommendations 
generated/ml-100k/Ready_Baseline_estimations.csv', header=None)\n", + "estimations_df.columns=['user', 'item' ,'score']\n", + "\n", + "estimations_df['user_code']=[user_id_code[user] for user in estimations_df['user']]\n", + "estimations_df['item_code']=[item_id_code[item] for item in estimations_df['item']]\n", + "estimations=sparse.csr_matrix((estimations_df['score'], (estimations_df['user_code'], estimations_df['item_code'])), shape=test_ui.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "def estimations_metrics(test_ui, estimations):\n", + " result=[]\n", + "\n", + " RMSE=(np.sum((estimations.data-test_ui.data)**2)/estimations.nnz)**(1/2)\n", + " result.append(['RMSE', RMSE])\n", + "\n", + " MAE=np.sum(abs(estimations.data-test_ui.data))/estimations.nnz\n", + " result.append(['MAE', MAE])\n", + " \n", + " df_result=(pd.DataFrame(list(zip(*result))[1])).T\n", + " df_result.columns=list(zip(*result))[0]\n", + " return df_result" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RMSEMAE
00.9494590.752487
\n", + "
def ranking_metrics(test_ui, reco, super_reactions=None, topK=10):
    """Compute top-K ranking metrics averaged over users present in the test set.

    Parameters
    ----------
    test_ui : scipy.sparse.csr_matrix
        User x item matrix of test ratings, indexed by internal codes.
    reco : numpy.ndarray
        One row per user.  Column 0 is the internal user code, the following
        columns are recommended internal item codes ordered from the best one.
        The code -1 marks a user or an item that is absent from the test set.
    super_reactions : iterable, optional
        Rating values treated as "super" (highly relevant) reactions.
        ``None`` (the default) means that no rating is a super reaction.
    topK : int, optional
        Number of leading recommendations evaluated for every user.

    Returns
    -------
    pandas.DataFrame
        Single-row frame with the columns ``precision``, ``recall``, ``F_1``,
        ``F_05``, ``precision_super``, ``recall_super``, ``NDCG``, ``mAP``,
        ``MRR``, ``LAUC`` and ``HR``.  A metric is 0 when there is no user to
        average it over.
    """
    # The progress bar is a convenience only; evaluation must still work
    # when tqdm is not installed.
    try:
        from tqdm import tqdm as progress
    except ImportError:
        def progress(iterable):
            return iterable

    super_values = [] if super_reactions is None else list(super_reactions)
    nb_items = test_ui.shape[1]
    indptr, indices, ratings = test_ui.indptr, test_ui.indices, test_ui.data

    # Positional discounts for DCG and the ideal DCG for 1..topK relevant items.
    cg = 1.0 / np.log2(np.arange(2, topK + 2))
    cg_sum = np.cumsum(cg)
    ranks = np.arange(1, topK + 1)

    relevant_users = super_relevant_users = 0
    prec = rec = F_1 = F_05 = prec_super = rec_super = 0.0
    ndcg = mAP = MRR = LAUC = HR = 0.0

    for nb_user, user in progress(enumerate(reco[:, 0])):
        user = int(user)
        if user < 0:
            # User unknown to the test set; a negative code must never be
            # allowed to index another user's row.
            continue

        row = slice(indptr[user], indptr[user + 1])
        u_rated_items = indices[row]
        nb_u_rated_items = len(u_rated_items)
        if nb_u_rated_items == 0:
            # Users with no items in the test set are skipped.
            continue
        relevant_users += 1

        # Reading the ratings from the raw CSR arrays avoids the expensive
        # test_ui[user, item] element access.
        u_super_items = u_rated_items[np.isin(ratings[row], super_values)]
        if len(u_super_items) > 0:
            super_relevant_users += 1

        recommended = reco[nb_user, 1:topK + 1]
        user_successes = np.zeros(topK)
        user_successes[:len(recommended)] = np.isin(recommended, u_rated_items)
        nb_user_successes = int(user_successes.sum())
        # Super items are a subset of the rated items, so a super hit is always a hit.
        nb_user_super_successes = int(np.isin(recommended, u_super_items).sum())

        prec_u = nb_user_successes / topK
        prec += prec_u

        rec_u = nb_user_successes / nb_u_rated_items
        rec += rec_u

        if prec_u + rec_u > 0:
            F_1 += 2 * (prec_u * rec_u) / (prec_u + rec_u)
            F_05 += (0.5 ** 2 + 1) * (prec_u * rec_u) / (0.5 ** 2 * prec_u + rec_u)

        prec_super += nb_user_super_successes / topK
        # max(..., 1) makes the contribution 0 for users without super items.
        rec_super += nb_user_super_successes / max(len(u_super_items), 1)

        nb_ideal_hits = min(topK, nb_u_rated_items)
        ndcg += np.dot(user_successes, cg) / cg_sum[nb_ideal_hits - 1]

        cumsum_successes = np.cumsum(user_successes)
        mAP += np.dot(cumsum_successes / ranks, user_successes) / nb_ideal_hits

        hit_positions = user_successes.nonzero()[0]
        if hit_positions.size > 0:
            MRR += 1 / (hit_positions[0] + 1)

        LAUC += (np.dot(cumsum_successes, 1 - user_successes) +
                 (nb_user_successes + nb_u_rated_items) / 2 *
                 ((nb_items - nb_u_rated_items) - (topK - nb_user_successes))) / \
                ((nb_items - nb_u_rated_items) * nb_u_rated_items)

        HR += nb_user_successes > 0

    def per_user(total, nb_users):
        # An empty group contributes 0 instead of raising ZeroDivisionError
        # (e.g. no super reactions were requested or none occur in the test set).
        return total / nb_users if nb_users else 0.0

    result = [
        ('precision', per_user(prec, relevant_users)),
        ('recall', per_user(rec, relevant_users)),
        ('F_1', per_user(F_1, relevant_users)),
        ('F_05', per_user(F_05, relevant_users)),
        ('precision_super', per_user(prec_super, super_relevant_users)),
        ('recall_super', per_user(rec_super, super_relevant_users)),
        ('NDCG', per_user(ndcg, relevant_users)),
        ('mAP', per_user(mAP, relevant_users)),
        ('MRR', per_user(MRR, relevant_users)),
        ('LAUC', per_user(LAUC, relevant_users)),
        ('HR', per_user(HR, relevant_users)),
    ]
    names, values = zip(*result)
    return pd.DataFrame([list(values)], columns=list(names))
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
precisionrecallF_1F_05precision_superrecall_superNDCGmAPMRRLAUCHR
00.091410.0376520.046030.0612860.0796140.0564630.0959570.0431780.1981930.5155010.437964
\n", + "
def diversity_metrics(test_ui, reco, topK=10):
    """Measure how the top-K recommendations are spread over the test items.

    Parameters
    ----------
    test_ui : scipy.sparse.csr_matrix
        User x item matrix of test ratings, indexed by internal codes.
    reco : numpy.ndarray
        One row per user.  Column 0 is the user code, the following columns are
        recommended item codes.  The code -1 marks an item outside the test set.
    topK : int, optional
        Number of leading recommendations per user taken into account.

    Returns
    -------
    pandas.DataFrame
        Single-row frame with the columns ``Reco in test`` (share of
        recommendations that point to test items), ``Test coverage`` (share of
        test items recommended at least once), ``Shannon`` (entropy of the
        recommendation frequencies) and ``Gini`` (their concentration).
    """
    frequencies = defaultdict(int)

    # Every item present in the test set starts with 0, so items that are never
    # recommended still take part in the coverage and concentration figures.
    for item in np.unique(test_ui.indices).tolist():
        frequencies[item] = 0

    # Only the first topK recommendations are counted, in line with the slice
    # ranking_metrics evaluates.
    for item in np.asarray(reco)[:, 1:topK + 1].ravel().tolist():
        frequencies[item] += 1

    nb_reco_outside_test = frequencies.pop(-1, 0)
    frequencies = np.array(list(frequencies.values()), dtype=float)

    nb_rec_items = int(np.count_nonzero(frequencies))
    nb_reco_inside_test = frequencies.sum()
    nb_reco = nb_reco_inside_test + nb_reco_outside_test

    if nb_reco_inside_test > 0:
        frequencies = frequencies / nb_reco_inside_test
    frequencies = np.sort(frequencies)  # the Gini formula needs ascending order

    # Items with zero frequency contribute nothing to the entropy, so the
    # logarithm is taken over the positive frequencies only.
    positive = frequencies[frequencies > 0]
    shannon = -np.dot(positive, np.log(positive))

    nb_counted_items = len(frequencies)
    if nb_counted_items > 1:
        gini = np.dot(frequencies,
                      np.arange(1 - nb_counted_items, nb_counted_items, 2)) / (nb_counted_items - 1)
    else:
        gini = 0.0  # concentration is not defined for fewer than two items

    result = [
        ('Reco in test', nb_reco_inside_test / nb_reco if nb_reco else 0.0),
        ('Test coverage', nb_rec_items / test_ui.shape[1]),
        ('Shannon', shannon),
        ('Gini', gini),
    ]
    names, values = zip(*result)
    return pd.DataFrame([list(values)], columns=list(names))
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Reco in testTest coverageShannonGini
01.00.0339112.8365130.991139
\n", + "
" + ], + "text/plain": [ + " Reco in test Test coverage Shannon Gini\n", + "0 1.0 0.033911 2.836513 0.991139" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# in case of errors try !pip3 install numpy==1.18.4 (or pip if you use python 2) and restart the kernel\n", + "\n", + "import evaluation_measures as ev\n", + "import imp\n", + "imp.reload(ev)\n", + "\n", + "x=diversity_metrics(test_ui, reco, topK=10)\n", + "x" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# To be used in other notebooks" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "943it [00:00, 7347.78it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RMSEMAEprecisionrecallF_1F_05precision_superrecall_superNDCGmAPMRRLAUCHRReco in testTest coverageShannonGini
00.9494590.7524870.091410.0376520.046030.0612860.0796140.0564630.0959570.0431780.1981930.5155010.4379641.00.0339112.8365130.991139
\n", + "
" + ], + "text/plain": [ + " RMSE MAE precision recall F_1 F_05 \\\n", + "0 0.949459 0.752487 0.09141 0.037652 0.04603 0.061286 \n", + "\n", + " precision_super recall_super NDCG mAP MRR LAUC \\\n", + "0 0.079614 0.056463 0.095957 0.043178 0.198193 0.515501 \n", + "\n", + " HR Reco in test Test coverage Shannon Gini \n", + "0 0.437964 1.0 0.033911 2.836513 0.991139 " + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import evaluation_measures as ev\n", + "import imp\n", + "imp.reload(ev)\n", + "\n", + "estimations_df=pd.read_csv('Recommendations generated/ml-100k/Ready_Baseline_estimations.csv', header=None)\n", + "reco=np.loadtxt('Recommendations generated/ml-100k/Ready_Baseline_reco.csv', delimiter=',')\n", + "\n", + "ev.evaluate(test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None),\n", + " estimations_df=estimations_df, \n", + " reco=reco,\n", + " super_reactions=[4,5])\n", + "#also you can just type ev.evaluate_all(estimations_df, reco) - I put above values as default" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "943it [00:00, 4894.39it/s]\n", + "943it [00:00, 4357.39it/s]\n", + "943it [00:00, 5045.11it/s]\n", + "943it [00:00, 4855.03it/s]\n", + "943it [00:00, 5359.75it/s]\n" + ] + } + ], + "source": [ + "import evaluation_measures as ev\n", + "import imp\n", + "imp.reload(ev)\n", + "\n", + "dir_path=\"Recommendations generated/ml-100k/\"\n", + "super_reactions=[4,5]\n", + "test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n", + "\n", + "df=ev.evaluate_all(test, dir_path, super_reactions)\n", + "#also you can just type ev.evaluate_all() - I put above values as default" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ModelRMSEMAEprecisionrecallF_1F_05precision_superrecall_super
0Self_TopPop2.5082582.2179090.1888650.1169190.1187320.1415840.1304720.137473
0Ready_Baseline0.9494590.7524870.0914100.0376520.0460300.0612860.0796140.056463
0Ready_Random1.5238991.2267990.0468720.0223670.0252970.0322690.0311160.027843
0Self_TopRated1.0307120.8209040.0009540.0001880.0002980.0004810.0006440.000223
0Self_BaselineUI0.9675850.7627400.0009540.0001700.0002780.0004630.0006440.000189
\n", + "
" + ], + "text/plain": [ + " Model RMSE MAE precision recall F_1 \\\n", + "0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 0.118732 \n", + "0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 0.046030 \n", + "0 Ready_Random 1.523899 1.226799 0.046872 0.022367 0.025297 \n", + "0 Self_TopRated 1.030712 0.820904 0.000954 0.000188 0.000298 \n", + "0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 0.000278 \n", + "\n", + " F_05 precision_super recall_super \n", + "0 0.141584 0.130472 0.137473 \n", + "0 0.061286 0.079614 0.056463 \n", + "0 0.032269 0.031116 0.027843 \n", + "0 0.000481 0.000644 0.000223 \n", + "0 0.000463 0.000644 0.000189 " + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.iloc[:,:9]" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ModelNDCGmAPMRRLAUCHRReco in testTest coverageShannonGini
0Self_TopPop0.2146510.1117070.4009390.5555460.7656421.0000000.0389613.1590790.987317
0Ready_Baseline0.0959570.0431780.1981930.5155010.4379641.0000000.0339112.8365130.991139
0Ready_Random0.0514140.0197690.1275580.5076960.3329800.9875930.1847045.1047100.906035
0Self_TopRated0.0010430.0003350.0033480.4964330.0095440.6990460.0050511.9459100.995669
0Self_BaselineUI0.0007520.0001680.0016770.4964240.0095440.6005300.0050511.8031260.996380
\n", + "
" + ], + "text/plain": [ + " Model NDCG mAP MRR LAUC HR \\\n", + "0 Self_TopPop 0.214651 0.111707 0.400939 0.555546 0.765642 \n", + "0 Ready_Baseline 0.095957 0.043178 0.198193 0.515501 0.437964 \n", + "0 Ready_Random 0.051414 0.019769 0.127558 0.507696 0.332980 \n", + "0 Self_TopRated 0.001043 0.000335 0.003348 0.496433 0.009544 \n", + "0 Self_BaselineUI 0.000752 0.000168 0.001677 0.496424 0.009544 \n", + "\n", + " Reco in test Test coverage Shannon Gini \n", + "0 1.000000 0.038961 3.159079 0.987317 \n", + "0 1.000000 0.033911 2.836513 0.991139 \n", + "0 0.987593 0.184704 5.104710 0.906035 \n", + "0 0.699046 0.005051 1.945910 0.995669 \n", + "0 0.600530 0.005051 1.803126 0.996380 " + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.iloc[:,np.append(0,np.arange(9, df.shape[1]))]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Check metrics on toy dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "3it [00:00, 4226.71it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ModelRMSEMAEprecisionrecallF_1F_05precision_superrecall_superNDCGmAPMRRLAUCHRReco in testTest coverageShannonGini
0Self_BaselineUI1.6124521.40.4444440.8888890.5555560.4786320.3333330.750.6769070.5740740.6111110.6388891.00.8888890.81.3862940.25
\n", + "
" + ], + "text/plain": [ + " Model RMSE MAE precision recall F_1 F_05 \\\n", + "0 Self_BaselineUI 1.612452 1.4 0.444444 0.888889 0.555556 0.478632 \n", + "\n", + " precision_super recall_super NDCG mAP MRR LAUC HR \\\n", + "0 0.333333 0.75 0.676907 0.574074 0.611111 0.638889 1.0 \n", + "\n", + " Reco in test Test coverage Shannon Gini \n", + "0 0.888889 0.8 1.386294 0.25 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training data:\n" + ] + }, + { + "data": { + "text/plain": [ + "matrix([[3, 4, 0, 0, 5, 0, 0, 4],\n", + " [0, 1, 2, 3, 0, 0, 0, 0],\n", + " [0, 0, 0, 5, 0, 3, 4, 0]])" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test data:\n" + ] + }, + { + "data": { + "text/plain": [ + "matrix([[0, 0, 0, 0, 0, 0, 3, 0],\n", + " [0, 0, 0, 0, 5, 0, 0, 0],\n", + " [5, 0, 4, 0, 0, 0, 0, 2]])" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recommendations:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456
00305.0204.0604.0
110403.0602.0702.0
220405.0204.0704.0
\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6\n", + "0 0 30 5.0 20 4.0 60 4.0\n", + "1 10 40 3.0 60 2.0 70 2.0\n", + "2 20 40 5.0 20 4.0 70 4.0" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Estimations:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
useritemest_score
00604.0
110403.0
22003.0
320204.0
420704.0
\n", + "
" + ], + "text/plain": [ + " user item est_score\n", + "0 0 60 4.0\n", + "1 10 40 3.0\n", + "2 20 0 3.0\n", + "3 20 20 4.0\n", + "4 20 70 4.0" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import evaluation_measures as ev\n", + "import imp\n", + "import helpers\n", + "imp.reload(ev)\n", + "\n", + "dir_path=\"Recommendations generated/toy-example/\"\n", + "super_reactions=[4,5]\n", + "test=pd.read_csv('./Datasets/toy-example/test.csv', sep='\\t', header=None)\n", + "\n", + "display(ev.evaluate_all(test, dir_path, super_reactions, topK=3))\n", + "#also you can just type ev.evaluate_all() - I put above values as default\n", + "\n", + "toy_train_read=pd.read_csv('./Datasets/toy-example/train.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n", + "toy_test_read=pd.read_csv('./Datasets/toy-example/test.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n", + "reco=pd.read_csv('Recommendations generated/toy-example/Self_BaselineUI_reco.csv', header=None)\n", + "estimations=pd.read_csv('Recommendations generated/toy-example/Self_BaselineUI_estimations.csv', names=['user', 'item', 'est_score'])\n", + "toy_train_ui, toy_test_ui, toy_user_code_id, toy_user_id_code, \\\n", + "toy_item_code_id, toy_item_id_code = helpers.data_to_csr(toy_train_read, toy_test_read)\n", + "\n", + "print('Training data:')\n", + "display(toy_train_ui.todense())\n", + "\n", + "print('Test data:')\n", + "display(toy_test_ui.todense())\n", + "\n", + "print('Recommendations:')\n", + "display(reco)\n", + "\n", + "print('Estimations:')\n", + "display(estimations)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Sample recommendations" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Here is what user rated high:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
userratingtitlegenres
540923655Boogie Nights (1997)Drama
552433655Celluloid Closet, The (1995)Documentary
216373655In & Out (1997)Comedy
365083655Swingers (1996)Comedy, Drama
193983655Scream (1996)Horror, Thriller
143433655Fargo (1996)Crime, Drama, Thriller
237383655Chasing Amy (1997)Drama, Romance
699603655Beautiful Thing (1996)Drama, Romance
547533654Scream 2 (1997)Horror, Thriller
545523654Sense and Sensibility (1995)Drama, Romance
300513654Star Trek: First Contact (1996)Action, Adventure, Sci-Fi
470863654Primal Fear (1996)Drama, Thriller
629313654James and the Giant Peach (1996)Animation, Children's, Musical
389393654Full Monty, The (1997)Comedy
387643654First Wives Club, The (1996)Comedy
\n", + "
" + ], + "text/plain": [ + " user rating title \\\n", + "54092 365 5 Boogie Nights (1997) \n", + "55243 365 5 Celluloid Closet, The (1995) \n", + "21637 365 5 In & Out (1997) \n", + "36508 365 5 Swingers (1996) \n", + "19398 365 5 Scream (1996) \n", + "14343 365 5 Fargo (1996) \n", + "23738 365 5 Chasing Amy (1997) \n", + "69960 365 5 Beautiful Thing (1996) \n", + "54753 365 4 Scream 2 (1997) \n", + "54552 365 4 Sense and Sensibility (1995) \n", + "30051 365 4 Star Trek: First Contact (1996) \n", + "47086 365 4 Primal Fear (1996) \n", + "62931 365 4 James and the Giant Peach (1996) \n", + "38939 365 4 Full Monty, The (1997) \n", + "38764 365 4 First Wives Club, The (1996) \n", + "\n", + " genres \n", + "54092 Drama \n", + "55243 Documentary \n", + "21637 Comedy \n", + "36508 Comedy, Drama \n", + "19398 Horror, Thriller \n", + "14343 Crime, Drama, Thriller \n", + "23738 Drama, Romance \n", + "69960 Drama, Romance \n", + "54753 Horror, Thriller \n", + "54552 Drama, Romance \n", + "30051 Action, Adventure, Sci-Fi \n", + "47086 Drama, Thriller \n", + "62931 Animation, Children's, Musical \n", + "38939 Comedy \n", + "38764 Comedy " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Here is what we recommend:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
userrec_nbtitlegenres
363365.01Great Day in Harlem, A (1994)Documentary
1305365.02Tough and Deadly (1995)Action, Drama, Thriller
2248365.03Aiqing wansui (1994)Drama
3189365.04Delta of Venus (1994)Drama
4132365.05Someone Else's America (1995)Drama
5073365.06Saint of Fort Washington, The (1993)Drama
6015365.07Celestial Clockwork (1994)Comedy
6958365.08Some Mother's Son (1996)Drama
8852365.09Maya Lin: A Strong Clear Vision (1994)Documentary
7898365.010Prefontaine (1997)Drama
\n", + "
" + ], + "text/plain": [ + " user rec_nb title \\\n", + "363 365.0 1 Great Day in Harlem, A (1994) \n", + "1305 365.0 2 Tough and Deadly (1995) \n", + "2248 365.0 3 Aiqing wansui (1994) \n", + "3189 365.0 4 Delta of Venus (1994) \n", + "4132 365.0 5 Someone Else's America (1995) \n", + "5073 365.0 6 Saint of Fort Washington, The (1993) \n", + "6015 365.0 7 Celestial Clockwork (1994) \n", + "6958 365.0 8 Some Mother's Son (1996) \n", + "8852 365.0 9 Maya Lin: A Strong Clear Vision (1994) \n", + "7898 365.0 10 Prefontaine (1997) \n", + "\n", + " genres \n", + "363 Documentary \n", + "1305 Action, Drama, Thriller \n", + "2248 Drama \n", + "3189 Drama \n", + "4132 Drama \n", + "5073 Drama \n", + "6015 Comedy \n", + "6958 Drama \n", + "8852 Documentary \n", + "7898 Drama " + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train=pd.read_csv('./Datasets/ml-100k/train.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n", + "items=pd.read_csv('./Datasets/ml-100k/movies.csv')\n", + "\n", + "user=random.choice(list(set(train['user'])))\n", + "\n", + "train_content=pd.merge(train, items, left_on='item', right_on='id')\n", + "\n", + "print('Here is what user rated high:')\n", + "display(train_content[train_content['user']==user][['user', 'rating', 'title', 'genres']]\\\n", + " .sort_values(by='rating', ascending=False)[:15])\n", + "\n", + "reco = np.loadtxt('Recommendations generated/ml-100k/Self_BaselineUI_reco.csv', delimiter=',')\n", + "items=pd.read_csv('./Datasets/ml-100k/movies.csv')\n", + "\n", + "# Let's ignore scores - they are not used in evaluation: \n", + "reco_users=reco[:,:1]\n", + "reco_items=reco[:,1::2]\n", + "# Let's put them into one array\n", + "reco=np.concatenate((reco_users, reco_items), axis=1)\n", + "\n", + "# Let's rebuild it user-item dataframe\n", + "recommended=[]\n", + "for row in reco:\n", + " for rec_nb, entry in enumerate(row[1:]):\n", + " 
recommended.append((row[0], rec_nb+1, entry))\n", + "recommended=pd.DataFrame(recommended, columns=['user','rec_nb', 'item'])\n", + "\n", + "recommended_content=pd.merge(recommended, items, left_on='item', right_on='id')\n", + "\n", + "print('Here is what we recommend:')\n", + "recommended_content[recommended_content['user']==user][['user', 'rec_nb', 'title', 'genres']].sort_values(by='rec_nb')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# project task 2: implement some other evaluation measure" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# it may be your idea, modification of what we have already implemented \n", + "# (for example Hit2 rate which would count as a success users whoreceived at least 2 relevant recommendations) \n", + "# or something well-known\n", + "# expected output: modification of evaluation_measures.py such that evaluate_all will also display your measure" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "943it [00:00, 4859.65it/s]\n", + "943it [00:00, 4809.91it/s]\n", + "943it [00:00, 4678.68it/s]\n", + "943it [00:00, 3240.04it/s]\n", + "943it [00:00, 4796.98it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ModelRMSEMAEprecisionrecallF_1F_05precision_superrecall_superNDCGmAPMRRLAUCHRReco in testTest coverageShannonGini
0Self_TopPop2.5082582.2179090.1888650.1169190.1187320.1415840.1304720.1374730.2146510.1117070.4009390.5555460.7656421.0000000.0389613.1590790.987317
0Ready_Baseline0.9494590.7524870.0914100.0376520.0460300.0612860.0796140.0564630.0959570.0431780.1981930.5155010.4379641.0000000.0339112.8365130.991139
0Ready_Random1.5238991.2267990.0468720.0223670.0252970.0322690.0311160.0278430.0514140.0197690.1275580.5076960.3329800.9875930.1847045.1047100.906035
0Self_TopRated1.0307120.8209040.0009540.0001880.0002980.0004810.0006440.0002230.0010430.0003350.0033480.4964330.0095440.6990460.0050511.9459100.995669
0Self_BaselineUI0.9675850.7627400.0009540.0001700.0002780.0004630.0006440.0001890.0007520.0001680.0016770.4964240.0095440.6005300.0050511.8031260.996380
\n", + "
" + ], + "text/plain": [ + " Model RMSE MAE precision recall F_1 \\\n", + "0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 0.118732 \n", + "0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 0.046030 \n", + "0 Ready_Random 1.523899 1.226799 0.046872 0.022367 0.025297 \n", + "0 Self_TopRated 1.030712 0.820904 0.000954 0.000188 0.000298 \n", + "0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 0.000278 \n", + "\n", + " F_05 precision_super recall_super NDCG mAP MRR \\\n", + "0 0.141584 0.130472 0.137473 0.214651 0.111707 0.400939 \n", + "0 0.061286 0.079614 0.056463 0.095957 0.043178 0.198193 \n", + "0 0.032269 0.031116 0.027843 0.051414 0.019769 0.127558 \n", + "0 0.000481 0.000644 0.000223 0.001043 0.000335 0.003348 \n", + "0 0.000463 0.000644 0.000189 0.000752 0.000168 0.001677 \n", + "\n", + " LAUC HR Reco in test Test coverage Shannon Gini \n", + "0 0.555546 0.765642 1.000000 0.038961 3.159079 0.987317 \n", + "0 0.515501 0.437964 1.000000 0.033911 2.836513 0.991139 \n", + "0 0.507696 0.332980 0.987593 0.184704 5.104710 0.906035 \n", + "0 0.496433 0.009544 0.699046 0.005051 1.945910 0.995669 \n", + "0 0.496424 0.009544 0.600530 0.005051 1.803126 0.996380 " + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dir_path=\"Recommendations generated/ml-100k/\"\n", + "super_reactions=[4,5]\n", + "test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n", + "\n", + "ev.evaluate_all(test, dir_path, super_reactions)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/P2. Evaluation.pdf b/P2. 
# ---------------------------------------------------------------------------
# evaluation_measures.py
# ---------------------------------------------------------------------------


def _metrics_frame(named_values):
    """Build a one-row DataFrame from ``(name, value)`` pairs, keeping their order."""
    names = [name for name, _ in named_values]
    values = [value for _, value in named_values]
    return pd.DataFrame([values], columns=names)


def _safe_mean(total, count):
    """Return ``total / count``, or NaN when nothing was accumulated."""
    return total / count if count else float('nan')


def _progress(iterable):
    """Wrap *iterable* in a tqdm progress bar when tqdm is installed."""
    try:
        from tqdm import tqdm
    except ImportError:
        # The bar is purely cosmetic; metrics must not depend on it.
        return iterable
    return tqdm(iterable)


def _encode_ids(ids, id_to_code):
    """Translate raw ids to inner codes; ids missing from *id_to_code* become -1."""
    codes = [id_to_code.get(raw_id, -1) for raw_id in ids.ravel().tolist()]
    return np.array(codes, dtype=np.int64).reshape(ids.shape)


def evaluate(test,
             estimations_df,
             reco,
             super_reactions=(4, 5),
             topK=10):
    """Compute estimation, ranking and diversity metrics for one model.

    Args:
        test: DataFrame with four columns (user, item, rating, timestamp).
        estimations_df: DataFrame with three columns (user, item, score).
            Every user and item must occur in ``test`` (``KeyError`` otherwise).
        reco: 2-D array laid out as (user, item1, score1, item2, score2, ...).
        super_reactions: ratings counted as "super" for the ``*_super`` metrics.
        topK: number of leading recommendations that are evaluated.

    Returns:
        One-row DataFrame: estimation metrics, then ranking metrics, then
        diversity metrics, concatenated column-wise.
    """
    test_df = test.copy()
    estimations_df = estimations_df.copy()
    reco = np.array(reco)  # private copy, the caller's array is left untouched

    # prepare test set: inner codes are positions in the sorted unique ids
    test_df.columns = ['user', 'item', 'rating', 'timestamp']
    users_cat = test_df['user'].astype('category')
    items_cat = test_df['item'].astype('category')
    user_id_code = {raw: code for code, raw in enumerate(users_cat.cat.categories)}
    item_id_code = {raw: code for code, raw in enumerate(items_cat.cat.categories)}

    test_ui = sparse.csr_matrix(
        (test_df['rating'].values,
         (users_cat.cat.codes.values, items_cat.cat.codes.values)))

    # prepare estimations on the same code space and shape as the test matrix
    estimations_df.columns = ['user', 'item', 'score']
    est_rows = [user_id_code[user] for user in estimations_df['user']]
    est_cols = [item_id_code[item] for item in estimations_df['item']]
    estimations = sparse.csr_matrix(
        (estimations_df['score'].values, (est_rows, est_cols)),
        shape=test_ui.shape)

    estimations_result = estimations_metrics(test_ui, estimations)

    # prepare reco: keep the user column and the item columns, drop the scores.
    # Users/items we recommend may be absent from the test set -> code -1.
    # ``dict.get`` is used so the lookup tables are not modified as a side effect.
    users = _encode_ids(reco[:, :1], user_id_code)
    items = _encode_ids(reco[:, 1::2], item_id_code)
    reco = np.concatenate((users, items), axis=1)

    ranking_result = ranking_metrics(test_ui, reco,
                                     super_reactions=super_reactions, topK=topK)
    diversity_result = diversity_metrics(test_ui, reco, topK)

    return pd.concat([estimations_result, ranking_result, diversity_result], axis=1)


def ranking_metrics(test_ui, reco, super_reactions=(), topK=10):
    """Compute top-K ranking metrics averaged over users present in the test set.

    Args:
        test_ui: CSR user-item matrix of test ratings.
        reco: integer array; column 0 holds the user's row index in ``test_ui``
            and the remaining columns hold recommended item column indices in
            rank order.  Negative user codes are skipped; item code -1 never
            matches a test item.
        super_reactions: ratings counted as "super" reactions.
        topK: number of leading recommendations that are evaluated.

    Returns:
        One-row DataFrame with precision, recall, F_1, F_05, precision_super,
        recall_super, NDCG, mAP, MRR, LAUC and HR.  A metric is NaN when no
        user contributes to its average (previously a ``ZeroDivisionError``,
        e.g. whenever no user had a super item).
    """
    nb_items = test_ui.shape[1]
    super_values = list(super_reactions)
    beta_sq = 0.5 ** 2

    relevant_users = super_relevant_users = 0
    prec = rec = F_1 = F_05 = prec_super = rec_super = 0.0
    ndcg = mAP = MRR = LAUC = 0.0
    HR = 0

    discounts = 1.0 / np.log2(np.arange(2, topK + 2))
    ideal_dcg = np.cumsum(discounts)
    ranks = np.arange(1, topK + 1)

    for row, user in _progress(enumerate(reco[:, 0])):
        user = int(user)
        if user < 0:  # user absent from the test set
            continue
        start, stop = test_ui.indptr[user], test_ui.indptr[user + 1]
        rated_items = test_ui.indices[start:stop]
        nb_rated = len(rated_items)
        if nb_rated == 0:  # nothing to hit for this user
            continue
        relevant_users += 1

        # Read the ratings straight from the CSR buffers; test_ui[user, item]
        # lookups would be far more expensive.
        super_items = rated_items[np.isin(test_ui.data[start:stop], super_values)]
        if len(super_items) > 0:
            super_relevant_users += 1

        recommended = reco[row, 1:topK + 1]
        hits = np.zeros(topK)  # stays zero-padded if fewer than topK items are given
        hits[:len(recommended)] = np.isin(recommended, rated_items)
        nb_hits = int(hits.sum())
        nb_super_hits = int(np.isin(recommended, super_items).sum())

        prec_u = nb_hits / topK
        rec_u = nb_hits / nb_rated
        prec += prec_u
        rec += rec_u
        if prec_u + rec_u > 0:
            F_1 += 2 * (prec_u * rec_u) / (prec_u + rec_u)
            F_05 += (beta_sq + 1) * (prec_u * rec_u) / (beta_sq * prec_u + rec_u)

        prec_super += nb_super_hits / topK
        rec_super += nb_super_hits / max(len(super_items), 1)
        ndcg += np.dot(hits, discounts) / ideal_dcg[min(topK, nb_rated) - 1]

        cumulative_hits = np.cumsum(hits)
        mAP += np.dot(cumulative_hits / ranks, hits) / min(topK, nb_rated)
        hit_positions = np.flatnonzero(hits)
        if hit_positions.size > 0:
            MRR += 1 / (hit_positions[0] + 1)
        # NOTE: the denominator is zero when the user rated every item of the
        # test matrix; numpy then yields inf/nan with a RuntimeWarning.
        LAUC += ((np.dot(cumulative_hits, 1 - hits)
                  + (nb_hits + nb_rated) / 2
                  * ((nb_items - nb_rated) - (topK - nb_hits)))
                 / ((nb_items - nb_rated) * nb_rated))

        HR += nb_hits > 0

    return _metrics_frame([
        ('precision', _safe_mean(prec, relevant_users)),
        ('recall', _safe_mean(rec, relevant_users)),
        ('F_1', _safe_mean(F_1, relevant_users)),
        ('F_05', _safe_mean(F_05, relevant_users)),
        ('precision_super', _safe_mean(prec_super, super_relevant_users)),
        ('recall_super', _safe_mean(rec_super, super_relevant_users)),
        ('NDCG', _safe_mean(ndcg, relevant_users)),
        ('mAP', _safe_mean(mAP, relevant_users)),
        ('MRR', _safe_mean(MRR, relevant_users)),
        ('LAUC', _safe_mean(LAUC, relevant_users)),
        ('HR', _safe_mean(HR, relevant_users)),
    ])


def estimations_metrics(test_ui, estimations):
    """Compute RMSE and MAE between estimated and true test ratings.

    Both matrices must store exactly the same (user, item) cells, because the
    errors are computed on the aligned stored values.

    Raises:
        ValueError: if the two matrices differ in shape or in stored cells
            (previously this either failed inside numpy or silently compared
            unrelated cells).

    Returns:
        One-row DataFrame with columns ``RMSE`` and ``MAE``.
    """
    truth = test_ui.tocsr().copy()
    predicted = estimations.tocsr().copy()
    truth.sort_indices()
    predicted.sort_indices()

    if (truth.shape != predicted.shape
            or not np.array_equal(truth.indptr, predicted.indptr)
            or not np.array_equal(truth.indices, predicted.indices)):
        raise ValueError(
            'estimations and test set must contain exactly the same (user, item) pairs')

    errors = predicted.data - truth.data
    RMSE = np.sqrt(np.sum(errors ** 2) / predicted.nnz)
    MAE = np.sum(np.abs(errors)) / predicted.nnz

    return _metrics_frame([('RMSE', RMSE), ('MAE', MAE)])


def diversity_metrics(test_ui, reco, topK=10):
    """Compute coverage and concentration metrics of the top-K recommendations.

    Args:
        test_ui: CSR user-item matrix of test ratings.
        reco: integer array; column 0 is the user, the remaining columns are
            item codes in rank order, with -1 marking items outside the test set.
        topK: number of leading recommendations taken into account (the
            parameter used to be ignored and every column was counted).

    Returns:
        One-row DataFrame with ``Reco in test``, ``Test coverage``,
        ``Shannon`` (natural-log entropy) and ``Gini``.
    """
    counts = defaultdict(int)
    # every item stored in the test matrix takes part, even if never recommended
    for item in np.unique(test_ui.indices).tolist():
        counts[item] = 0
    for item in reco[:, 1:topK + 1].ravel().tolist():
        counts[item] += 1

    nb_reco_outside_test = counts.pop(-1, 0)

    frequencies = np.array(list(counts.values()), dtype=float)
    nb_rec_items = int(np.count_nonzero(frequencies))
    nb_reco_inside_test = frequencies.sum()

    frequencies = np.sort(frequencies / nb_reco_inside_test)
    nb_freq = len(frequencies)

    # items with zero frequency contribute nothing to the entropy
    positive = frequencies[frequencies > 0]
    shannon = -np.sum(positive * np.log(positive))
    gini = np.dot(frequencies, np.arange(1 - nb_freq, nb_freq, 2)) / (nb_freq - 1)

    return _metrics_frame([
        ('Reco in test',
         nb_reco_inside_test / (nb_reco_inside_test + nb_reco_outside_test)),
        ('Test coverage', nb_rec_items / test_ui.shape[1]),
        ('Shannon', shannon),
        ('Gini', gini),
    ])


def evaluate_all(test,
                 dir_path="Recommendations generated/ml-100k/",
                 super_reactions=(4, 5),
                 topK=10):
    """Evaluate every model that has result files in *dir_path*.

    A model ``<name>`` is picked up when both ``<name>_reco.csv`` and
    ``<name>_estimations.csv`` exist; unrelated files and models with only one
    of the two files are skipped.  ``dir_path`` may be given with or without a
    trailing separator.

    Returns:
        DataFrame with one row per model (first column ``Model``), sorted by
        ``recall`` in descending order; an empty DataFrame if no model is found.
    """
    reco_suffix = '_reco.csv'
    estimations_suffix = '_estimations.csv'

    file_names = {f for f in listdir(dir_path) if isfile(join(dir_path, f))}
    # sorted() keeps the order deterministic before the final sort by recall
    models = sorted(
        f[:-len(reco_suffix)] for f in file_names
        if f.endswith(reco_suffix)
        and f[:-len(reco_suffix)] + estimations_suffix in file_names)

    results = []
    for model in models:
        estimations_df = pd.read_csv(join(dir_path, model + estimations_suffix),
                                     header=None)
        # ndmin=2 keeps a single-user file two-dimensional
        reco = np.loadtxt(join(dir_path, model + reco_suffix),
                          delimiter=',', ndmin=2)
        model_result = evaluate(test, estimations_df, reco, super_reactions, topK)
        model_result.insert(0, "Model", model)
        results.append(model_result)

    if not results:
        return pd.DataFrame()
    return pd.concat(results).sort_values(by='recall', ascending=False)


# ---------------------------------------------------------------------------
# helpers.py
# ---------------------------------------------------------------------------


def data_to_csr(train_read, test_read):
    """Encode train and test interactions as CSR matrices over shared codes.

    Both frames must have four columns (user, item, rating, timestamp); their
    column labels are overwritten in place with these names.  Codes are the
    positions of ids among the sorted unique ids of train and test together.

    Ids are mapped to codes directly.  The earlier merge on all four columns
    duplicated rows that were identical in train and test, which inflated the
    summed ratings.

    Returns:
        ``(train_ui, test_ui, user_code_id, user_id_code, item_code_id,
        item_id_code)``: two CSR matrices of identical shape followed by the
        code->id and id->code dictionaries for users and items.
    """
    column_names = ['user', 'item', 'rating', 'timestamp']
    train_read.columns = list(column_names)
    test_read.columns = list(column_names)

    def unique_ids(column):
        both = pd.concat([train_read[column], test_read[column]], ignore_index=True)
        return both.astype("category").cat.categories

    user_ids = unique_ids('user')
    item_ids = unique_ids('item')

    user_code_id = dict(enumerate(user_ids))
    user_id_code = {v: k for k, v in user_code_id.items()}
    item_code_id = dict(enumerate(item_ids))
    item_id_code = {v: k for k, v in item_code_id.items()}

    shape = (len(user_ids), len(item_ids))

    def to_csr(frame):
        rows = pd.Categorical(frame['user'], categories=user_ids).codes
        cols = pd.Categorical(frame['item'], categories=item_ids).codes
        return sparse.csr_matrix((frame['rating'].values, (rows, cols)), shape=shape)

    train_ui = to_csr(train_read)
    test_ui = to_csr(test_read)

    return train_ui, test_ui, user_code_id, user_id_code, item_code_id, item_id_code
def get_top_n(predictions, n=10):
    """Return the *n* best-scored items for every user.

    Args:
        predictions: iterable of 5-tuples ``(uid, iid, true_r, est, details)``.
        n: maximum number of items kept per user.

    Returns:
        A list with one entry per user, in order of first appearance, shaped
        ``[uid, item1, score1, item2, score2, ...]`` with items ordered by
        decreasing estimate.
    """
    # user -> list of (item, estimated score) pairs
    scored_items = defaultdict(list)
    for uid, iid, _true_r, est, _details in predictions:
        scored_items[uid].append((iid, est))

    result = []
    for uid, user_ratings in scored_items.items():
        best = sorted(user_ratings, key=lambda pair: pair[1], reverse=True)[:n]
        result.append([uid] + list(chain.from_iterable(best)))
    return result


def ready_made(algo, reco_path, estimations_path,
               train_path='./Datasets/ml-100k/train.csv',
               test_path='./Datasets/ml-100k/test.csv',
               topK=10):
    """Fit a Surprise algorithm and dump its recommendations and estimations.

    Args:
        algo: object exposing ``fit(trainset)`` and ``test(testset)``.
        reco_path: CSV file receiving one row per user,
            ``user, item1, score1, item2, score2, ...``.
        estimations_path: CSV file receiving ``user, item, estimate`` rows for
            the test pairs.
        train_path: tab-separated ``user item rating timestamp`` training file.
        test_path: tab-separated ``user item rating timestamp`` test file.
        topK: number of recommendations stored per user.

    Both CSV files are written without header and index.
    """
    reader = sp.Reader(line_format='user item rating timestamp', sep='\t')

    # Surprise needs a Trainset object built from the loaded dataset
    trainset = sp.Dataset.load_from_file(train_path, reader=reader)
    trainset = trainset.build_full_trainset()

    testset = sp.Dataset.load_from_file(test_path, reader=reader)
    testset = sp.Trainset.build_testset(testset.build_full_trainset())

    algo.fit(trainset)

    # Recommendations: score (user, item) pairs that are not in the train set
    antitrainset = trainset.build_anti_testset()
    print('Generating predictions...')
    predictions = algo.test(antitrainset)
    print('Generating top N recommendations...')
    top_n = pd.DataFrame(get_top_n(predictions, n=topK))
    top_n.to_csv(reco_path, index=False, header=False)

    # Estimations: score exactly the pairs of the test set
    print('Generating predictions...')
    predictions = algo.test(testset)
    estimations = pd.DataFrame(
        [[uid, iid, est] for uid, iid, _true_r, est, _details in predictions])
    estimations.to_csv(estimations_path, index=False, header=False)


def download_movielens_100k_dataset(force=False):
    """Download and unpack MovieLens 100k into ``Datasets/ml-100k``.

    Nothing is downloaded when the directory already exists, unless *force*
    is true.  The temporary archive is removed even when the download or the
    extraction fails.
    """
    os.makedirs('./Datasets/', exist_ok=True)
    if os.path.isdir('Datasets/ml-100k') and not force:
        return

    # HTTPS so the archive we unpack cannot be altered in transit
    url = 'https://files.grouplens.org/datasets/movielens/ml-100k.zip'
    tmp_file_path = 'Datasets/ml-100k.zip'
    try:
        urlretrieve(url, tmp_file_path)
        with zipfile.ZipFile(tmp_file_path, 'r') as tmp_zip:
            tmp_zip.extractall('Datasets/')
    finally:
        if os.path.exists(tmp_file_path):
            os.remove(tmp_file_path)