workshops_recommender_systems/P0. Data preparation.ipynb

683 lines
71 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Building train and test sets"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# If a library is missing, install it with pip (or pip3) - you can do this directly from the notebook,\n",
"# for example: !pip install tqdm\n",
"# In the labs it is also better to use a Python 3 kernel (IPython 3 notebook).\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"import scipy.sparse as sparse\n",
"import time\n",
"import random\n",
"import evaluation_measures as ev\n",
"import matplotlib\n",
"import matplotlib.pyplot as plt\n",
"\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"# ml-100k interactions file: tab-separated, no header row, so the column names are supplied here.\n",
"# (Alternative left by the author for the 1M dataset:\n",
"#  df = pd.DataFrame(np.loadtxt('./Datasets/ml-1m.dat', delimiter='::')) )\n",
"df = pd.read_csv(\n",
"    './Datasets/ml-100k/u.data',\n",
"    sep='\\t',\n",
"    header=None,\n",
"    names=['user', 'item', 'rating', 'timestamp'],\n",
")\n",
"\n",
"# 80/20 random split; the fixed random_state keeps the split the same on every run.\n",
"train, test = train_test_split(df, test_size=0.2, random_state=30)\n",
"\n",
"# Save both parts in the same headerless, tab-separated layout as the input file.\n",
"for part, path in ((train, './Datasets/ml-100k/train.csv'), (test, './Datasets/ml-100k/test.csv')):\n",
"    part.to_csv(path, sep='\\t', header=None, index=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Interactions properties"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### What does the data look like?"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>user</th>\n",
" <th>item</th>\n",
" <th>rating</th>\n",
" <th>timestamp</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>196</td>\n",
" <td>242</td>\n",
" <td>3</td>\n",
" <td>881250949</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>186</td>\n",
" <td>302</td>\n",
" <td>3</td>\n",
" <td>891717742</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>22</td>\n",
" <td>377</td>\n",
" <td>1</td>\n",
" <td>878887116</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>244</td>\n",
" <td>51</td>\n",
" <td>2</td>\n",
" <td>880606923</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>166</td>\n",
" <td>346</td>\n",
" <td>1</td>\n",
" <td>886397596</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" user item rating timestamp\n",
"0 196 242 3 881250949\n",
"1 186 302 3 891717742\n",
"2 22 377 1 878887116\n",
"3 244 51 2 880606923\n",
"4 166 346 1 886397596"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Sample properties"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"We have 943 users, 1682 items and 100000 ratings.\n",
"\n",
"Average number of ratings per user is 106.04. \n",
"\n",
"Average number of ratings per item is 59.453.\n",
"\n",
"Data sparsity (% of missing entries) is 6.3047%.\n"
]
}
],
"source": [
"# Basic size statistics of the interaction data\n",
"users, items, ratings = df['user'].nunique(), df['item'].nunique(), len(df)\n",
"\n",
"print('We have {} users, {} items and {} ratings.\\n'.format(users, items, ratings))\n",
"\n",
"print('Average number of ratings per user is {}. \\n'.format(round(ratings/users,2)))\n",
"print('Average number of ratings per item is {}.\\n'.format(round(ratings/items,4)))\n",
"\n",
"# Sparsity = share of user-item pairs WITHOUT a rating. The ratio ratings/(users*items)\n",
"# on its own is the density (share of filled entries), so it is subtracted from 1 here.\n",
"sparsity = 1 - ratings/(users*items)\n",
"print('Data sparsity (% of missing entries) is {}%.'.format(round(100*sparsity,4)))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA6UAAAHvCAYAAACsfXllAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nOzde7wVdb3/8fdHQNOovKEBZtvMu+IGloZZSnlMk0uamfhL06MHzPKkWZ4oUtCsMPVE1smA9IhdtNKjcjHzirc03RvRYxxS0a1yERGvCKjg5/fHzN6s2bP22ot9me9s5vV8PNZjrzUza9Z7ffZsWJ81M98xdxcAAAAAACFsFjoAAAAAAKC4aEoBAAAAAMHQlAIAAAAAgqEpBQAAAAAEQ1MKAAAAAAiGphQAAAAAEAxNKQBkxMwmmZnHt+Gh8/Q0Zratmf3IzB41szfN7L24lq+FztZdzOzqsm2mLnQeAAC6A00pgC5X9iG6+faJGp4zpmz5SRnERA9iZjtKapT0fUn1kvpKsqChOsDMhsdfTkyiyQQAINI7dAAAhfBjSYeFDoEebYKkuvj+A5J+J2m5JJf0bqBMHTFc0sT4/lxJTaGCAACQFzSlALLwWTP7F3e/I3QQ9FhHxT9flfQ5d18dMkxW3P0USacEjgEAQLfi8F0A3am8cfhJsBTYFHwk/vnPojSkAAAUBU0pgO70gqQb4/slM/tiyDDo0TaPf74dNAUAAOhyNKUAutsPJL0X37/IzHp1dEVlAyHN7eyyZja3eZn48WZmdmo8/SUze8vM/tfMJpjZB1o998Nm9kMze9zM3jCz183sXjM7vgPv6Sgzu9nMFpvZ2/HPa83soI1Yx/ZxzvvM7EUze8fMVsSP/6N1/grPb4pr0RQ/fp+ZfdPM7jez5fEot3M39r2VrX8rM/uWmd0d53s7rvH9ZvY9M/tQG89rGa24bPKhFQbSGr6ReU4pe+4p8bSSmf3GzJ6Of/eJ9Vrk0/Hov3eZ2dL4fbxlZs+a2XVmNsrMKg6+1PxetOF8Ukm6u8J7mdvqeVVH340HTkoMEGZmO5vZZWa2MM73mpn9zcy+bmY1nbZjZseY2Zz497823kZ+Z/GgZZVq2MZ6vmBmfzKzZ8xsdbyuJWb2mJn91sy+Ymbb1JKpjfUn6mZm/czswvhv9/X477PRzMab2ZYbsd4j49o/ZdFIz6vNbFE87VPtPHejt68aM9U8EnMty1r0d/51M7vdzJbF2/Oq+Hf9sJn93Mw+b2Z92nmtg8zsCjNbEG9ra83seTP7o5mNaOe5lbbfPcxsipn9X/z7q7qNAdhEuDs3bty4delN0eAzLmlh/PjqsmmntPGcMWXLTGpnvXM3IkPFZRUNMtO8TF9Jd5Q9bn2bJ2mb+HkHSXqpyrKXVsk0qWy54ZL+q8p61kuaWMP7PEXSG1XW45JelHRQlXU0xcs1SdpF0hMV1tFuzdtY9zBJS9rJ97Ki80Sr1avabfhGZjqlfHuUNF7SumrrlfTfNWb5i6QPduK9zG31vKvL5tVVWO/wsvmTJB2p6LzbttZ/m6QtqtSmj6Q/VXn+Oknfbl3DCuvZUtLsGt/z2V3wb81cSYMlLa7yOk9J2qWd9fWTdGcNmX8jqU9XbV81vteq28JGbje7xvWo5fdT38ZrvF/StTU8f7akD7Sxjtbb71cVnfbReh2pbYwbN26b1o2BjgBkYaKkExQdgjnJzP7g7u8EzlTuvxWNDvyAog/kL0r6qKRvxD8HS5piZhMl/VXR+/iNpPslvSPp05LGKho87ttm9ld3v72d1zxL0tGKGrLfSHpc0laKmopjFR3JMsnMVrr7LyutwMzOkjQlfvi2pBsk3SdppaRt43V9QdKOku4wswPcfUGVTFtI+h9J+8Tv7QZJSxV9UN+xnfdTKV+9pLsUNSiS9KikP0h6XtKHJX1Z0sGStpM028w+5+5zy1ZxnaT58f3mw8
D/oWjve7knNjZbmS9L+ryk1yXNUHTZmfWS9o+nNdtSUY3vkfSwpEWS3lJUm90lnaQNNb9G0e+2XPN7GSOpeY/6eRWyv9yJ91Iv6VxFl8qZKunBOHNJ0tcUNRGHKxrJ+Pw21jFN0nHx/bWKmpsHFdWkJOk0SZdKur6dLD+W1LyXbJmi0ZL/IWmVoi+BPq7oC55Dan97VX1I0bY7UFHjfZOkVyTtEWfeOX7NO82s3t3faL0CM9tW0XvdNZ60QNKfJT2p6GiPfRQ1mTvF6+yt9gehqnX7yky8N//PiuohRdvl9ZKeUTSS9TaS9pL0GUXbVKV1bKHoi7xh8aTnFTWo/1C0zX1cUYO5h6Lt4CYzO9zd36uwumYHK9o210u6UtG/x2vjdbzYgbcKoCcJ3RVz48Zt07tpw7fbC8umXV42/ZsVnhNyT6lL+n6FZfppw16+dYo+vL0kaVCFZU8qW9ctbbzmpFav+YSkHSosd7SiD4euqPHZucIyQ8uWWShptzZec4Sixtkl/b2NZZpa5fpWF2wDmym5x3WKpM0qLHde2TLPS3pfZ3/3NWQ7pdX7/T9JA9p5zqclbV1l/vuV3MN4aA3bwPAasl5dtnxdhfnDW72X5yptC5IOLNteXlGFvaWKvphpXs8KSftWWKauwvZySqtlekl6LZ7XVGkbb/U3tmcnfpet96h9o8IyfSXdXbbML9pY141ly/ygje21r6IvppqXO7Irtq8a32vVbaHWZRV9udA8b5akXlXWs7ek7SpM/1nZOq6QtHmFZfooasSbl/taDdvvMkl7d7ZW3Lhx63k3zikFkJWLFDVYkjTBzPqGDNPKX939x60nuvsKSc17KXsp2rtxprs/XmHZ3yo6HE6SDqvh3L11ko5395cqrOsmSZfFD7eSdEaF509UtKfmbUkj3f2pCsvI3edImhw/PNDMPtlOrhvd/WftLFOLkYr2LEnSQ4oa3dReEnf/oaQ58cOPSDqxC157Y7ikMe6+tOpC7ve5+2tV5r+laO9Z8zZ+UtdF3CgnVtoW3P1hSX+MH26jqElt7Vtl989099QeaHdvUvt7B/sp2nMpSTdX2sbL1rfC3Re2s75aXefu/1XhNVYp+tKree/oaWa2dfkyZjZEG/ZuX+XuF7WxvTavq3kv5zntZKpp+8rYx8vuX+Xu69ta0N0XuPvK8mlm1l/S1+OHd7r7GV7hyBd3f1fSvynaAyu1XytJOt2rH80BYBNFUwogE/EH0+ZDTXeQdHbAOK1VPDw29kDZ/eWqftji/fHPzbXhEMC2/NXd/1Fl/hRFh7FJ0jHlM+KBYZoPjbzZ3Z9u57V+V3b/c+0s+4t25teqfKTlS9zdqyw7uex+1iM03+fuj3XFitz9TUn/Gz/8RFescyM96u73VZl/V9n9vctnmNn7tGHbWKbo8M6KPDrEOvXFTJk1bb1ON7usrRnuvlwb/g62VHSYdbnyLxEurfYi7v6qpFvih4fEh7K2pcu2ry5Ufkmlfdpcqm1f1obRsNusudTSmDZ/GbJbOwM0Padozy2AAuKcUgBZukTRXr9tJX3HzH7l7q8EziRJf68yb3nZ/cZKe0/aWLa9EUXvrDbT3V80s/+TtK+k3c3sQ+7evHfmYG34UnGtmbU+f7G18tEz96qy3HpF59R1heY9cS6pvfNr/6YN5xpm3cxVa+IS4ubjy4rO091f0Xm2fRWdw9naTl2SbuM81M78JWX3W2+f+2vDdnJPO9u5FB3+PqjSDHd/3cweVrQN/IuZ3ajoy4774ialO7yu6HzNau7Shj18Byg6z7fZp+Of70jaw8z2aGddW5T9/JiiQ3QrqXn7ytD9ir442FLSxPhLrhmVjgBpw6fL7u9Qw78/5dvaXooO6a6Yq50vrwBswmhKAWQm/rB6saSLFR3eN17Sf4RNJSkaGKgt5dfFrLZc62Xf186y7e3dbF5mX0VNz4e14ZDBurJlvhrfalWtWV7p7ms3Yl3V9I9/vhjvQWyTu79nZo
sUNUbbmtnmlQ4H7CZL2l9EMrP9FA38tFuN6/1ghxN1XHuDJFXbPgeU3X9G7WtvmW8o+uLlg4oOiz1a0ltm9ndFTdEdkh6oofmt1aIaGpryv7kBrebVxT8314ZBtWpV7W+qpu0rS+7+ipl9S9G5oL0VHVZ7jpm9pOgLovsk/cXd22q068ruX72RL9+jagUgOxy+CyBrv1A0oqsknWlmrT8cZm4jPhh31QdoKXkIXVveKrtffg5uxet61mjzKvPWVJm3sZqvjfpW1aU2WFXhuVlo9z3Ho7LeoQ0N6QuKPtCfJen/KTrk+Jj41nxIdoj/Xzuzfb6/7P7Gbpsp7t6gaOTWa7Shxu+X9FlFI//eK2mRmXXVOcSd+XuSesbfVJdx96mKRte9Uxu2mx0UfXlwmaQFZvaAmVU697hQtQKQDfaUAsiUu68xsx8q+lC/paIPqF/rytcws57whdtWNSxT3iisauP+Ke4+o2sidak3JW2t5HuoprxJqLpnNYAzFX1gl6LRRP/N3ddVWtDMJmSWqmuVN2wbu21W5O7PSjrZzE5XdPmXT0r6lKRDFf3t10n6rZntXGmgsY3Umb+n5sdbS2py9106mSUP2v030N3vkXSPmW2n6JDcgxT9bg6In/9JSfdXuFRTc+3WSdqyrb8FANgYPeGDG4BNz5WKrvMoRSNhfrzawmWaD+ms9m27JG3foVTZquU9Ny/jSl6nr/wwt44MVJKFZfHPD5tZ1T2f8XUTmweGWpnhobu1+pf45zpJZ7fzIfyjGeTpDuWjw36shuVrWUaS5O5r3f1ud/+Ru39eUYP/XUXbtSSdHzdGnbFrvB1VU/4313o03Oa/qY+YWYhDr2tRfvh1l/0b6O4r3f0md/+uuw9TdE3XP8Sz+yg98FNzrXorukYvAHQaTSmAzMWDnZwfP+wt6cIan9p8SY72DvkNMfLpxvpstZlm9mFtGJToybJBjqTo0MfmD/RfyOme4Yfjn6bo+pfVfFIb9pQ+XG3BQHaMf66sdlkYMxus6HIo1ZQfYtteE5WlxxRdx1SKRpRtb5sa3tEXcvdV7v5TRefoStFgQQd0dH2xD0ka0s4ynym7/0ireffEP3tJGtXJLN2lfNtr899AM+ul6FqkHeLuSySdrA1fhA01sy3LFrmn7H5iZHAA6Kg8fpABUAzXasNlJcYoGuSmPc3Xr/uomVXbU/PNzgTLyJFmVm0k3G8q+oAsSf9TPiO+vM6t8cPdFV0fM29uKLv/nXb2Yn23jeflRfP5iju0s9f3/CrzmpUfNlrroc3dLh7g6rb44QBJx7W1rJkNVxsj726kprL7XXE6UZvXwTSzftpwDdw12vD30+yasvvnm1lufjdlyq/fWe1LrTFq/8uRquKjARaXTSr//VynDUetfCv+Ag0AOoWmFEAQ8UiZzeffmaR/r+Fp5R8kL67U6JjZhdpwuGWe9Zb0x/jDcoKZjZL0nfjhakXn37b2A23Ys/WL9gaMMbOdzewSM9uh2nJdaI42DPpzsKRLKu19M7Pva8OeqRck/T6beBulea+aSbqo9UyLXKhokJj2PFt2v709e1mbUnb/l2a2b+sF4utMXl1tJWY22MzOM7MdqyyzvTY0vq7q1z2t1f8zs9T56XGDea02DNBzZes93u7+d234QmR3SbPayd/bzI42s6+3tUw3uF0brl38DTNLHSpuZiW1c61hM/uKmf1rq72frZcZJmlw/PCZ8hG03f2FstfYTtJfq52CEf99HNaDz7cGkAEGOgIQjLvPNrO/KTp8s5Y9E1cpuoTMtpK+JOk+M/u9okth7KxoD0FJ0Tf5Y7oldNe5SVET8w8zmy7pfxUN1nKEog/rzQ33d+MPgQnuPs/MzpA0XdHhj781s29LulnRpS/eVjRwy56KmsID43X+vDvfVFm+98zsJEkPKBrU5tuSPhP/vhYrOiT2y4oGvpGiBvurXXhJmq70K0mnKtpz/U0zq1e09/pFSR9RNALvYEV7stZIGl
plXfcpeq99JJ1rZs0NWfP5gq+4e5BDmN39DjO7WtIpis5JfCR+/DdFhx2XFNXhg5KuV/Q3KKVH/f2QokPyJ5rZA/Hzn1Q0gNW2kvZTVLNt4+V/7+7PdzL+fEXb+xXxdTNvVHS4a/ORBM0N3LPa8GVYa6fGy++n6FDfZ8zsekXX7n1Z0WV0+iv6MuFzcf4rO5m7Zu6+1Mz+IOmk+LUfMbNfKdru+io6pPoESa8quiZrW3tTd5M0UdGXWbcr+tLlBUXb4A6KBj06WhuO1Kg0CNX3FI2ufJiiveYLzOxmRacWvKho+95R0REwhyva+36npB917N0D2NTRlAII7XtKnqPUJndfEe8R/B9FHxAPjm/lZiv6EJr3pvTnigYM+Yak71eY75IudPdftrUCd78yvrbgdEUfAOvjW1tWSsqs6XP3R83sMEV7oJo/zFfaO/iKpP/XaoTP3HD3+Wb275J+qegIo0PiW7n/k/QFSb9pZ10vm9mlirb7vkqfT32POnG+ZhcYpyjXlxT9jX1NydGx31O0F/91bWhKW4+W3Nyk9lLlWpX7U/yanfW6pH9V9Pd/RHxrbZGkz7n7G5VW4O5vmNnBiv6ejlf0JVF71wFuPWBSdztbUdNcr+gQ3Ymt5i9TdJ7nGVXW0fz7eb82XEO2knclnefuqcbb3d81s6MUXT7mDEVN6Je0YZuohOuQAmgTh+8CCMrd71X6/K5qy/9F0Qey/5b0vKJzm1ZIulvRHoTR7l7LNQuDc/czJY2QNEvRh9t34p9/lHSwu0+qYR2zJO2iqHGYqWiPxxptqMuDig61GyVpgLu/3OVvpHq+BxXtmTlHUcO1QtGH3ZVxtgmSdnX3v2aZa2O5+xWKvgD5s6I9Qe9KeknRXsBzJJXc/eka1/V9RXu0bo3XlZvRht39XXc/TtKxivKtULQH7XlFh1Yf7O6XKTpss9krrdZxr6Q9FDUrf5K0UNG5tO/FPxcoOurhUHc/3t275PqU7j5f0R7riyQ9oahZXiXpUUVf/Axy92faWceb7j5G0ZcnU+LnrlQ08vIqSU8pOsrhHEXbbS3nEXcZd39F0ZEl4+NsqxRdzmeBor2Q+8eHIlfzI0nDFNXkVkXn9q5R9B5fVTTY2MWS9nb3i6tkecfd/13R0RiTJf1d0fayTtFpB89KukUban/yxr9jAEVh0WldAAAAtTGzGyR9MX64XdwshcjR/CHmHncfHiIDAKDz2FMKAABqFg92NDJ++FiohhQAsOmgKQUAAJIkM9vVzHaqMn+gokGENo8nTc0kGABgk8ZARwAAoNlBkv7bzO5VNFLwIkXnG26n6DzELysaAEiSHpI0LURIAMCmhaYUAACU663ociJtXVJEkuZKOtbd11dZBgCAmtCUAgCAZjMlfUXSkYpGst1e0TUx35G0XNEIq9fFoz4DANAlcjH67vbbb+91dXWhY3SbFStWqF+/fqFjtMhbnqKg7gAAACiqxsbGl9294ofhXOwpraurU0NDQ+gYAAAAAIBuYGbPtTWP0XczMGnSpNAREvKWpyioOwAAAJCWi8N3S6WSb8p7Ss1Meahzs7zlKQrqDgAAgKIys0Z3L1Wax55SAAAAAEAwNKUAAAAAgGBoSjOQt0OT85anKKg7AAAAkEZTCgAAAAAIhoGOMpC3AW7ylqcoqDsAAACKioGOAAAAAAC51G5TamZXmdlLZvZE2bQ/mtn8+NZkZvPj6XVmtqZs3q+7MzwAAAAAoGfrXcMyV0v6paRrmie4+/HN983sMkmvly2/yN3ruyrgpmDixImhIyTkLU9RUHcAAAAgraZzSs2sTtJsd9+31XST9Lykz7r7U20t155N/ZxSAAAAACiy7jyn9NOSlrv7U2XTdjGzR83sHjP7dJVQ48yswcwaVqxY0ckY+TZgwIDQERLylqcoqDsAAACQ1tmm9ARJ15Y9XiZpZ3cfLOkcSX8wsw9WeqK7T3P3kruX+vXr18kY+bZs2bKW+y+88II+85nPaK
+99tI+++yjn//85y3zJk2apIEDB6q+vl719fW65ZZbJEkPPPCABg0apAMOOEBPP/20JOm1117TEUcc0aHRXMvzdMZNN92kBQsWtDw+//zzdccdd0iShg8f3uHrct58880aNGiQ6uvrVSqVdP/990uS1q5dqwMPPFD777+/9tlnnx53OGx7df/nP//Z8ruvr6/XBz/4QU2ZMkVSdttGV+mubWPhwoU66KCDtMUWW+jSSy9NzV+/fr0GDx6skSNHtkybP3++hg0b1rI9Pfzwwx16bQAAAHQTd2/3JqlO0hOtpvWWtFzSTlWeN1dSqb31Dx061DdlUZkjS5cu9cbGRnd3f+ONN3y33Xbzf/zjH+7uPnHiRL/kkktSzz/mmGP8ySef9Ntuu83POeccd3c/55xzfO7cuZ3O0xknn3yy//nPf64479BDD/VHHnmkQ+t98803/b333nN398cee8z32GMPd3d/7733/M0333R393feeccPPPBAf/DBBzv0GiFsTN3XrVvnO+64ozc1Nbl7dttGV+mubWP58uX+8MMP+/e///2K9bjsssv8hBNO8BEjRrRMO/zww/2WW25xd/c5c+b4oYce2qHXBgAAQMdJavA2+sHO7Cn9F0kL3X1x8wQz62dmveL7H5O0m6RnOvEam4QhQ4a03O/fv3/L4w984APaa6+9tGTJkqrP79Onj9asWaPVq1erT58+WrRokZYsWaJDDz20zefceuut2nPPPfWpT31K3/zmN1v2HDXvcWu27777qqmpSZJ09NFHa+jQodpnn300bdq0lmX69u2rCRMmaP/999ewYcO0fPly/e1vf9PMmTN17rnnqr6+XosWLdIpp5yi66+/PpXltttu00EHHaQhQ4bouOOO06pVq6q+3759+yo6XVl66623Wu6bmfr27StJevfdd/Xuu++2zOsJyreD9tx5553adddd9dGPfrTqcl29bZTvfczjtrHDDjvogAMOUJ8+fVLzFi9erDlz5ujf/u3fEtPNTG+88YYk6fXXX+cwagAAgJyp5ZIw10p6UNIeZrbYzE6LZ41R8tBdSTpE0uNm9pik6yV9zd1f6crAPVFjY2PF6U1NTXr00Uf1iU98omXaL3/5Sw0aNEinnnqqXn31VUnS9773PY0bN05TpkzRmWeeqQkTJuiHP/xhm6+3du1ajR07VrNmzdJ9992nF198MTH/7LPPrvi8q666So2NjWpoaNDll1+ulStXSooaw2HDhumxxx7TIYccounTp+uTn/ykRo8erUsuuUTz58/XrrvuWnGdL7/8si666CLdcccdmjdvnkqlkv7zP/9TUnRI58yZMys+78Ybb9See+6pESNG6KqrrmqZvn79etXX12uHHXbQ4Ycfnqhd3rW1HVRy3XXX6YQTTkhMy2LbaEueto22nH322frpT3+qzTZL/rM2ZcoUnXvuufrIRz6i73znO/rJT36yUesFAABA92q3KXX3E9y9v7v3cfed3P3KePop7v7rVsve4O77uPv+7j7E3Wd1V/CeZNy4calpq1at0rHHHqspU6bogx+MTrs944wztGjRIs2fP1/9+/fXt7/9bUlSfX29HnroId1999165plnNGDAALm7jj/+eJ144olavnx5Yt0LFy7ULrvsot12201mphNPPDExv9IeK0m6/PLLW/Z4vfDCC3rqqWj8qs0337xlb9rQoUNb9p7V4qGHHtKCBQt08MEHq76+XjNmzNBzzz0nSbrwwgs1evTois875phjtHDhQt10000677zzWqb36tVL8+fP1+LFi/Xwww/riSeeqPj8PKq0HVTyzjvvaObMmTruuONapmW1bbQlT9tGJbNnz9YOO+ygoUOHpuZdccUV+tnPfqYXXnhBP/vZz3TaaadVWAMAAABC6exAR6jB9OnTE4/fffddHXvssfrKV76iL37xiy3Td9xxR/Xq1UubbbaZxo4dmxqQxd110UUX6bzzztMFF1ygCy64QCeeeKIuv/
zy1Gu2dVhr79699fe//73l8dq1ayVJc+fO1R133KEHH3xQjz32mAYPHtwyr0+fPi3r69Wrl9atW1fze3d3HX744Zo/f77mz5+vBQsW6Morr6z5+YcccogWLVqkl19+OTF966231vDhw3XrrbfWvK7QWm8HbfnLX/6iIUOGaMcdd2yZltW28d5777U8zvu2Ue6BBx7QzJkzVVdXpzFjxuiuu+5qabhnzJjR8nd23HHHMdARAABAztCUZszdddppp2mvvfbSOeeck5hXPjrrjTfeqH33TV7udcaMGRoxYoS22WYbrV69Wptttpk222wzrV69OrHcnnvuqWeffVaLFi2SJF177YajrOvq6lruz5s3T88++6yk6Fy7bbbZRltttZUWLlyohx56qN338oEPfEBvvvlm1WWGDRumBx54oGVk2NWrV+vJJ5+s+pynn366ZeTYefPm6Z133tF2222nFStW6LXXXpMkrVmzRnfccYf23HPPdnP2NNdee23q0N2sto158+ZJyu+20Zaf/OQnWrx4sZqamnTdddfps5/9rH73u99Jii7Fc88990iS7rrrLu22224deg0AAAB0j96hA/QUdePnVJ3fNHlETet54IEH9Nvf/lb77bef6uvrJUk//vGPddRRR+k//uM/NH/+fJmZ6urqNHXq1JbnrV69WjNmzNBtt90mSTrnnHN07LHHavPNN080FpL0vve9T9OmTdOIESO0/fbb61Of+lTLYa7HHnusTjrpJNXX1+uAAw7Q7rvvLkk68sgj9etf/1qDBg3SHnvsoWHDhrX7XsaMGaOxY8fq8ssvb/OQ4H79+unqq6/WCSecoLfffluSdNFFF2n33XfX+eefr1KplDpM84YbbtA111yjPn36aMstt9Qf//hHmZmWLVumk08+WevXr9d7772nL3/5y4lLf2wKVq9erdtvvz3xu5eU2bZxzTXX5HrbePHFF1UqlfTGG29os80205QpU7RgwYKWQ+ArmT59us466yytW7eu5f0DAAAgP6x5j1RIpVLJO3rdwqx0pildunRp0BE/586dq0svvVSzZ8/ORZ6iymPdW28bAAAAQHcws0Z3L1Wax+G7GdiYUVezkLc8RUHdAQAAgDT2lNaoM8OoewQAACAASURBVHtKzUx5qHOzvOUpCuoOAACAomJPKQAAAAAgl2hKAQAAAADB0JRmoPVIqqHlLU9RUHcAAAAgjXNKa9RVl4QBAAAAgKLhnNLAzCx0hIS85SkK6g4AAACk0ZQCAAAAAIKhKQUAAAAABENTmoGRI0eGjpCQtzxFQd0BAACANJrSDMyaNSt0hIS85SkK6g4AAACk0ZRmYNSoUaEjJOQtT1FQdwAAACCNpjQDs2fPDh0hIW95ioK6AwAAAGk0pUgxM5100kktj9etW6d+/fpt9DmRw4cPV/P1Z4866ii99tprXZpTkp5//nl97nOf01577aW9995bTU1NkqRTTjlFu+yyi+rr61VfX6/58+d3+WsDAAAA6LzeoQMgf97//vfriSee0Jo1a7Tlllvq9ttv18CBAzu1zltuuaWL0iV99atf1YQJE3T44Ydr1apV2myzDd+zXHLJJfrSl77ULa8LAAAAoGuwpzQD7h46QkIteT7/+c9rzpw5kqRrr71WJ5xwQsu8t956S6eeeqoOOOAADR48WDfffLMkac2aNRozZowGDRqk448/XmvWrGl5Tl1dnV5++WVJ0tFHH62hQ4dqn3320bRp01qW6du3ryZMmKD9999fw4YN0/Lly6tmXLBggdatW6fDDz+85flbbbVVjVXIXt62AwAAACAPaEozUN545UEtecaMGaPrrrtOa9eu1eOPP65PfOITLfN+9KMf6bOf/aweeeQR3X333Tr33HP11ltv6YorrtBWW22lxx9/XBMmTFBjY2PFdV911VVqbGxUQ0ODLr/8cq1cuVJS1OwOGzZMjz32mA455BBNnz5dkjRz5kydf/75qfU8+eST2nrrrf
XFL35RgwcP1rnnnqv169e3zJ8wYYIGDRqkb33rW3r77bc3qkbdIW/bAQAAAJAHNKUZOP3000NHSKglz6BBg9TU1KRrr71WRx11VGLebbfdpsmTJ6u+vl7Dhw/X2rVr9fzzz+vee+/ViSee2PL8QYMGVVz35Zdf3rI39IUXXtBTTz0lSdp8881bzlsdOnRoy/mho0eP1oUXXphaz7p163Tffffp0ksv1SOPPKJnnnlGV199tSTpJz/5iRYuXKhHHnlEr7zyii6++OKaatOd8rYdAAAAAHlAU4o2jR49Wt/5zncSh+5K0WGoN9xwg+bPn6/58+fr+eef11577SUpGiSpmrlz5+qOO+7Qgw8+qMcee0yDBw/W2rVrJUl9+vRpeX6vXr20bt26quvaaaedNHjwYH3sYx9T7969dfTRR2vevHmSpP79+8vMtMUWW+hf//Vf9fDDD3eoBgAAAAC6F00p2nTqqafq/PPP13777ZeYfsQRR+gXv/hFyzmSjz76qCTpkEMO0e9//3tJ0hNPPKHHH388tc7XX39d22yzjbbaaistXLhQDz30UIfzHXDAAXr11Ve1YsUKSdJdd92lvffeW5K0bNkySVEDfdNNN2nfffft8OsAAAAA6D40pRmYOXNm6AgJtebZaaeddNZZZ6Wmn3feeXr33Xc1aNAg7bvvvjrvvPMkSWeccYZWrVqlQYMG6ac//akOPPDA1HOPPPJIrVu3ToMGDdJ5552nYcOG1ZS30jmlvXr10qWXXqrDDjtM++23n9xdY8eOlSR95Stf0X777af99ttPL7/8sn7wgx/U9J67U962AwAAACAPLA8jgpZKJW++nmVe1Y2fU3V+0+QRbc5bunSpBgwY0NWROixveYqCugMAAKCozKzR3UuV5rGnNAOdvcZnV8tbnqKg7gAAAEAaTSkAAAAAIBiaUgAAAABAMDSlGWgefCcv8panKKg7AAAAkMZARzXqzEBHAAAAAFBkDHQU2NChQ0NHSMhbnqKg7gAAAEAaTWkG5s2bFzpCQt7yFAV1BwAAANJoSgEAAAAAwdCUZqB///6hIyTkLU9RUHcAAAAgjaY0A0uXLg0dISFveYqCugMAAABpNKUZmDRpUugICXnLUxTUHQAAAEjjkjA16swlYcxMeahzs7zlKQrqDgAAgKLikjAAAAAAgFyiKQUAAAAABENTmoG8HZqctzxFQd0BAACANJpSAAAAAEAwNKUZKJUqns8bTN7yFAV1BwAAANJoSgEAAAAAwdCUAgAAAACCoSnNwMSJE0NHSMhbnqKg7gAAAECauXvoDCqVSp73kUnrxs+pOr9p8oiMkgAAAABAz2Jmje5ecZAV9pRmYMCAAaEjJOQtT1FQdwAAACCNpjQDy5YtCx0hIW95ioK6AwAAAGk0pQAAAACAYGhKMzBkyJDQERLylqcoqDsAAACQ1m5TamZXmdlLZvZE2bRJZrbEzObHt6PK5n3PzJ42s3+a2RHdFbwnaWxsDB0hIW95ioK6AwAAAGm17Cm9WtKRFab/zN3r49stkmRme0saI2mf+Dm/MrNeXRW2pxo3blzoCAl5y1MU1B0AAABIa7cpdfd7Jb1S4/q+IOk6d3/b3Z+V9LSkAzuRb5Mwffr00BES8panKKg7AAAAkNaZc0rPNLPH48N7t4mnDZT0Qtkyi+NpAAAAAACkdLQpvULSrpLqJS2TdFk83Sos65VWYGbjzKzBzBpWrFjRwRgAAAAAgJ6sQ02puy939/Xu/p6k6dpwiO5iSR8pW3QnSUvbWMc0dy+5e6lfv34didFjLFmyJHSEhLzlKQrqDgAAAKR1qCk1s/5lD4+R1Dwy70xJY8xsCzPbRdJukh7uXMSeL2+jruYtT1FQdwAAACCtd3sLmNm1koZL2t7MFkuaKGm4mdUrOjS3SdLpkuTu/zCzP0laIGmdpG+4+/ruid5zjB49Wu4Vj2IOIm95ioK6AwAAAGntNqXufkKFyVdWWf5Hkn7UmVAAAA
AAgGLozOi7AAAAAAB0Ck1pBqZOnRo6QkLe8hQFdQcAAADSLA/nuJVKJW9oaAgdo6q68XOqzm+aPCKjJAAAAADQs5hZo7uXKs1jT2kGzCpdvjWcvOUpCuoOAAAApNGUAgAAAACCoSkFAAAAAARDU5qBkSNHho6QkLc8RUHdAQAAgDSa0gzMmjUrdISEvOUpCuoOAAAApNGUZmDUqFGhIyTkLU9RUHcAAAAgjaY0A7Nnzw4dISFveYqCugMAAABpNKUAAAAAgGBoSgEAAAAAwdCUZsDdQ0dIyFueoqDuAAAAQBpNaQamTZsWOkJC3vIUBXUHAAAA0iwPe29KpZI3NDSEjlFV3fg5Vec3TR7R5jwzy9VesrzlKQrqDgAAgKIys0Z3L1Wax55SAAAAAEAwNKUAAAAAgGBoSjMwc+bM0BES8panKKg7AAAAkEZTmoGhQ4eGjpCQtzxFQd0BAACANJrSDAwcODB0hIS85SkK6g4AAACk0ZQCAAAAAIKhKQUAAAAABENTmoGxY8eGjpCQtzxFQd0BAACANHP30BlUKpW8oaEhdIyq6sbPqTq/afKIjJIAAAAAQM9iZo3uXqo0jz2lGcjbqKt5y1MU1B0AAABIoynNwLx580JHSMhbnqKg7gAAAEAaTSkAAAAAIBia0gz0798/dISEvOUpCuoOAAAApNGUZmDp0qWhIyTkLU9RUHcAAAAgjaY0A5MmTQodISFveYqCugMAAABpXBKmRp25JIyZKQ91bpa3PEVB3QEAAFBUXBIGAAAAAJBLNKUAAAAAgGBoSjOQt0OT85anKKg7AAAAkEZTCgAAAAAIhqY0A6VSxfN5g8lbnqKg7gAAAEAaTSkAAAAAIBiaUgAAAABAMDSlGZg4cWLoCAl5y1MU1B0AAABIM3cPnUGlUsnzPjJp3fg5Vec3TR6RURIAAAAA6FnMrNHdKw6ywp7SDAwYMCB0hIS85SkK6g4AAACk0ZRmYNmyZaEjJOQtT1FQdwAAACCNphQAAAAAEAxNaQaGDBkSOkJC3vIUBXUHAAAA0mhKM9DY2Bg6QkLe8hQFdQcAAADSaEozMG7cuNAREvKWpyioOwAAAJDGJWFq1JlLwpiZ8lDnZnnLUxTUHQAAAEXFJWEAAAAAALlEUwoAAAAACIamNANLliwJHSEhb3mKgroDAAAAaTSlGcjbqKt5y1MU1B0AAABIa7cpNbOrzOwlM3uibNolZrbQzB43sxvNbOt4ep2ZrTGz+fHt190ZvqcYPXp06AgJectTFNQdAAAASKtlT+nVko5sNe12Sfu6+yBJT0r6Xtm8Re5eH9++1jUxAQAAAACbonabUne/V9Irrabd5u7r4ocPSdqpG7IBAAAAADZxXXFO6amS/lL2eBcze9TM7jGzT3fB+nu8qVOnho6QkLc8RUHdAQAAgDRz9/YXMquTNNvd9201fYKkkqQvurub2RaS+rr7SjMbKukmSfu4+xsV1jlO0jhJ2nnnnYc+99xznX0v3apu/Jyq85smj8goCQAAAAD0LGbW6O6lSvM6vKfUzE6WNFLSVzzubN39bXdfGd9vlLRI0u6Vnu/u09y95O6lfv36dTRGj2BmoSMk5C1PUVB3AAAAIK1DTamZHSnpu5JGu/vqsun9zKxXfP9jknaT9ExXBAUAAAAAbHp6t7eAmV0rabik7c1ssaSJikbb3ULS7fHen4fikXYPkXShma2TtF7S19z9lYorBgAAAAAUXrtNqbufUGHylW0se4OkGzobalMzcuTI0BES8panKKg7AAAAkNYVo++iHbNmzQodISFveYqCugMAAABpNKUZGDVqVOgICXnLUxTUHQAAAEijKc3A7NmzQ0dIyFueoqDuAAAAQBpNKQAAAAAgGJpSAAAAAEAwNKUZcPfQERLylqcoqDsAAACQRlOagWnTpoWOkJC3PEVB3QEAAIA0y8Pem1Kp5A0NDaFjVFU3fk7V+U2TR7Q5z8xytZcsb3mKgroDAACgqMys0d
1LleaxpxQAAAAAEAxNKQAAAAAgGJrSDMycOTN0hIS85SkK6g4AAACk0ZRmYOjQoaEjJOQtT1FQdwAAACCNpjQDAwcODB0hIW95ioK6AwAAAGk0pQAAAACAYGhKAQAAAADB0JRmYOzYsaEjJOQtT1FQdwAAACDN3D10BpVKJW9oaAgdo6q68XOqzm+aPCKjJAAAAADQs5hZo7uXKs1jT2kG8jbqat7yFAV1BwAAANJoSjMwb9680BES8panKKg7AAAAkEZTCgAAAAAIhqY0A/379w8dISFveYqCugMAAABpNKUZWLp0aegICXnLUxTUHQAAAEijKc3ApEmTQkdIyFueoqDuAAAAQBqXhKlRZy4JY2bKQ52b5S1PUVB3AAAAFBWXhAEAAAAA5BJNKQAAAAAgGJrSDOTt0OS85SkK6g4AAACk0ZQCAAAAAIKhKc1AqVTxfN5g8panKKg7AAAAkEZTCgAAAAAIhqYUAAAAABAMTWkGJk6cGDpCQt7yFAV1BwAAANLM3UNnUKlU8ryPTFo3fk7V+U2TR2SUBAAAAAB6FjNrdPeKg6ywpzQDAwYMCB0hIW95ioK6AwAAAGk0pRlYtmxZ6AgJectTFNQdAAAASKMpBQAAAAAEQ1OagSFDhoSOkJC3PEVB3QEAAIA0mtIMNDY2ho6QkLc8RUHdAQAAgDSa0gyMGzcudISEvOUpCuoOAAAApHFJmBp15pIwZqY81LlZ3vIUBXUHAABAUXFJGAAAAABALtGUAgAAAACCoSnNwJIlS0JHSMhbnqKg7gAAAEAaTWkG8jbqat7yFAV1BwAAANJoSjMwevTo0BES8panKKg7AAAAkEZTCgAAAAAIhqYUAAAAABAMTWkGpk6dGjpCQt7yFAV1BwAAANLM3UNnUKlU8oaGhtAxqqobP6fq/KbJIzJKAgAAAAA9i5k1unup0jz2lGbAzEJHSMhbnqKg7gAAAEAaTSkAAAAAIJh2m1Izu8rMXjKzJ8qmbWtmt5vZU/HPbcrmfc/Mnjazf5rZEd0VHAAAAADQ89Wyp/RqSUe2mjZe0p3uvpukO+PHMrO9JY2RtE/8nF+ZWa8uS9tDjRw5MnSEhLzlKQrqDgAAAKS125S6+72SXmk1+QuSZsT3Z0g6umz6de7+trs/K+lpSQd2UdYea9asWaEjJOQtT1FQdwAAACCto+eU7ujuyyQp/rlDPH2gpBfKllscTyu0UaNGhY6QkLc8RUHdAQAAgLSuHuio0vCiFa85Y2bjzKzBzBpWrFjRxTHyZfbs2aEjJOQtT1FQdwAAACCto03pcjPrL0nxz5fi6YslfaRsuZ0kLa20Anef5u4ldy/169evgzEAAAAAAD1ZR5vSmZJOju+fLOnmsuljzGwLM9tF0m6SHu5cRAAAAADApqp3ewuY2bWShkva3swWS5ooabKkP5nZaZKel3ScJLn7P8zsT5IWSFon6Rvuvr6bsvcY7hWPYA4mb3mKgroDAAAAabWMvnuCu/d39z7uvpO7X+nuK939MHffLf75StnyP3L3Xd19D3f/S/fG7xmmTZsWOkJC3vIUBXUHAAAA0iwPe29KpZI3NDSEjlFV3fg5Vec3TR7R5jwzy9VesrzlKQrqDgAAgKIys0Z3L1Wa19Wj7wIAAAAAUDOaUgAAAABAMDSlGZg5c2boCAl5y1MU1B0AAABIoynNwNChQ0NHSMhbnqKg7gAAAEAaTWkGBg4cGDpCQt7yFAV1BwAAANJoSgEAAAAAwdCUAgAAAACCoSnNwNixY0NHSMhbnqKg7gAAAECauXvoDCqVSt7Q0BA6RlV14+dUnd80eURGSQAAAACgZzGzRncvVZrHntIM5G3U1bzlKQrqDgAAAKTRlGZg3rx5oSMk5C1PUVB3AAAAII2mFAAAAAAQDE1pBvr37x86QkLe8hQFdQcAAADSaEozsHTp0tAREvKWpyioOwAAAJBGU5qBSZMmhY6QkLc8RUHdAQAAgDQuCVOjzlwSxsyUhz
o3y1ueoqDuAAAAKCouCQMAAAAAyCWaUgAAAABAMDSlGcjbocl5y1MU1B0AAABIoykFAAAAAARDU5qBUqni+bzB5C1PUVB3AAAAII2mFAAAAAAQDE0pAAAAACAYmtIMTJw4MXSEhLzlKQrqDgAAAKSZu4fOoFKp5HkfmbRu/Jyq85smj8goCQAAAAD0LGbW6O4VB1lhT2kGBgwYEDpCQt7yFAV1BwAAANJoSjOwbNmy0BES8panKKg7AAAAkEZTCgAAAAAIhqY0A0OGDAkdISFveYqCugMAAABpNKUZaGxsDB0hIW95ioK6AwAAAGk0pRkYN25c6AgJectTFNQdAAAASOOSMDXqzCVhzEx5qHOzvOUpCuoOAACAouKSMAAAAACAXKIpBQAAAAAEQ1OagSVLloSOkJC3PEVB3QEAAIA0mtIM5G3U1bzlKQrqDgAAAKTRlGZg9OjRoSMk5C1PUVB3AAAAIK136ACbivZG5wUAAAAApLGnFAAAAAAQDE1pBrY94szQERKmTp0aOkIhUXcAAAAgjaY0Ax+oPzJ0hIRx48aFjlBI1B0AAABIoynNwHMXjwwdIcHMQkcoJOoOAAAApNGUAgAAAACCoSkFAAAAAARDU5qBLXc9IHSEhJEj83U4cVFQdwAAACCNpjQDO3xpYugICbNmzQodoZCoOwAAAJBGU5qBl66/IHSEhFGjRoWOUEjUHQAAAEijKc3AmkWPhI6QMHv27NARCom6AwAAAGk0pQAAAACAYGhKAQAAAADB9O7oE81sD0l/LJv0MUnnS9pa0lhJK+Lp33f3WzqccBPw0e/m67BNdw8doZCoOwAAAJDW4T2l7v5Pd69393pJQyWtlnRjPPtnzfOK3pBK0pvzbw0dIWHatGmhIxQSdQcAAADSuurw3cMkLXL357pofZuUV/76y9AREk4//fTQEQqJugMAAABpXdWUjpF0bdnjM83scTO7ysy26aLXAAAAAABsYjrdlJrZ5pJGS/pzPOkKSbtKqpe0TNJlbTxvnJk1mFnDihUrKi0CAAAAANjEdcWe0s9LmufuyyXJ3Ze7+3p3f0/SdEkHVnqSu09z95K7l/r169cFMfKr37HnhY6QMHPmzNARCom6AwAAAGld0ZSeoLJDd82sf9m8YyQ90QWv0aNtvuPHQ0dIGDp0aOgIhUTdAQAAgLRONaVmtpWkwyX9T9nkn5rZ/5rZ45I+I+lbnXmNTcGSX50cOkLCwIEDQ0coJOoOAAAApHX4OqWS5O6rJW3XatpJnUoEAAAAACiMrhp9FwAAAACAjUZTmoG++x8ROkLC2LFjQ0coJOoOAAAApNGUZmC7I/89dISEadOmhY5QSNQdAAAASKMpzcCyq88KHSGBUWDDoO4AAABAGk1pBt5Zvih0hIR58+aFjlBI1B0AAABIoykFAAAAAARDU5qBXn23DR0hoX///qEjFBJ1BwAAANJoSjOw0zeuCR0hYenSpaEjFBJ1BwAAANJoSjPw2v2/Dx0hYdKkSaEjFBJ1BwAAANLM3UNnUKlU8oaGhtAxqqobP6fDz33u4pHKQ52bmVmu8hQFdQcAAEBRmVmju5cqzWNPKQAAAAAgGJpSAAAAAEAwNKUZ+PDJU0JHSMj7odKbKuoOAAAApNGUAgAAAACCoSnNwIszzg4dIaFUqnh+MboZdQcAAADSaEoBAAAAAMHQlAIAAAAAgqEpzcCHDj4hdISEiRMnho5QSNQdAAAASDN3D51BpVLJ8z4yad34OZ16ftPkEV2UBAAAAAB6FjNrdPeKg6ywpzQDi//rq6EjJAwYMCB0hEKi7gAAAEAaTWkG1q96JXSEhGXLloWOUEjUHQAAAEijKQUAAAAABENTmoHNd9w1dISEIUOGhI5QSNQdAAAASKMpzUD/U34eOkJCY2Nj6AiFRN0BAACANJrSDKy89RehIySMGzcudIRCou4AAABAGk1pBlY99tfQERKmT58eOkIhUXcAAAAgjaYUAAAAABAMTS
kAAAAAIBia0gwM/PqM0BESlixZEjpCIVF3AAAAII2mNAPvLH86dIQERoENg7oDAAAAaTSlGVhxww9DR0gYPXp06AiFRN0BAACANJpSAAAAAEAwNKUAAAAAgGBoSjOw7RFnho6QMHXq1NARCom6AwAAAGnm7qEzqFQqeUNDQ+gYVdWNn9Ot62+aPKJb1w8AAAAAoZhZo7uXKs1jT2kGnrt4ZOgICWYWOkIhUXcAAAAgjaYUAAAAABAMTSkAAAAAIBia0gxsuesBoSMkjByZr8OJi4K6AwAAAGk0pRnY4UsTQ0dImDVrVugIhUTdAQAAgDSa0gy8dP0FoSMkjBo1KnSEQqLuAAAAQFrv0AGKYM2iR9pdpr1LznTlJWNmz57dZetC7ag7AAAAkMaeUgAAAABAMDSlAAAAAIBgaEoz8NHv5uuwTXcPHaGQqDsAAACQRlOagTfn3xo6QsK0adNCRygk6g4AAACk0ZRm4JW//jJ0hITTTz89dIRCou4AAABAGk0pAAAAACAYmlIAAAAAQDA0pRnod+x5oSMkzJw5M3SEQqLuAAAAQBpNaQY23/HjoSMkDB06NHSEQqLuAAAAQBpNaQaW/Ork0BESBg4cGDpCIVF3AAAAIK13Z55sZk2S3pS0XtI6dy+Z2baS/iipTlKTpC+7+6udiwkAAAAA2BR1xZ7Sz7h7vbuX4sfjJd3p7rtJujN+DAAAAABASnccvvsFSTPi+zMkHd0Nr9Gj9N3/iNAREsaOHRs6QiFRdwAAACDN3L3jTzZ7VtKrklzSVHefZmavufvWZcu86u7bVHjuOEnjJGnnnXce+txzz3U4Rxbqxs8J+vpNk0cEfX0AAAAA6Cgzayw7ujahs3tKD3b3IZI+L+kbZnZIrU9092nuXnL3Ur9+/ToZI9+WXX1W6AgJjAIbBnUHAAAA0jrVlLr70vjnS5JulHSgpOVm1l+S4p8vdTZkT/fO8kWhIyTMmzcvdIRCou4AAABAWoebUjN7v5l9oPm+pM9JekLSTEnN10A5WdLNnQ0JAAAAANg0deaSMDtKutHMmtfzB3e/1cwekfQnMztN0vOSjut8zJ6tV99tQ0dI6N+/f+gIhUTdAQAAgLQON6Xu/oyk/StMXynpsM6E2tTs9I1ruv01ahmIqXmwpKVLl3Z3HFRA3QEAAIC07rgkDFp57f7fh46QMGnSpNARCom6AwAAAGk0pRl4/YFrQ0dIuOCCC0JHKCTqDgAAAKR15pxSZCj0dVIBAAAAoDuwpxQAAAAAEAxNaQY+fPKU0BESGhoaQkcoJOoOAAAApNGUAgAAAACCoSnNwIszzg4dIaFUKoWOUEjUHQAAAEijKQUAAAAABENTCgAAAAAIhqY0Ax86+ITQERImTpwYOkIhUXcAAAAgzdw9dAaVSiXP+8ikm8J1QpsmjwgdAQAAAEABmVmju1ccZIU9pRlY/F9fDR0hYcCAAaEjFBJ1BwAAANJoSjOwftUroSMkLFu2LHSEQqLuAAAAQBpNKQAAAAAgGJrSDGy+466hIyQMGTIkdIRCou4AAABAGk1pBvqf8vPQERIaGxtDRygk6g4AAACk0ZRmYOWtvwgdIWHcuHGhIxQSdQcAAADSaEozsOqxv4aOkDB9+vTQEQqJugMAAABpNKUAAAAAgGBoSgEAAAAAwdCUZmDg12eEjpCwZMmS0BEKiboDAAAAaTSlGXhn+dOhIyQwCmwY1B0AAABIoynNwIobfhg6QsLo0aNDRygk6g4AAACk0ZQCAAAAAIKhKQUAAAAABENTmoFtjzgzdISEqVOnho5QSNQdAAAASKMpzcAH6o8MHSFh3LhxoSMUEnUHAAAA0mhKM/DcxSNDR0gws9ARCom6AwAAAGk0pQAAAACAYGhKAQAAAADB0JRmYMtdDwgdIWHkyHwdTlwU1B0AAABIoynNwA5fmhg6QsKsWbNCRygk6g4AAACk0ZRm4KXrLwgdIWHUqFGhIxQSdQcAAMD/b+/+Y62u6z
iOv16B9AvLNCIGClbOfm1iEqvRWr+5ZtNateFK6ZewhQy2tqS2Bqx/bOuHrbTtViYu0pnmek4H1wAADQhJREFUYuQg1481XVOQKCVigVFeQSididVi2Ls/zlc6384VkHvO5/P5nu/zsbF7zuf+OC8+b+69e/P+nO9BL5rSBP61Z0vuCDUbN27MHaGV2HcAAACgF00pAAAAACCbybkDIJ05q34y7u2n7b36opRxAAAAAIBJaQqzryrr2GZpedoiInJHAAAAAIpDU5rAoe2bckeoKS1PW4yOjuaOAAAAABSHpjSBxzZ/M3eEmtLytMXSpUtzRwAAAACKQ1MKAAAAAMiGCx3hqPEuftSNCyEBAAAA6DcmpQlM++AXckeoKS1PW2zYsCF3BAAAAKA4NKUJTJn+qtwRakrL0xYXXHBB7ggAAABAcWhKE3j4usW5I9SUlqctZs6cmTsCAAAAUByaUgAAAABANjSlAAAAAIBsaEoTmHrewtwRakrL0xZXXHFF7ggAAABAcWhKEzhjZHnuCDWl5WmL0dHR3BEAAACA4tCUJrD/hhW5I9SUlqctuPouAAAA0IumNIHDB/bkjlBTWp622LZtW+4IAAAAQHFoSgEAAAAA2Zx0U2r7TNu/sL3T9g7bK6r1NbYftr29+vPe/sVtpklTT88doaa0PG0xY8aM3BEAAACA4kyewOcekfSZiNhm+1RJ99m+s3rf1yLiyxOPNxxmLbsxd4Sa0vK0xb59+3JHAAAAAIpz0pPSiNgfEduq24ck7ZQ0s1/Bhsnjd63PHaGmtDxtsWbNmtwRAAAAgOL05TmltudIOl/SPdXSlbZ/Z/t62y/px2M02d/vvil3hJrS8rTF2rVrc0cAAAAAijOR47uSJNtTJd0maWVEPGH7W5K+KCmqt1+R9IlxPm+JpCWSdNZZZ000BobAnFU/Oeb79159UaIkAAAAAFKZ0KTU9inqNKTrI+JHkhQRByLiqYj4j6RvS5o/3udGxGhEzIuIedOmTZtIDAAAAABAQ03k6ruW9F1JOyPiq13r3ZcY/YCkB04+3nB4+eJrckeoKS1PW2zdujV3BAAAAKA4Ezm+u0DSZZLut729Wvu8pEttz1Xn+O5eSUsnlBAAAAAAMLROuimNiLskeZx33XHycYbTI+tWavZVG3PHOKq0PCeq6c85nTdvniIidwwAAACgKH25+i4AAAAAACeDphQAAAAAkA1NaQIvXnBp7gg1peVpi9WrV+eOAAAAABSHpjSB097ykdwRakrL0xZr1qzJHQEAAAAozkSuvosTNHbt5Zq17MbcMY4qLc8wOdbFmMauvVxHDj2aMA0AAABQPialCTz15GO5I9SUlqct2HcAAACgF00pAAAAACAbju8mMGX6K3NHqDnZPE1/ndDcSvt3AAAAAJSASWkCMz729dwRakrL0xbsOwAAANCLSWkCj276hs4YWZ47xlG58hxv0lpChkFOex/d9A2JaTIAAABQw6Q0gSd/uzl3hJrS8rQF+w4AAAD0YlIKPAslTHsBAACAYcKkFAAAAACQDU1pAjM/vS53hJrS8rQF+w4AAAD0oilN4PCB3bkj1JSWpy3YdwAAAKAXTWkCf73ti7kj1JSWpy3YdwAAAKAXFzoCEsr5kjQAAABAiZiUAgAAAACyoSlN4PSFV+aOUFNanrZg3wEAAIBeNKUJnDp3JHeEmtLytAX7DgAAAPSiKU3gz196X+4INaXlaQv2HQAAAOjFhY4wNI53EaFUX2OQj8+FkAAAADBsmJQCAAAAALJhUprA81/5xtwRakrL0xYl7PuJTIKZxgIAACAlJqUJvOxDq3NHqCktT1uw7wAAAEAvmtIEDt66NneEmtLytAX7DgAAAPTi+G4C/9qzJXeEmkHlyX2RoNKV9u/gmXCxJQAAAKTEpBQAAAAAkA2TUgAoDNNqAADQJkxKE5h91cbcEWpKy9MW7DsAAADQi6Y0gUPbN+WOUFNanrZg3wEAAIBeHN9N4L
HN39Spc0dyxziqtDxt0ZZ9H/TR04leUIujrwAAAGVhUgoAAAAAyIZJKdAgvOzOxJ3IHjJNBQAASIdJaQLTPviF3BFqSsvTFuw7AAAA0IumNIEp01+VO0JNaXnagn0HAAAAenF8N4GHr1tc1MuBlJanLYZl3wd9hJgjygAAAO3CpBQAAAAAkA2TUgBJMQkFAABANyalCUw9b2HuCDWl5WkL9h0AAADoxaQ0gTNGlueOUFNanrZg39vjeNPgib7kzKC/PgAAQEpMShPYf8OK3BFqSsvTFuw7AAAA0IumNIHDB/bkjlBTWp62YN8BAACAXhzfBYCW6cfFpob9CHLp+QAAGCZMShOYNPX03BFqSsvTFuw7AAAA0ItJaQKzlt2YO0JNaXnagn1vjtwXKhoGbfg7DhrTWgBAWzApTeDxu9bnjlBTWp62YN8BAACAXjSlCfz97ptyR6gpLU9bsO8AAABAL47vAsCzxNHUwe8BR1ePjz0CAAwLJqUAAAAAgGyYlCbw8sXX5I5QU1qetmDfkQqT3ME7kT0+3qSyDdPmEjIMUgkvrwQAw2Bgk1LbI7Z32d5te9WgHgcAAAAA0FwDmZTaniTpWknvljQmaYvtDRHx+0E8XukeWbdSs6/amDvGUaXlaQv2HeifiU7gUkyTS59YN2GK2YSMpcv9ElfUqAxtqNOw/x37cUKnZIOalM6XtDsiHoyIw5JulnTJgB4LAAAAANBQg2pKZ0p6qOv+WLUGAAAAAMBRjoj+f1H7w5IWRsSnqvuXSZofEcu7PmaJpCXV3XMl7erDQ79U0t/68HWQDzUcDtSx+ahh81HD5qOGzUcNhwN17I/ZETFtvHcM6uq7Y5LO7Lo/S9K+7g+IiFFJo/18UNtbI2JeP78m0qKGw4E6Nh81bD5q2HzUsPmo4XCgjoM3qOO7WySdY/ts21MkLZK0YUCPBQAAAABoqIFMSiPiiO0rJW2WNEnS9RGxYxCPBQAAAABorkEd31VE3CHpjkF9/WfQ1+PAyIIaDgfq2HzUsPmoYfNRw+ajhsOBOg7YQC50BAAAAADAiRjUc0oBAAAAADiuoWlKbY/Y3mV7t+1VufNgfLavt33Q9gNda6fbvtP2H6u3L+l63+eqmu6yvTBPanSzfabtX9jeaXuH7RXVOnVsCNvPs32v7d9WNVxbrVPDhrE9yfZvbG+s7lPDhrG91/b9trfb3lqtUccGsX2a7Vtt/6H63fhmatgcts+tvv+e/vOE7ZXUMK2haEptT5J0raQLJb1W0qW2X5s3FZ7BDZJG/m9tlaSfRcQ5kn5W3VdVw0WSXld9znVVrZHXEUmfiYjXSHqTpGVVrahjc/xb0jsi4jxJcyWN2H6TqGETrZC0s+s+NWymt0fE3K6XnKCOzfJ1SZsi4tWSzlPne5IaNkRE7Kq+/+ZKukDSPyXdLmqY1FA0pZLmS9odEQ9GxGFJN0u6JHMmjCMifiXpsf9bvkTSuur2Oknv71q/OSL+HRF/krRbnVojo4jYHxHbqtuH1PnlO1PUsTGi48nq7inVnxA1bBTbsyRdJOk7XcvUcDhQx4aw/SJJb5X0XUmKiMMR8bioYVO9U9KeiPizqGFSw9KUzpT0UNf9sWoNzTA9IvZLnYZH0suqdepaONtzJJ0v6R5Rx0apjn1ul3RQ0p0RQQ2b5xpJn5X0n641atg8Iemntu+zvaRao47N8QpJf5X0veoo/Xdsv1DUsKkWSbqpuk0NExqWptTjrHFZ4eajrgWzPVXSbZJWRsQTx/rQcdaoY2YR8VR1VGmWpPm2X3+MD6eGhbH9PkkHI+K+E/2UcdaoYRkWRMQb1HkK0jLbbz3Gx1LH8kyW9AZJ34qI8yX9Q9Uxz2dADQtle4qkiyX98HgfOs4aNZygYWlKxySd2XV/lqR9mbLg2Ttge4YkVW8PVuvUtVC2T1GnIV0fET+qlqljA1XHzH6pzvNiqGFzLJB0se296jxl5R22vy9q2DgRsa96e1Cd57HNF3
VskjFJY9VpE0m6VZ0mlRo2z4WStkXEgeo+NUxoWJrSLZLOsX129b8ciyRtyJwJJ26DpMXV7cWSfty1vsj2c22fLekcSfdmyIcutq3Oc2d2RsRXu95FHRvC9jTbp1W3ny/pXZL+IGrYGBHxuYiYFRFz1Pmd9/OI+KioYaPYfqHtU5++Lek9kh4QdWyMiHhE0kO2z62W3inp96KGTXSp/nd0V6KGSU3OHaAfIuKI7SslbZY0SdL1EbEjcyyMw/ZNkt4m6aW2xyStlnS1pFtsf1LSXyR9WJIiYoftW9T54X5E0rKIeCpLcHRbIOkySfdXz0mUpM+LOjbJDEnrqqsFPkfSLRGx0favRQ2bju/DZpku6fbO//VpsqQfRMQm21tEHZtkuaT11WDkQUkfV/WzlRo2g+0XSHq3pKVdy/w8TcgRHIEGAAAAAOQxLMd3AQAAAAANRFMKAAAAAMiGphQAAAAAkA1NKQAAAAAgG5pSAAAAAEA2NKUAAAAAgGxoSgEAAAAA2dCUAgAAAACy+S860d8VuUH5rwAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 1152x576 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Number of ratings given by each user\n",
"items_per_user = df.groupby('user')['rating'].count()\n",
"\n",
"fig, ax = plt.subplots(figsize=(16,8))\n",
"ax.hist(items_per_user, bins=100)\n",
"\n",
"# Reference lines: (label template, value, horizontal text shift, relative text height)\n",
"markers = [\n",
"    ('Median: {:.0f}', items_per_user.median(), 1.1, 0.9),\n",
"    ('25% quantile: {:.0f}', items_per_user.quantile(0.25), 1.1, 0.95),\n",
"    ('75% quantile: {:.0f}', items_per_user.quantile(0.75), 1.05, 0.95),\n",
"]\n",
"for label, t, x_shift, y_frac in markers:\n",
"    # dashed vertical line at the statistic, with its label placed slightly to the right\n",
"    ax.axvline(t, color='k', linestyle='dashed', linewidth=1)\n",
"    ax.text(t*x_shift, ax.get_ylim()[1]*y_frac, label.format(t))\n",
"\n",
"ax.set_title('Number of ratings per user', fontsize=30)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 1152x576 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Number of ratings received by each item.\n",
"# Stored under its own name so the per-user counts held in items_per_user are not overwritten.\n",
"ratings_per_item=df.groupby(['item']).count()['rating']\n",
"\n",
"plt.figure(figsize=(16,8))\n",
"plt.hist(ratings_per_item, bins=100)\n",
"\n",
"# Let's add median\n",
"t=ratings_per_item.median()\n",
"plt.axvline(t, color='k', linestyle='dashed', linewidth=1)\n",
"plt.text(t*1.1, plt.ylim()[1]*0.9, 'Median: {:.0f}'.format(t))\n",
"\n",
"# Let's add also some percentiles\n",
"t=ratings_per_item.quantile(0.25)\n",
"plt.axvline(t, color='k', linestyle='dashed', linewidth=1)\n",
"plt.text(t*1.1, plt.ylim()[1]*0.95, '25% quantile: {:.0f}'.format(t))\n",
"\n",
"t=ratings_per_item.quantile(0.75)\n",
"plt.axvline(t, color='k', linestyle='dashed', linewidth=1)\n",
"plt.text(t*1.05, plt.ylim()[1]*0.95, '75% quantile: {:.0f}'.format(t))\n",
"\n",
"plt.title('Number of ratings per item', fontsize=30)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"rating\n",
"1 0.06110\n",
"2 0.11370\n",
"3 0.27145\n",
"4 0.34174\n",
"5 0.21201\n",
"Name: user, dtype: float64"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Share of all ratings that falls on each rating value\n",
"df.groupby('rating')['user'].count() / len(df)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Item attributes"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# u.genre is pipe-separated without a header: column 0 is used as the genre name, column 1 as its id\n",
"genre_table = pd.read_csv('./Datasets/ml-100k/u.genre', sep='|', header=None,\n",
"                          encoding='latin-1')\n",
"# Lookup: genre id -> genre name\n",
"genres = dict(zip(genre_table[1], genre_table[0]))"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{0: 'unknown',\n",
" 1: 'Action',\n",
" 2: 'Adventure',\n",
" 3: 'Animation',\n",
" 4: \"Children's\",\n",
" 5: 'Comedy',\n",
" 6: 'Crime',\n",
" 7: 'Documentary',\n",
" 8: 'Drama',\n",
" 9: 'Fantasy',\n",
" 10: 'Film-Noir',\n",
" 11: 'Horror',\n",
" 12: 'Musical',\n",
" 13: 'Mystery',\n",
" 14: 'Romance',\n",
" 15: 'Sci-Fi',\n",
" 16: 'Thriller',\n",
" 17: 'War',\n",
" 18: 'Western'}"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"genres"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# u.item is pipe-separated and latin-1 encoded; with header=None the columns get integer labels\n",
"movies = pd.read_csv(\n",
"    './Datasets/ml-100k/u.item',\n",
"    sep='|',\n",
"    header=None,\n",
"    encoding='latin-1',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <th>5</th>\n",
" <th>6</th>\n",
" <th>7</th>\n",
" <th>8</th>\n",
" <th>9</th>\n",
" <th>...</th>\n",
" <th>14</th>\n",
" <th>15</th>\n",
" <th>16</th>\n",
" <th>17</th>\n",
" <th>18</th>\n",
" <th>19</th>\n",
" <th>20</th>\n",
" <th>21</th>\n",
" <th>22</th>\n",
" <th>23</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>Toy Story (1995)</td>\n",
" <td>01-Jan-1995</td>\n",
" <td>NaN</td>\n",
" <td>http://us.imdb.com/M/title-exact?Toy%20Story%2...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>GoldenEye (1995)</td>\n",
" <td>01-Jan-1995</td>\n",
" <td>NaN</td>\n",
" <td>http://us.imdb.com/M/title-exact?GoldenEye%20(...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>Four Rooms (1995)</td>\n",
" <td>01-Jan-1995</td>\n",
" <td>NaN</td>\n",
" <td>http://us.imdb.com/M/title-exact?Four%20Rooms%...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3 rows × 24 columns</p>\n",
"</div>"
],
"text/plain": [
" 0 1 2 3 \\\n",
"0 1 Toy Story (1995) 01-Jan-1995 NaN \n",
"1 2 GoldenEye (1995) 01-Jan-1995 NaN \n",
"2 3 Four Rooms (1995) 01-Jan-1995 NaN \n",
"\n",
" 4 5 6 7 8 9 ... \\\n",
"0 http://us.imdb.com/M/title-exact?Toy%20Story%2... 0 0 0 1 1 ... \n",
"1 http://us.imdb.com/M/title-exact?GoldenEye%20(... 0 1 1 0 0 ... \n",
"2 http://us.imdb.com/M/title-exact?Four%20Rooms%... 0 0 0 0 0 ... \n",
"\n",
" 14 15 16 17 18 19 20 21 22 23 \n",
"0 0 0 0 0 0 0 0 0 0 0 \n",
"1 0 0 0 0 0 0 0 1 0 0 \n",
"2 0 0 0 0 0 0 0 1 0 0 \n",
"\n",
"[3 rows x 24 columns]"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Peek at the first three raw rows\n",
"movies.head(3)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# The genre flag for genre id g lives in column g + 5 and holds 0 or 1.\n",
"# Swap each flag for the genre name (1) or an empty string (0).\n",
"# Iterating over `genres` avoids hard-coding the number of genres, and replace() leaves\n",
"# values that are neither 0 nor 1 untouched, so re-running this cell does not wipe\n",
"# the names that were already filled in.\n",
"for genre_id, genre_name in genres.items():\n",
"    flag_column = genre_id + 5\n",
"    movies[flag_column] = movies[flag_column].replace({1: genre_name, 0: ''})"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# Join the non-empty genre names of each movie into one comma-separated string.\n",
"# Only the len(genres) genre columns starting at position 5 are selected: an open-ended\n",
"# 5: slice would also pick up the 'genre' column itself when this cell is run a second time.\n",
"genre_columns = movies.iloc[:, 5:5 + len(genres)]\n",
"movies['genre'] = genre_columns.apply(lambda row: ', '.join(row[row != '']), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# Keep only the id, the title and the joined genre string, under readable column names\n",
"movies = movies[[0, 1, 'genre']].rename(columns={0: 'id', 1: 'title', 'genre': 'genres'})"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>title</th>\n",
" <th>genres</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>Toy Story (1995)</td>\n",
" <td>Animation, Children's, Comedy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>GoldenEye (1995)</td>\n",
" <td>Action, Adventure, Thriller</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>Four Rooms (1995)</td>\n",
" <td>Thriller</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>Get Shorty (1995)</td>\n",
" <td>Action, Comedy, Drama</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>Copycat (1995)</td>\n",
" <td>Crime, Drama, Thriller</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id title genres\n",
"0 1 Toy Story (1995) Animation, Children's, Comedy\n",
"1 2 GoldenEye (1995) Action, Adventure, Thriller\n",
"2 3 Four Rooms (1995) Thriller\n",
"3 4 Get Shorty (1995) Action, Comedy, Drama\n",
"4 5 Copycat (1995) Crime, Drama, Thriller"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Save the reduced movie table (without the index column), then preview its first five rows\n",
"movies.to_csv('./Datasets/ml-100k/movies.csv', index=False)\n",
"movies.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Toy example"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"# makedirs(..., exist_ok=True) creates the folder (and any missing parent) and is a no-op\n",
"# when the folder already exists, so no separate existence check is needed.\n",
"os.makedirs('./Datasets/toy-example/', exist_ok=True)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# Tiny hand-made interaction set, one row per rating.\n",
"# NOTE(review): columns presumably follow the ml-100k layout (user, item, rating, timestamp) - confirm.\n",
"toy_train_rows = [\n",
"    [0,0,3,0], [0,10,4,0], [0,40,5,0], [0,70,4,0],\n",
"    [10,10,1,0], [10,20,2,0], [10,30,3,0],\n",
"    [20,30,5,0], [20,50,3,0], [20,60,4,0],\n",
"]\n",
"toy_test_rows = [\n",
"    [0,60,3,0],\n",
"    [10,40,5,0],\n",
"    [20,0,5,0], [20,20,4,0], [20,70,2,0],\n",
"]\n",
"toy_train = pd.DataFrame(toy_train_rows)\n",
"toy_test = pd.DataFrame(toy_test_rows)\n",
"\n",
"# Same on-disk format for both splits: tab-separated, no header row, no index column\n",
"for frame, path in [(toy_train, './Datasets/toy-example/train.csv'),\n",
"                    (toy_test, './Datasets/toy-example/test.csv')]:\n",
"    frame.to_csv(path, sep='\\t', header=None, index=False)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}