diff --git a/Class_1_Recommender_systems_introduction.pdf b/Class_1_Recommender_systems_introduction.pdf deleted file mode 100644 index 13c6c28..0000000 Binary files a/Class_1_Recommender_systems_introduction.pdf and /dev/null differ diff --git a/Class_4_Content_based_recommenders.one b/Class_4_Content_based_recommenders.one deleted file mode 100644 index 320cea0..0000000 Binary files a/Class_4_Content_based_recommenders.one and /dev/null differ diff --git a/Class_4_Content_based_recommenders.pdf b/Class_4_Content_based_recommenders.pdf deleted file mode 100644 index b63a7b2..0000000 Binary files a/Class_4_Content_based_recommenders.pdf and /dev/null differ diff --git a/Class_4_Recommender_systems_testing_and_evaluation.pdf b/Class_4_Recommender_systems_testing_and_evaluation.pdf deleted file mode 100644 index 3e29080..0000000 Binary files a/Class_4_Recommender_systems_testing_and_evaluation.pdf and /dev/null differ diff --git a/Class_5_Amazon_recommender.one b/Class_5_Amazon_recommender.one deleted file mode 100644 index 7c74476..0000000 Binary files a/Class_5_Amazon_recommender.one and /dev/null differ diff --git a/Class_5_Amazon_recommender.pdf b/Class_5_Amazon_recommender.pdf deleted file mode 100644 index e1883ad..0000000 Binary files a/Class_5_Amazon_recommender.pdf and /dev/null differ diff --git a/class_2_numpy_pandas.ipynb b/class_2_numpy_pandas.ipynb deleted file mode 100644 index 550b907..0000000 --- a/class_2_numpy_pandas.ipynb +++ /dev/null @@ -1,4669 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "spread-happiness", - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib inline\n", - "%load_ext autoreload\n", - "%autoreload 2\n", - "\n", - "import numpy as np\n", - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "from IPython.display import Markdown, display, HTML\n", - "\n", - "# Fix the dying kernel problem (only a problem in some installations - you can 
remove it, if it works without it)\n", - "import os\n", - "os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'" - ] - }, - { - "cell_type": "markdown", - "id": "approximate-classic", - "metadata": {}, - "source": [ - "# Numpy\n", - "\n", - "For a detailed reference check out: https://numpy.org/doc/stable/reference/arrays.indexing.html." - ] - }, - { - "cell_type": "markdown", - "id": "renewable-chase", - "metadata": {}, - "source": [ - "## Creating numpy arrays" - ] - }, - { - "cell_type": "markdown", - "id": "afraid-consortium", - "metadata": {}, - "source": [ - "### Directly" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "textile-mainland", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[1. 2. 3.]\n", - " [4. 5. 6.]\n", - " [7. 8. 9.]]\n" - ] - } - ], - "source": [ - "a = np.array(\n", - " [[1.0, 2.0, 3.0], \n", - " [4.0, 5.0, 6.0], \n", - " [7.0, 8.0, 9.0]]\n", - ")\n", - "\n", - "print(a)" - ] - }, - { - "cell_type": "markdown", - "id": "floating-junior", - "metadata": {}, - "source": [ - "### From a list" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "reasonable-mistress", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]\n", - "\n", - "[[1. 2. 3.]\n", - " [4. 5. 6.]\n", - " [7. 8. 
9.]]\n" - ] - } - ], - "source": [ - "a = [[1.0, 2.0, 3.0], \n", - " [4.0, 5.0, 6.0], \n", - " [7.0, 8.0, 9.0]]\n", - "\n", - "print(a)\n", - "print()\n", - "\n", - "a = np.array(a)\n", - "\n", - "print(a)" - ] - }, - { - "cell_type": "markdown", - "id": "incorrect-practitioner", - "metadata": {}, - "source": [ - "### From a list comprehension" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "straight-cooling", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]\n", - "\n", - "[ 0 1 4 9 16 25 36 49 64 81]\n" - ] - } - ], - "source": [ - "a = [i**2 for i in range(10)]\n", - "\n", - "print(a)\n", - "print()\n", - "print(np.array(a))" - ] - }, - { - "cell_type": "markdown", - "id": "suffering-myanmar", - "metadata": {}, - "source": [ - "### Ready-made functions in numpy" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "expensive-bowling", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "All zeros\n", - "[[0. 0. 0. 0.]\n", - " [0. 0. 0. 0.]\n", - " [0. 0. 0. 0.]]\n", - "\n", - "All chosen value (variant 1)\n", - "[[7. 7. 7. 7.]\n", - " [7. 7. 7. 7.]\n", - " [7. 7. 7. 7.]]\n", - "\n", - "All chosen value (variant 2)\n", - "[[7. 7. 7. 7.]\n", - " [7. 7. 7. 7.]\n", - " [7. 7. 7. 
7.]]\n", - "\n", - "Random integers\n", - "[[7 5]\n", - " [9 8]\n", - " [6 3]]\n", - "\n", - "Random values from the normal distribution\n", - "[[ 3.88109518 -15.30896612]\n", - " [ 7.88779281 7.67458172]\n", - " [ -9.81026963 -6.02098263]]\n" - ] - } - ], - "source": [ - "# All zeros\n", - "a = np.zeros((3, 4))\n", - "print(\"All zeros\")\n", - "print(a)\n", - "print()\n", - "\n", - "# All a chosen value\n", - "a = np.full((3, 4), 7.0)\n", - "print(\"All chosen value (variant 1)\")\n", - "print(a)\n", - "print()\n", - "\n", - "# or\n", - "\n", - "a = np.zeros((3, 4))\n", - "a[:] = 7.0\n", - "print(\"All chosen value (variant 2)\")\n", - "print(a)\n", - "print()\n", - "\n", - "# Random integers\n", - "\n", - "a = np.random.randint(low=0, high=10, size=(3, 2))\n", - "print(\"Random integers\")\n", - "print(a)\n", - "print()\n", - "\n", - "# Random values from the normal distribution (Gaussian)\n", - "\n", - "print(\"Random values from the normal distribution\")\n", - "a = np.random.normal(loc=0, scale=10, size=(3, 2))\n", - "print(a)" - ] - }, - { - "cell_type": "markdown", - "id": "aggressive-titanium", - "metadata": {}, - "source": [ - "## Slicing numpy arrays" - ] - }, - { - "cell_type": "markdown", - "id": "former-richardson", - "metadata": {}, - "source": [ - "### Slicing in 1D\n", - "\n", - "To obtain only specific values from a numpy array one can use so called slicing. It has the form\n", - "\n", - "**arr[low:high:step]**\n", - "\n", - "where low is the lowest index to be retrieved, high is the lowest index not to be retrieved and step indicates that every step element will be taken." 
- ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "desirable-documentary", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Original: [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]\n", - "First 5 elements: [0, 1, 4, 9, 16]\n", - "Elements from index 3 to index 5: [9, 16, 25]\n", - "Last 3 elements (negative indexing): [49, 64, 81]\n", - "Printed in reverse order: [81, 64, 49, 36, 25, 16, 9, 4, 1, 0]\n", - "Every second element: [0, 4, 16, 36, 64]\n" - ] - } - ], - "source": [ - "a = [i**2 for i in range(10)]\n", - "\n", - "print(\"Original: \", a)\n", - "print(\"First 5 elements:\", a[:5])\n", - "print(\"Elements from index 3 to index 5:\", a[3:6])\n", - "print(\"Last 3 elements (negative indexing):\", a[-3:])\n", - "print(\"Printed in reverse order:\", a[::-1])\n", - "print(\"Every second element:\", a[::2])" - ] - }, - { - "cell_type": "markdown", - "id": "micro-explosion", - "metadata": {}, - "source": [ - "### Slicing in 2D\n", - "\n", - "In two dimensions it works similarly, just the slicing is separate for every dimension." 
- ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "disciplinary-think", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Original: \n", - "[[ 0 1 2 3 4]\n", - " [ 5 6 7 8 9]\n", - " [10 11 12 13 14]\n", - " [15 16 17 18 19]\n", - " [20 21 22 23 24]]\n", - "\n", - "First 2 elements of the first 3 row:\n", - "[[ 0 1]\n", - " [ 5 6]\n", - " [10 11]]\n", - "\n", - "Middle 3 elements from the middle 3 rows:\n", - "[[ 6 7 8]\n", - " [11 12 13]\n", - " [16 17 18]]\n", - "\n", - "Bottom-right 3 by 3 submatrix (negative indexing):\n", - "[[12 13 14]\n", - " [17 18 19]\n", - " [22 23 24]]\n", - "\n", - "Reversed columns:\n", - "[[ 4 3 2 1 0]\n", - " [ 9 8 7 6 5]\n", - " [14 13 12 11 10]\n", - " [19 18 17 16 15]\n", - " [24 23 22 21 20]]\n", - "\n" - ] - } - ], - "source": [ - "a = np.array([i for i in range(25)]).reshape(5, 5)\n", - "\n", - "print(\"Original: \")\n", - "print(a)\n", - "print()\n", - "print(\"First 2 elements of the first 3 row:\")\n", - "print(a[:3, :2])\n", - "print()\n", - "print(\"Middle 3 elements from the middle 3 rows:\")\n", - "print(a[1:4, 1:4])\n", - "print()\n", - "print(\"Bottom-right 3 by 3 submatrix (negative indexing):\")\n", - "print(a[-3:, -3:])\n", - "print()\n", - "print(\"Reversed columns:\")\n", - "print(a[:, ::-1])\n", - "print()" - ] - }, - { - "cell_type": "markdown", - "id": "removable-canyon", - "metadata": {}, - "source": [ - "### Setting numpy array field values" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "senior-serbia", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Original: \n", - "[[ 0 1 2 3 4]\n", - " [ 5 6 7 8 9]\n", - " [10 11 12 13 14]\n", - " [15 16 17 18 19]\n", - " [20 21 22 23 24]]\n", - "\n", - "Middle values changed to 5\n", - "[[ 0 1 2 3 4]\n", - " [ 5 5 5 5 9]\n", - " [10 5 5 5 14]\n", - " [15 5 5 5 19]\n", - " [20 21 22 23 24]]\n", - "\n", - "Second matrix\n", - "[[ 0 0 
2]\n", - " [ 6 12 20]\n", - " [30 42 56]]\n", - "\n", - "Second matrix substituted into the middle of the first matrix\n", - "[[ 0 1 2 3 4]\n", - " [ 5 0 0 2 9]\n", - " [10 6 12 20 14]\n", - " [15 30 42 56 19]\n", - " [20 21 22 23 24]]\n" - ] - } - ], - "source": [ - "a = np.array([i for i in range(25)]).reshape(5, 5)\n", - "\n", - "print(\"Original: \")\n", - "print(a)\n", - "print()\n", - "\n", - "a[1:4, 1:4] = 5.0\n", - "\n", - "print(\"Middle values changed to 5\")\n", - "print(a)\n", - "print()\n", - "\n", - "b = np.array([i**2 - i for i in range(9)]).reshape(3, 3)\n", - "\n", - "print(\"Second matrix\")\n", - "print(b)\n", - "print()\n", - "\n", - "a[1:4, 1:4] = b\n", - "\n", - "print(\"Second matrix substituted into the middle of the first matrix\")\n", - "print(a)" - ] - }, - { - "cell_type": "markdown", - "id": "federal-wayne", - "metadata": {}, - "source": [ - "## Operations on numpy arrays\n", - "\n", - "It is important to remember that arithmetic operations on numpy arrays are always element-wise." - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "southwest-biotechnology", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[ 0 1 4]\n", - " [ 9 16 25]\n", - " [36 49 64]]\n", - "\n", - "[[0. 1. 1.41421356]\n", - " [1.73205081 2. 2.23606798]\n", - " [2.44948974 2.64575131 2.82842712]]\n", - "\n" - ] - } - ], - "source": [ - "a = np.array([i**2 for i in range(9)]).reshape((3, 3))\n", - "print(a)\n", - "print()\n", - "\n", - "b = np.array([i**0.5 for i in range(9)]).reshape((3, 3))\n", - "print(b)\n", - "print()" - ] - }, - { - "cell_type": "markdown", - "id": "intensive-gates", - "metadata": {}, - "source": [ - "### Element-wise sum" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "behavioral-safety", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[ 0. 2. 5.41421356]\n", - " [10.73205081 18. 
27.23606798]\n", - " [38.44948974 51.64575131 66.82842712]]\n" - ] - } - ], - "source": [ - "print(a + b)" - ] - }, - { - "cell_type": "markdown", - "id": "occupied-trial", - "metadata": {}, - "source": [ - "### Element-wise multiplication" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "charming-pleasure", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[ 0. 1. 5.65685425]\n", - " [ 15.58845727 32. 55.90169944]\n", - " [ 88.18163074 129.64181424 181.01933598]]\n" - ] - } - ], - "source": [ - "print(a * b)" - ] - }, - { - "cell_type": "markdown", - "id": "efficient-league", - "metadata": {}, - "source": [ - "### Matrix multiplication" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "changing-community", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[ 11.53000978 12.58300524 13.54977648]\n", - " [ 88.95005649 107.14378278 119.21568782]\n", - " [241.63783311 303.32808391 341.49835513]]\n", - "\n", - "[[ 0. 1. 4.]\n", - " [ 9. 16. 25.]\n", - " [36. 49. 
64.]]\n" - ] - } - ], - "source": [ - "print(np.matmul(a, b))\n", - "print()\n", - "\n", - "# Multiplication by the identity matrix (to check it works as expected)\n", - "id_matrix = np.array([[1.0, 0.0, 0.0], \n", - " [0.0, 1.0, 0.0], \n", - " [0.0, 0.0, 1.0]])\n", - "\n", - "print(np.matmul(id_matrix, a))" - ] - }, - { - "cell_type": "markdown", - "id": "assisted-communications", - "metadata": {}, - "source": [ - "### Calculating the mean" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "defensive-wrong", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[1 4 0 6 4]\n", - "\n", - "Mean (by sum): 3.0\n", - "Mean (by mean): 3.0\n" - ] - } - ], - "source": [ - "a = np.random.randint(low=0, high=10, size=(5))\n", - "\n", - "print(a)\n", - "print()\n", - "\n", - "print(\"Mean (by sum): \", np.sum(a) / len(a))\n", - "print(\"Mean (by mean):\", np.mean(a))" - ] - }, - { - "cell_type": "markdown", - "id": "complex-karma", - "metadata": {}, - "source": [ - "### Calculating the mean of every row" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "correct-dietary", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[4 9 5]\n", - " [8 9 1]\n", - " [5 6 4]\n", - " [3 7 8]\n", - " [2 1 5]]\n", - "\n", - "(5, 3)\n", - "\n", - "Mean: [6. 6. 5. 6. 2.66666667]\n", - "Mean in the original matrix form:\n", - "[[6. ]\n", - " [6. ]\n", - " [5. ]\n", - " [6. 
]\n", - " [2.66666667]]\n" - ] - } - ], - "source": [ - "a = np.random.randint(low=0, high=10, size=(5, 3))\n", - "\n", - "print(a)\n", - "print()\n", - "print(a.shape)\n", - "print()\n", - "\n", - "print(\"Mean:\", np.sum(a, axis=1) / a.shape[1])\n", - "\n", - "print(\"Mean in the original matrix form:\")\n", - "print((np.sum(a, axis=1) / a.shape[1]).reshape(-1, 1)) # -1 calculates the right size to use all elements" - ] - }, - { - "cell_type": "markdown", - "id": "indian-orlando", - "metadata": {}, - "source": [ - "### More complex operations" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "presidential-cologne", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Vector to power 2 (element-wise)\n", - "[1. 4. 9.]\n", - "\n", - "Euler number to the power a (element-wise)\n", - "[ 2.71828183 7.3890561 20.08553692]\n", - "\n", - "An even more complex expression\n", - "[0.61971364 1.89817602 4.84758949]\n" - ] - } - ], - "source": [ - "a = [1.0, 2.0, 3.0]\n", - "\n", - "print(\"Vector to power 2 (element-wise)\")\n", - "print(np.power(a, 2))\n", - "print()\n", - "print(\"Euler number to the power a (element-wise)\")\n", - "print(np.exp(a))\n", - "print()\n", - "print(\"An even more complex expression\")\n", - "print((np.power(a, 2) + np.exp(a)) / np.sum(a))" - ] - }, - { - "cell_type": "markdown", - "id": "hearing-street", - "metadata": {}, - "source": [ - "## Numpy tasks" - ] - }, - { - "cell_type": "markdown", - "id": "regular-niger", - "metadata": {}, - "source": [ - "**Task 1.** Calculate the sigmoid (logistic) function on every element of the following numpy array [0.3, 1.2, -1.4, 0.2, -0.1, 0.1, 0.8, -0.25] and print the last 5 elements. Use only vector operations." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "agreed-single", - "metadata": {}, - "outputs": [], - "source": [ - "# Write your code here" - ] - }, - { - "cell_type": "markdown", - "id": "another-catch", - "metadata": {}, - "source": [ - "**Task 2.** Calculate the dot product of the following two vectors:
\n", - "$x = [3, 1, 4, 2, 6, 1, 4, 8]$
\n", - "$y = [5, 2, 3, 12, 2, 4, 17, 11]$
\n", - "a) by using element-wise multiplication and np.sum,
\n", - "b) by using np.dot,
\n", - "c) by using np.matmul and transposition (x.T)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "forbidden-journalism", - "metadata": {}, - "outputs": [], - "source": [ - "# Write your code here" - ] - }, - { - "cell_type": "markdown", - "id": "acute-amber", - "metadata": {}, - "source": [ - "**Task 3.** Calculate the following expression
\n", - "$$\\frac{1}{1 + e^{-x_0 \\theta_0 - \\ldots - x_9 \\theta_9 - \\theta_{10}}}$$\n", - "for
\n", - "$x = [1.2, 2.3, 3.4, -0.7, 4.2, 2.7, -0.5, -2.1, -3.3, 0.2]$
\n", - "$\\theta = [7.7, 0.33, -2.12, -1.73, 2.9, -5.8, -0.9, 12.11, 3.43, -0.5, 1.65]$
\n", - "and print the result. Use only vector operations." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "falling-holder", - "metadata": {}, - "outputs": [], - "source": [ - "# Write your code here" - ] - }, - { - "cell_type": "markdown", - "id": "latter-vector", - "metadata": {}, - "source": [ - "# Pandas" - ] - }, - { - "cell_type": "markdown", - "id": "contrary-vacuum", - "metadata": {}, - "source": [ - "## Load datasets\n", - "\n", - "- Steam (https://www.kaggle.com/tamber/steam-video-games)\n", - "\n", - "- MovieLens (https://grouplens.org/datasets/movielens/)" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "alert-friday", - "metadata": {}, - "outputs": [], - "source": [ - "steam_df = pd.read_csv(os.path.join(\"data\", \"steam\", \"steam-200k.csv\"), \n", - " names=['user-id', 'game-title', 'behavior-name', 'value', 'zero'])\n", - "\n", - "ml_ratings_df = pd.read_csv(os.path.join(\"data\", \"movielens_small\", \"ratings.csv\"))\n", - "ml_movies_df = pd.read_csv(os.path.join(\"data\", \"movielens_small\", \"movies.csv\"))" - ] - }, - { - "cell_type": "markdown", - "id": "personal-productivity", - "metadata": {}, - "source": [ - "## Peek into the datasets" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "id": "musical-trust", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user-idgame-titlebehavior-namevaluezero
0151603712The Elder Scrolls V Skyrimpurchase1.00
1151603712The Elder Scrolls V Skyrimplay273.00
2151603712Fallout 4purchase1.00
3151603712Fallout 4play87.00
4151603712Sporepurchase1.00
5151603712Sporeplay14.90
6151603712Fallout New Vegaspurchase1.00
7151603712Fallout New Vegasplay12.10
8151603712Left 4 Dead 2purchase1.00
9151603712Left 4 Dead 2play8.90
\n", - "
" - ], - "text/plain": [ - " user-id game-title behavior-name value zero\n", - "0 151603712 The Elder Scrolls V Skyrim purchase 1.0 0\n", - "1 151603712 The Elder Scrolls V Skyrim play 273.0 0\n", - "2 151603712 Fallout 4 purchase 1.0 0\n", - "3 151603712 Fallout 4 play 87.0 0\n", - "4 151603712 Spore purchase 1.0 0\n", - "5 151603712 Spore play 14.9 0\n", - "6 151603712 Fallout New Vegas purchase 1.0 0\n", - "7 151603712 Fallout New Vegas play 12.1 0\n", - "8 151603712 Left 4 Dead 2 purchase 1.0 0\n", - "9 151603712 Left 4 Dead 2 play 8.9 0" - ] - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "steam_df.head(10)" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "id": "electrical-floor", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
userIdmovieIdratingtimestamp
0114.0964982703
1134.0964981247
2164.0964982224
31475.0964983815
41505.0964982931
51703.0964982400
611015.0964980868
711104.0964982176
811515.0964984041
911575.0964984100
\n", - "
" - ], - "text/plain": [ - " userId movieId rating timestamp\n", - "0 1 1 4.0 964982703\n", - "1 1 3 4.0 964981247\n", - "2 1 6 4.0 964982224\n", - "3 1 47 5.0 964983815\n", - "4 1 50 5.0 964982931\n", - "5 1 70 3.0 964982400\n", - "6 1 101 5.0 964980868\n", - "7 1 110 4.0 964982176\n", - "8 1 151 5.0 964984041\n", - "9 1 157 5.0 964984100" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ml_ratings_df.head(10)" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "id": "cordless-daniel", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
movieIdtitlegenres
01Toy Story (1995)Adventure|Animation|Children|Comedy|Fantasy
12Jumanji (1995)Adventure|Children|Fantasy
23Grumpier Old Men (1995)Comedy|Romance
34Waiting to Exhale (1995)Comedy|Drama|Romance
45Father of the Bride Part II (1995)Comedy
56Heat (1995)Action|Crime|Thriller
67Sabrina (1995)Comedy|Romance
78Tom and Huck (1995)Adventure|Children
89Sudden Death (1995)Action
910GoldenEye (1995)Action|Adventure|Thriller
\n", - "
" - ], - "text/plain": [ - " movieId title \\\n", - "0 1 Toy Story (1995) \n", - "1 2 Jumanji (1995) \n", - "2 3 Grumpier Old Men (1995) \n", - "3 4 Waiting to Exhale (1995) \n", - "4 5 Father of the Bride Part II (1995) \n", - "5 6 Heat (1995) \n", - "6 7 Sabrina (1995) \n", - "7 8 Tom and Huck (1995) \n", - "8 9 Sudden Death (1995) \n", - "9 10 GoldenEye (1995) \n", - "\n", - " genres \n", - "0 Adventure|Animation|Children|Comedy|Fantasy \n", - "1 Adventure|Children|Fantasy \n", - "2 Comedy|Romance \n", - "3 Comedy|Drama|Romance \n", - "4 Comedy \n", - "5 Action|Crime|Thriller \n", - "6 Comedy|Romance \n", - "7 Adventure|Children \n", - "8 Action \n", - "9 Action|Adventure|Thriller " - ] - }, - "execution_count": 36, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ml_movies_df.head(10)" - ] - }, - { - "cell_type": "markdown", - "id": "alpha-portal", - "metadata": {}, - "source": [ - "## Merge both MovieLens DataFrames into one" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "id": "affecting-disclosure", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
userIdmovieIdratingtimestamptitlegenres
0114.0964982703Toy Story (1995)Adventure|Animation|Children|Comedy|Fantasy
1514.0847434962Toy Story (1995)Adventure|Animation|Children|Comedy|Fantasy
2714.51106635946Toy Story (1995)Adventure|Animation|Children|Comedy|Fantasy
31512.51510577970Toy Story (1995)Adventure|Animation|Children|Comedy|Fantasy
41714.51305696483Toy Story (1995)Adventure|Animation|Children|Comedy|Fantasy
51813.51455209816Toy Story (1995)Adventure|Animation|Children|Comedy|Fantasy
61914.0965705637Toy Story (1995)Adventure|Animation|Children|Comedy|Fantasy
72113.51407618878Toy Story (1995)Adventure|Animation|Children|Comedy|Fantasy
82713.0962685262Toy Story (1995)Adventure|Animation|Children|Comedy|Fantasy
93115.0850466616Toy Story (1995)Adventure|Animation|Children|Comedy|Fantasy
\n", - "
" - ], - "text/plain": [ - " userId movieId rating timestamp title \\\n", - "0 1 1 4.0 964982703 Toy Story (1995) \n", - "1 5 1 4.0 847434962 Toy Story (1995) \n", - "2 7 1 4.5 1106635946 Toy Story (1995) \n", - "3 15 1 2.5 1510577970 Toy Story (1995) \n", - "4 17 1 4.5 1305696483 Toy Story (1995) \n", - "5 18 1 3.5 1455209816 Toy Story (1995) \n", - "6 19 1 4.0 965705637 Toy Story (1995) \n", - "7 21 1 3.5 1407618878 Toy Story (1995) \n", - "8 27 1 3.0 962685262 Toy Story (1995) \n", - "9 31 1 5.0 850466616 Toy Story (1995) \n", - "\n", - " genres \n", - "0 Adventure|Animation|Children|Comedy|Fantasy \n", - "1 Adventure|Animation|Children|Comedy|Fantasy \n", - "2 Adventure|Animation|Children|Comedy|Fantasy \n", - "3 Adventure|Animation|Children|Comedy|Fantasy \n", - "4 Adventure|Animation|Children|Comedy|Fantasy \n", - "5 Adventure|Animation|Children|Comedy|Fantasy \n", - "6 Adventure|Animation|Children|Comedy|Fantasy \n", - "7 Adventure|Animation|Children|Comedy|Fantasy \n", - "8 Adventure|Animation|Children|Comedy|Fantasy \n", - "9 Adventure|Animation|Children|Comedy|Fantasy " - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ml_df = pd.merge(ml_ratings_df, ml_movies_df, on='movieId')\n", - "ml_df.head(10)" - ] - }, - { - "cell_type": "markdown", - "id": "lightweight-constitution", - "metadata": {}, - "source": [ - "## Choosing a row, a column or several columns" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "id": "excited-interface", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user-idgame-titlebehavior-namevaluezero
0151603712The Elder Scrolls V Skyrimpurchase1.00
1151603712The Elder Scrolls V Skyrimplay273.00
2151603712Fallout 4purchase1.00
3151603712Fallout 4play87.00
4151603712Sporepurchase1.00
5151603712Sporeplay14.90
6151603712Fallout New Vegaspurchase1.00
7151603712Fallout New Vegasplay12.10
8151603712Left 4 Dead 2purchase1.00
9151603712Left 4 Dead 2play8.90
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Choosing rows by index\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user-idgame-titlebehavior-namevaluezero
3151603712Fallout 4play87.00
4151603712Sporepurchase1.00
5151603712Sporeplay14.90
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Choosing rows by position\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user-idgame-titlebehavior-namevaluezero
3151603712Fallout 4play87.00
4151603712Sporepurchase1.00
5151603712Sporeplay14.90
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "display(HTML(steam_df.head(10).to_html()))\n", - "\n", - "# Choosing rows by index\n", - "chosen_df = steam_df[3:6]\n", - "\n", - "print(\"Choosing rows by index\")\n", - "display(HTML(chosen_df.head(10).to_html()))\n", - "\n", - "# Choosing rows by position\n", - "chosen_df = steam_df.iloc[3:6]\n", - "\n", - "print(\"Choosing rows by position\")\n", - "display(HTML(chosen_df.head(10).to_html()))" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "id": "reflected-banner", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0 The Elder Scrolls V Skyrim\n", - "1 The Elder Scrolls V Skyrim\n", - "2 Fallout 4\n", - "3 Fallout 4\n", - "4 Spore\n", - "5 Spore\n", - "6 Fallout New Vegas\n", - "7 Fallout New Vegas\n", - "8 Left 4 Dead 2\n", - "9 Left 4 Dead 2\n", - "Name: game-title, dtype: object\n" - ] - } - ], - "source": [ - "# Choosing a column\n", - "chosen_df = steam_df['game-title']\n", - "\n", - "print(chosen_df.head(10))" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "id": "efficient-humidity", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user-idgame-title
0151603712The Elder Scrolls V Skyrim
1151603712The Elder Scrolls V Skyrim
2151603712Fallout 4
3151603712Fallout 4
4151603712Spore
5151603712Spore
6151603712Fallout New Vegas
7151603712Fallout New Vegas
8151603712Left 4 Dead 2
9151603712Left 4 Dead 2
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Choosing several columns\n", - "chosen_df = steam_df[['user-id', 'game-title']]\n", - "\n", - "display(HTML(chosen_df.head(10).to_html()))" - ] - }, - { - "cell_type": "markdown", - "id": "popular-cause", - "metadata": {}, - "source": [ - "### Splitting the dataset into training and test set" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "id": "continuous-cheat", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Shuffled range of indices\n", - "[ 88886 27084 35588 56116 183664 34019 190384 138109 48325 94171\n", - " 163304 35071 45875 187591 107927 62332 97588 3784 669 75931]\n", - "\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user-idgame-titlebehavior-namevaluezero
88886173434036Mortal Kombat Xpurchase1.00
2708480779496Sins of a Solar Empire Trinityplay0.60
35588109669093Killing Floorplay225.00
5611694269421Fallout 4play10.10
183664279406744BLOCKADE 3Dpurchase1.00
34019126269125Grand Theft Auto San Andreaspurchase1.00
190384713354027 Days to Dieplay8.20
138109156818121Half-Life 2play22.00
48325114617787Garry's Modplay1.20
94171156615447LEGO MARVEL Super Heroesplay1.70
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user-idgame-titlebehavior-namevaluezero
17008081591317Warframepurchase1.00
8527944472980Serious Sam Double D XXLpurchase1.00
13291645592640Penumbra Black Plaguepurchase1.00
1219364787956Always Sometimes Monsterspurchase1.00
46374192538478Heroes & Generalsplay0.40
898231936551Castle Crasherspurchase1.00
179113132196353Knights and Merchantspurchase1.00
14400213190476Blood Bowl 2play6.30
3541660296891Mirror's Edgepurchase1.00
12078662990992Rome Total Warpurchase1.00
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "160000\n", - "40000\n" - ] - } - ], - "source": [ - "shuffle = np.array(list(range(len(steam_df))))\n", - "\n", - "# alternatively\n", - "\n", - "shuffle = np.arange(len(steam_df))\n", - "\n", - "np.random.shuffle(shuffle)\n", - "# shuffle = list(shuffle)\n", - "print(\"Shuffled range of indices\")\n", - "print(shuffle[:20])\n", - "print()\n", - "\n", - "train_test_split = 0.8\n", - "split_index = int(len(steam_df) * train_test_split)\n", - "\n", - "training_set = steam_df.iloc[shuffle[:split_index]]\n", - "test_set = steam_df.iloc[shuffle[split_index:]]\n", - "\n", - "display(HTML(training_set.head(10).to_html()))\n", - "\n", - "display(HTML(test_set.head(10).to_html()))\n", - "\n", - "print(len(training_set))\n", - "print(len(test_set))" - ] - }, - { - "cell_type": "markdown", - "id": "outside-twist", - "metadata": {}, - "source": [ - "## Filtering" - ] - }, - { - "cell_type": "markdown", - "id": "otherwise-rachel", - "metadata": {}, - "source": [ - "### Filtering columns" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "id": "numerical-pride", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user-idgame-title
0151603712The Elder Scrolls V Skyrim
1151603712The Elder Scrolls V Skyrim
2151603712Fallout 4
3151603712Fallout 4
4151603712Spore
5151603712Spore
6151603712Fallout New Vegas
7151603712Fallout New Vegas
8151603712Left 4 Dead 2
9151603712Left 4 Dead 2
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "chosen_df = steam_df.loc[:, ['user-id', 'game-title']]\n", - "\n", - "display(HTML(chosen_df.head(10).to_html()))" - ] - }, - { - "cell_type": "markdown", - "id": "interior-cleaner", - "metadata": {}, - "source": [ - "### Filtering rows" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "id": "marine-growth", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0 False\n", - "1 False\n", - "2 True\n", - "3 True\n", - "4 False\n", - "5 False\n", - "6 False\n", - "7 False\n", - "8 False\n", - "9 False\n", - "Name: game-title, dtype: bool\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user-idgame-titlebehavior-namevaluezero
2151603712Fallout 4purchase1.00
3151603712Fallout 4play87.00
318787445402Fallout 4purchase1.00
318887445402Fallout 4play83.00
568325096601Fallout 4purchase1.00
568425096601Fallout 4play1.60
6219211925330Fallout 4purchase1.00
6220211925330Fallout 4play133.00
7300115396529Fallout 4purchase1.00
7301115396529Fallout 4play17.90
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "condition = steam_df['game-title'] == 'Fallout 4'\n", - "\n", - "print(condition.head(10))\n", - "\n", - "chosen_df = steam_df.loc[condition]\n", - "\n", - "display(HTML(chosen_df.head(10).to_html()))" - ] - }, - { - "cell_type": "markdown", - "id": "functioning-condition", - "metadata": {}, - "source": [ - "### Filtering rows and columns at once" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "id": "advanced-religion", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user-idgame-titlevalue
3151603712Fallout 487.0
318887445402Fallout 483.0
568425096601Fallout 41.6
6220211925330Fallout 4133.0
7301115396529Fallout 417.9
75274834220Fallout 419.8
761765229865Fallout 40.5
771265958466Fallout 4123.0
996391800733Fallout 463.0
1070043913966Fallout 465.0
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "condition = (steam_df['game-title'] == 'Fallout 4') & (steam_df['behavior-name'] == 'play')\n", - "\n", - "chosen_df = steam_df.loc[condition, ['user-id', 'game-title', 'value']]\n", - "\n", - "display(HTML(chosen_df.head(10).to_html()))" - ] - }, - { - "cell_type": "markdown", - "id": "frequent-match", - "metadata": {}, - "source": [ - "## Simple operations on columns" - ] - }, - { - "cell_type": "markdown", - "id": "described-sister", - "metadata": {}, - "source": [ - "### Multiply a column by 2" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "id": "injured-sweet", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user-idgame-titlebehavior-namevaluezero
0151603712The Elder Scrolls V Skyrimpurchase1.00
1151603712The Elder Scrolls V Skyrimplay273.00
2151603712Fallout 4purchase1.00
3151603712Fallout 4play87.00
4151603712Sporepurchase1.00
5151603712Sporeplay14.90
6151603712Fallout New Vegaspurchase1.00
7151603712Fallout New Vegasplay12.10
8151603712Left 4 Dead 2purchase1.00
9151603712Left 4 Dead 2play8.90
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user-idgame-titlebehavior-namevaluezero
0151603712The Elder Scrolls V Skyrimpurchase2.00
1151603712The Elder Scrolls V Skyrimplay546.00
2151603712Fallout 4purchase2.00
3151603712Fallout 4play174.00
4151603712Sporepurchase2.00
5151603712Sporeplay29.80
6151603712Fallout New Vegaspurchase2.00
7151603712Fallout New Vegasplay24.20
8151603712Left 4 Dead 2purchase2.00
9151603712Left 4 Dead 2play17.80
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "steam_df_copy = steam_df.copy()\n", - "\n", - "display(HTML(steam_df_copy.head(10).to_html()))\n", - "\n", - "steam_df_copy.loc[:, 'value'] = steam_df_copy['value'] * 2\n", - "\n", - "display(HTML(steam_df_copy.head(10).to_html()))" - ] - }, - { - "cell_type": "markdown", - "id": "executed-processor", - "metadata": {}, - "source": [ - "### Choose the first n letters of a string" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "id": "forbidden-mining", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
movieIdtitlegenres
01Toy Story (1995)Adventure|Animation|Children|Comedy|Fantasy
12Jumanji (1995)Adventure|Children|Fantasy
23Grumpier Old Men (1995)Comedy|Romance
34Waiting to Exhale (1995)Comedy|Drama|Romance
45Father of the Bride Part II (1995)Comedy
56Heat (1995)Action|Crime|Thriller
67Sabrina (1995)Comedy|Romance
78Tom and Huck (1995)Adventure|Children
89Sudden Death (1995)Action
910GoldenEye (1995)Action|Adventure|Thriller
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
movieIdtitlegenres
01Toy StAdventure|Animation|Children|Comedy|Fantasy
12JumanjAdventure|Children|Fantasy
23GrumpiComedy|Romance
34WaitinComedy|Drama|Romance
45FatherComedy
56Heat (Action|Crime|Thriller
67SabrinComedy|Romance
78Tom anAdventure|Children
89SuddenAction
910GoldenAction|Adventure|Thriller
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "ml_movies_df_copy = ml_movies_df.copy()\n", - "\n", - "display(HTML(ml_movies_df_copy.head(10).to_html()))\n", - "\n", - "ml_movies_df_copy.loc[:, 'title'] = ml_movies_df_copy['title'].str[:6]\n", - "\n", - "display(HTML(ml_movies_df_copy.head(10).to_html()))" - ] - }, - { - "cell_type": "markdown", - "id": "incorporated-entrance", - "metadata": {}, - "source": [ - "### Take the mean of a column" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "id": "selected-trial", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "17.874384000000475\n", - "17.874384000000475\n" - ] - } - ], - "source": [ - "# Option 1\n", - "print(steam_df['value'].mean())\n", - "\n", - "# Option 2\n", - "print(np.mean(steam_df['value']))" - ] - }, - { - "cell_type": "markdown", - "id": "discrete-cheese", - "metadata": {}, - "source": [ - "### Simple operation on filtered data" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "id": "bridal-greenhouse", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - "
user-idgame-titlebehavior-namevaluezero
1151603712The Elder Scrolls V Skyrimplay273.00
3151603712Fallout 4play87.00
7359945701The Elder Scrolls V Skyrimplay58.00
106692107940The Elder Scrolls V Skyrimplay110.00
1168250006052The Elder Scrolls V Skyrimplay465.00
138811373749The Elder Scrolls V Skyrimplay220.00
206554103616The Elder Scrolls V Skyrimplay35.00
256956038151The Elder Scrolls V Skyrimplay14.60
318887445402Fallout 4play83.00
323394088853The Elder Scrolls V Skyrimplay320.00
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user-idgame-titlebehavior-namevaluezero
1151603712The Elder Scrolls V Skyrimplay273.00
3151603712Fallout 4play174.00
7359945701The Elder Scrolls V Skyrimplay58.00
106692107940The Elder Scrolls V Skyrimplay110.00
1168250006052The Elder Scrolls V Skyrimplay465.00
138811373749The Elder Scrolls V Skyrimplay220.00
206554103616The Elder Scrolls V Skyrimplay35.00
256956038151The Elder Scrolls V Skyrimplay14.60
318887445402Fallout 4play166.00
323394088853The Elder Scrolls V Skyrimplay320.00
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "steam_df_copy = steam_df.loc[((steam_df['game-title'] == 'Fallout 4') | (steam_df['game-title'] == 'The Elder Scrolls V Skyrim')) \n", - " & (steam_df['behavior-name'] == 'play')].copy()\n", - "\n", - "display(HTML(steam_df_copy.head(10).to_html()))\n", - "\n", - "condition = (steam_df_copy['game-title'] == 'Fallout 4') & (steam_df_copy['behavior-name'] == 'play')\n", - "\n", - "steam_df_copy.loc[condition, 'value'] = steam_df_copy.loc[condition, 'value'] * 2\n", - "\n", - "display(HTML(steam_df_copy.head(10).to_html()))" - ] - }, - { - "cell_type": "markdown", - "id": "relevant-strap", - "metadata": {}, - "source": [ - "## Advanced operations on columns" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "id": "female-french", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user-idgame-titlebehavior-namevaluezero
1151603712The Elder Scrolls V Skyrimplay273.00
3151603712Fallout 4play87.00
5151603712Sporeplay14.90
7151603712Fallout New Vegasplay12.10
9151603712Left 4 Dead 2play8.90
11151603712HuniePopplay8.50
13151603712Path of Exileplay8.10
15151603712Poly Bridgeplay7.50
17151603712Left 4 Deadplay3.30
19151603712Team Fortress 2play2.80
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user-idgame-titlebehavior-namevaluezero
1151603712The Elder Scrolls V Skyrimplay4.0000000
3151603712Fallout 4play4.0000000
5151603712Sporeplay2.7663190
7151603712Fallout New Vegasplay2.5726120
9151603712Left 4 Dead 2play2.2925350
11151603712HuniePopplay2.2512920
13151603712Path of Exileplay2.2082740
15151603712Poly Bridgeplay2.1400660
17151603712Left 4 Deadplay1.4586150
19151603712Team Fortress 2play1.3350010
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "def reduce_outliers(x):\n", - " return min(np.log(1 + x), 4)\n", - "\n", - "steam_df_copy = steam_df.loc[steam_df['behavior-name'] == 'play'].copy()\n", - "\n", - "display(HTML(steam_df_copy.head(10).to_html()))\n", - "\n", - "steam_df_copy.loc[:, 'value'] = steam_df_copy['value'].apply(reduce_outliers)\n", - "\n", - "display(HTML(steam_df_copy.head(10).to_html()))" - ] - }, - { - "cell_type": "markdown", - "id": "supported-graphic", - "metadata": {}, - "source": [ - "### The same apply operation can be achieved with the use of a lambda function" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "id": "objective-survey", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user-idgame-titlebehavior-namevaluezero
1151603712The Elder Scrolls V Skyrimplay273.00
3151603712Fallout 4play87.00
5151603712Sporeplay14.90
7151603712Fallout New Vegasplay12.10
9151603712Left 4 Dead 2play8.90
11151603712HuniePopplay8.50
13151603712Path of Exileplay8.10
15151603712Poly Bridgeplay7.50
17151603712Left 4 Deadplay3.30
19151603712Team Fortress 2play2.80
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user-idgame-titlebehavior-namevaluezero
1151603712The Elder Scrolls V Skyrimplay4.0000000
3151603712Fallout 4play4.0000000
5151603712Sporeplay2.7663190
7151603712Fallout New Vegasplay2.5726120
9151603712Left 4 Dead 2play2.2925350
11151603712HuniePopplay2.2512920
13151603712Path of Exileplay2.2082740
15151603712Poly Bridgeplay2.1400660
17151603712Left 4 Deadplay1.4586150
19151603712Team Fortress 2play1.3350010
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "steam_df_copy = steam_df.loc[steam_df['behavior-name'] == 'play'].copy()\n", - "\n", - "display(HTML(steam_df_copy.head(10).to_html()))\n", - "\n", - "steam_df_copy.loc[:, 'value'] = steam_df_copy['value'].apply(lambda x: min(np.log(1 + x), 4))\n", - "\n", - "display(HTML(steam_df_copy.head(10).to_html()))" - ] - }, - { - "cell_type": "markdown", - "id": "guilty-single", - "metadata": {}, - "source": [ - "### Apply on two columns at once" - ] - }, - { - "cell_type": "code", - "execution_count": 58, - "id": "thrown-geneva", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user-idgame-titlebehavior-namevaluezero
1151603712The Elder Scrolls V Skyrimplay273.00
3151603712Fallout 4play87.00
5151603712Sporeplay14.90
7151603712Fallout New Vegasplay12.10
9151603712Left 4 Dead 2play8.90
11151603712HuniePopplay8.50
13151603712Path of Exileplay8.10
15151603712Poly Bridgeplay7.50
17151603712Left 4 Deadplay3.30
19151603712Team Fortress 2play2.80
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user-idgame-titlebehavior-namevaluezerovalue_2
1151603712The Elder Scrolls V Skyrimplay273.004.000000
3151603712Fallout 4play87.004.000000
5151603712Sporeplay14.902.766319
7151603712Fallout New Vegasplay12.102.572612
9151603712Left 4 Dead 2play8.902.292535
11151603712HuniePopplay8.502.251292
13151603712Path of Exileplay8.102.208274
15151603712Poly Bridgeplay7.502.140066
17151603712Left 4 Deadplay3.301.458615
19151603712Team Fortress 2play2.801.335001
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user-idgame-titlebehavior-namevaluezerovalue_2
1151603712The Elder Scrolls V Skyrimplay1092.00000004.000000
3151603712Fallout 4play348.00000004.000000
5151603712Sporeplay41.21815502.766319
7151603712Fallout New Vegasplay31.12860802.572612
9151603712Left 4 Dead 2play20.40355902.292535
11151603712HuniePopplay19.13598002.251292
13151603712Path of Exileplay17.88702302.208274
15151603712Poly Bridgeplay16.05049602.140066
17151603712Left 4 Deadplay4.81343001.458615
19151603712Team Fortress 2play3.73800301.335001
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "steam_df_copy = steam_df.loc[steam_df['behavior-name'] == 'play'].copy()\n", - "\n", - "display(HTML(steam_df_copy.head(10).to_html()))\n", - "\n", - "steam_df_copy.loc[:, 'value_2'] = steam_df_copy['value'].apply(lambda x: min(np.log(1 + x), 4))\n", - "\n", - "display(HTML(steam_df_copy.head(10).to_html()))\n", - "\n", - "steam_df_copy.loc[:, 'value'] = steam_df_copy[['value', 'value_2']].apply(lambda x: x[0] * x[1], axis=1)\n", - "\n", - "display(HTML(steam_df_copy.head(10).to_html()))" - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "id": "governing-alexandria", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
movieIdtitlegenres
01Toy Story (1995)Adventure|Animation|Children|Comedy|Fantasy
12Jumanji (1995)Adventure|Children|Fantasy
23Grumpier Old Men (1995)Comedy|Romance
34Waiting to Exhale (1995)Comedy|Drama|Romance
45Father of the Bride Part II (1995)Comedy
56Heat (1995)Action|Crime|Thriller
67Sabrina (1995)Comedy|Romance
78Tom and Huck (1995)Adventure|Children
89Sudden Death (1995)Action
910GoldenEye (1995)Action|Adventure|Thriller
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
movieIdtitlegenrestitle|genres
01Toy Story (1995)Adventure|Animation|Children|Comedy|FantasyToy Story (1995)|Adventure|Animation|Children|Comedy|Fantasy
12Jumanji (1995)Adventure|Children|FantasyJumanji (1995)|Adventure|Children|Fantasy
23Grumpier Old Men (1995)Comedy|RomanceGrumpier Old Men (1995)|Comedy|Romance
34Waiting to Exhale (1995)Comedy|Drama|RomanceWaiting to Exhale (1995)|Comedy|Drama|Romance
45Father of the Bride Part II (1995)ComedyFather of the Bride Part II (1995)|Comedy
56Heat (1995)Action|Crime|ThrillerHeat (1995)|Action|Crime|Thriller
67Sabrina (1995)Comedy|RomanceSabrina (1995)|Comedy|Romance
78Tom and Huck (1995)Adventure|ChildrenTom and Huck (1995)|Adventure|Children
89Sudden Death (1995)ActionSudden Death (1995)|Action
910GoldenEye (1995)Action|Adventure|ThrillerGoldenEye (1995)|Action|Adventure|Thriller
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "ml_movies_df_copy = ml_movies_df.copy()\n", - "\n", - "display(HTML(ml_movies_df_copy.head(10).to_html()))\n", - "\n", - "ml_movies_df_copy.loc[:, 'title|genres'] = ml_movies_df_copy[['title', 'genres']].apply(lambda x: x[0] + \"|\" + x[1], axis=1)\n", - "\n", - "display(HTML(ml_movies_df_copy.head(10).to_html()))" - ] - }, - { - "cell_type": "markdown", - "id": "critical-fields", - "metadata": {}, - "source": [ - "## Grouping and aggregating" - ] - }, - { - "cell_type": "markdown", - "id": "biological-light", - "metadata": {}, - "source": [ - "### Find the most popular games (in terms of purchases)" - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "id": "greenhouse-scout", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
value
game-title
007 Legends1.0
0RBITALIS3.0
1... 2... 3... KICK IT! (Drop That Beat Like an Ugly Baby)7.0
10 Second Ninja6.0
10,000,0001.0
100% Orange Juice10.0
1000 Amps2.0
12 Labours of Hercules10.0
12 Labours of Hercules II The Cretan Bull12.0
12 Labours of Hercules III Girl Power6.0
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
game-titlevalue
0Dota 24841.0
1Team Fortress 22323.0
2Unturned1563.0
3Counter-Strike Global Offensive1412.0
4Half-Life 2 Lost Coast981.0
5Counter-Strike Source978.0
6Left 4 Dead 2951.0
7Counter-Strike856.0
8Warframe847.0
9Half-Life 2 Deathmatch823.0
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "steam_grouped = steam_df.loc[steam_df['behavior-name'] == 'purchase', ['game-title', 'value']]\n", - "steam_grouped = steam_grouped.groupby('game-title').sum()\n", - "display(HTML(steam_grouped.head(10).to_html()))\n", - "\n", - "steam_grouped = steam_grouped.sort_values(by='value', ascending=False).reset_index()\n", - "\n", - "display(HTML(steam_grouped.head(10).to_html()))" - ] - }, - { - "cell_type": "markdown", - "id": "indie-calcium", - "metadata": {}, - "source": [ - "## Iterating over a DataFrame (if possible, use column operations instead)" - ] - }, - { - "cell_type": "code", - "execution_count": 63, - "id": "laden-intersection", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[0, 151603712, The Elder Scrolls V Skyrim, purchase]\n", - "[1, 151603712, The Elder Scrolls V Skyrim, play]\n", - "[2, 151603712, Fallout 4, purchase]\n", - "[3, 151603712, Fallout 4, play]\n", - "[4, 151603712, Spore, purchase]\n", - "[5, 151603712, Spore, play]\n", - "[6, 151603712, Fallout New Vegas, purchase]\n", - "[7, 151603712, Fallout New Vegas, play]\n", - "[8, 151603712, Left 4 Dead 2, purchase]\n", - "[9, 151603712, Left 4 Dead 2, play]\n" - ] - } - ], - "source": [ - "i = 0\n", - "for idx, row in steam_df.iterrows():\n", - " print(\"[{}, {}, {}, {}]\".format(idx, row['user-id'], row['game-title'], row['behavior-name']))\n", - " i += 1\n", - " if i == 10:\n", - " break" - ] - }, - { - "cell_type": "markdown", - "id": "objective-associate", - "metadata": {}, - "source": [ - "## Pandas tasks - Steam dataset" - ] - }, - { - "cell_type": "markdown", - "id": "floppy-american", - "metadata": {}, - "source": [ - "**Task 4.** How many people made a purchase in the Steam dataset? Remember that a person could by many games, but you need to count every person once." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "decimal-grass", - "metadata": {}, - "outputs": [], - "source": [ - "# Write your code here" - ] - }, - { - "cell_type": "markdown", - "id": "protected-glossary", - "metadata": {}, - "source": [ - "**Task 5.** How many people made a purchase of \"The Elder Scrolls V Skyrim\"?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "distant-overview", - "metadata": {}, - "outputs": [], - "source": [ - "# Write your code here" - ] - }, - { - "cell_type": "markdown", - "id": "vocational-weekly", - "metadata": {}, - "source": [ - "**Task 6.** How many purchases did people make on average?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "reflected-cathedral", - "metadata": {}, - "outputs": [], - "source": [ - "# Write your code here" - ] - }, - { - "cell_type": "markdown", - "id": "signed-transaction", - "metadata": {}, - "source": [ - "**Task 7.** Who bought the most games?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "handmade-revolution", - "metadata": {}, - "outputs": [], - "source": [ - "# Write your code here" - ] - }, - { - "cell_type": "markdown", - "id": "piano-bobby", - "metadata": {}, - "source": [ - "**Task 8.** How many hours on average did people play \"The Elder Scrolls V Skyrim\"?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "hydraulic-observation", - "metadata": {}, - "outputs": [], - "source": [ - "# Write your code here" - ] - }, - { - "cell_type": "markdown", - "id": "stuffed-creativity", - "metadata": {}, - "source": [ - "**Task 9.** Which games were played the most (in terms of the number of hours played)? Print the first 10 titles and respective numbers of hours."
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "challenging-truck", - "metadata": {}, - "outputs": [], - "source": [ - "# Write your code here" - ] - }, - { - "cell_type": "markdown", - "id": "crude-petroleum", - "metadata": {}, - "source": [ - "**Task 10.** Which games are the most consistently played (in terms of the average number of hours played)? Print the first 10 titles and respective numbers of hours." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "surgical-lawsuit", - "metadata": {}, - "outputs": [], - "source": [ - "# Write your code here" - ] - }, - { - "cell_type": "markdown", - "id": "monetary-toyota", - "metadata": {}, - "source": [ - "**Task 11\\*\\*.** Fix the above for the fact that 0 hours played is not listed, but only a purchase is recorded in such a case." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "protective-report", - "metadata": {}, - "outputs": [], - "source": [ - "# Write your code here" - ] - }, - { - "cell_type": "markdown", - "id": "ceramic-awareness", - "metadata": {}, - "source": [ - "**Task 12.** Apply the sigmoid function\n", - "$$f(x) = \\frac{1}{1 + e^{-\\frac{1}{100}x}}$$\n", - "to hours played and print the first 10 rows from the entire Steam dataset after this change." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "optical-announcement", - "metadata": {}, - "outputs": [], - "source": [ - "# Write your code here" - ] - }, - { - "cell_type": "markdown", - "id": "actual-spotlight", - "metadata": {}, - "source": [ - "## Pandas tasks - MovieLens dataset" - ] - }, - { - "cell_type": "markdown", - "id": "inclusive-crash", - "metadata": {}, - "source": [ - "**Task 13\\*.** Calculate popularity (by the number of users who watched a movie) of all genres." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "developmental-seven", - "metadata": {}, - "outputs": [], - "source": [ - "# Write your code here" - ] - }, - { - "cell_type": "markdown", - "id": "personalized-finland", - "metadata": {}, - "source": [ - "**Task 14\\*.** Calculate average rating for all genres." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "inside-personal", - "metadata": {}, - "outputs": [], - "source": [ - "# Write your code here" - ] - }, - { - "cell_type": "markdown", - "id": "vertical-stick", - "metadata": {}, - "source": [ - "**Task 15.** Calculate each movie rating bias (deviation from the mean of all movies average ratings). Print first 10 in the form: title, average rating, bias." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "greatest-screen", - "metadata": {}, - "outputs": [], - "source": [ - "# Write your code here" - ] - }, - { - "cell_type": "markdown", - "id": "hawaiian-haiti", - "metadata": {}, - "source": [ - "**Task 16.** Calculate each user rating bias (deviation from the mean of all users average ratings). Print first 10 in the form: user_id, average rating, bias." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "charitable-guyana", - "metadata": {}, - "outputs": [], - "source": [ - "# Write your code here" - ] - }, - { - "cell_type": "markdown", - "id": "intimate-porcelain", - "metadata": {}, - "source": [ - "**Task 17.** Randomly choose 10 movies and 10 users and print their interaction matrix in the form of a DataFrame with user_id as index and movie titles as columns (use HTML Display for that). You can iterate over the DataFrame in this task." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "brazilian-frost", - "metadata": {}, - "outputs": [], - "source": [ - "# Write your code here" - ] - }, - { - "cell_type": "markdown", - "id": "boolean-modem", - "metadata": {}, - "source": [ - "## Pandas + numpy tasks" - ] - }, - { - "cell_type": "markdown", - "id": "worldwide-disclaimer", - "metadata": {}, - "source": [ - "**Task 18.** Create the entire interaction matrix for the MovieLens dataset." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "marine-initial", - "metadata": {}, - "outputs": [], - "source": [ - "# Write your code here" - ] - }, - { - "cell_type": "markdown", - "id": "micro-vision", - "metadata": {}, - "source": [ - "**Task 19.** Calculate the matrix of size (n_users, n_users) where at position (i, j) is the number of movies watched both by user i and user j. Print the submatrix of first 10 rows and 10 columns." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "swedish-lambda", - "metadata": {}, - "outputs": [], - "source": [ - "# Write your code here" - ] - }, - { - "cell_type": "markdown", - "id": "shaped-advance", - "metadata": {}, - "source": [ - "**Task 20.** Calculate the matrix of size (n_items, n_items) where at position (i, j) is the number of users who watched both movie i and movie j. Print the submatrix of first 10 rows and 10 columns." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "quality-bubble", - "metadata": {}, - "outputs": [], - "source": [ - "# Write your code here" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/class_3_content_based_recommenders.ipynb b/class_3_content_based_recommenders.ipynb deleted file mode 100644 index 2f51820..0000000 --- a/class_3_content_based_recommenders.ipynb +++ /dev/null @@ -1,1131 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "literary-toyota", - "metadata": {}, - "source": [ - "# Content-based recommenders\n", - "\n", - "Content-based recommenders in their recommendations rely purely on the features of items. Conceptually it can be expressed as a model of the form (personalized):\n", - "
\n", - "$$\n", - " score \\sim (user, item\\_feature_1, item\\_feature_2, ..., item\\_feature_n)\n", - "$$\n", - "
\n", - "or (not personalized)\n", - "
\n", - "$$\n", - " score \\sim (item\\_feature_1, item\\_feature_2, ..., item\\_feature_n)\n", - "$$\n", - "
\n", - "\n", - " + Content-based recommenders do not suffer from the cold-start problem for new items.\n", - " - They do not use information about complex patterns of user-item interactions - what other similar users have already discovered and liked." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "exciting-specific", - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib inline\n", - "%load_ext autoreload\n", - "%autoreload 2\n", - "\n", - "import numpy as np\n", - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "from IPython.display import Markdown, display, HTML\n", - "from collections import defaultdict\n", - "from sklearn.model_selection import KFold\n", - "\n", - "# Fix the dying kernel problem (only a problem in some installations - you can remove it, if it works without it)\n", - "import os\n", - "os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'" - ] - }, - { - "cell_type": "markdown", - "id": "administrative-charleston", - "metadata": {}, - "source": [ - "# Load the data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "architectural-andrews", - "metadata": {}, - "outputs": [], - "source": [ - "ml_ratings_df = pd.read_csv(os.path.join(\"data\", \"movielens_small\", \"ratings.csv\")).rename(columns={'userId': 'user_id', 'movieId': 'item_id'})\n", - "ml_movies_df = pd.read_csv(os.path.join(\"data\", \"movielens_small\", \"movies.csv\")).rename(columns={'movieId': 'item_id'})\n", - "ml_df = pd.merge(ml_ratings_df, ml_movies_df, on='item_id')\n", - "ml_df.head(10)\n", - "\n", - "display(HTML(ml_movies_df.head(10).to_html()))\n", - "\n", - "# Filter the data to reduce the number of movies\n", - "rng = np.random.RandomState(seed=6789)\n", - "left_ids = rng.choice(ml_movies_df['item_id'], size=100, replace=False)\n", - "\n", - "ml_ratings_df = ml_ratings_df.loc[ml_ratings_df['item_id'].isin(left_ids)]\n", - "ml_movies_df = 
ml_movies_df.loc[ml_movies_df['item_id'].isin(left_ids)]\n", - "ml_df = ml_df.loc[ml_df['item_id'].isin(left_ids)]\n", - "\n", - "print(\"Number of left interactions: {}\".format(len(ml_ratings_df)))" - ] - }, - { - "cell_type": "markdown", - "id": "effective-renaissance", - "metadata": {}, - "source": [ - "# Recommender class\n", - "\n", - "Remark: Docstrings written in reStructuredText (reST) used by Sphinx to automatically generate code documentation. It is also used by default by PyCharm (type triple quotes after defining a class or a method and hit enter)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cooperative-synthesis", - "metadata": {}, - "outputs": [], - "source": [ - "class Recommender(object):\n", - " \"\"\"\n", - " Base recommender class.\n", - " \"\"\"\n", - " \n", - " def __init__(self):\n", - " \"\"\"\n", - " Initialize base recommender params and variables.\n", - " \"\"\"\n", - " pass\n", - " \n", - " def fit(self, interactions_df, users_df, items_df):\n", - " \"\"\"\n", - " Training of the recommender.\n", - " \n", - " :param pd.DataFrame interactions_df: DataFrame with recorded interactions between users and items \n", - " defined by user_id, item_id and features of the interaction.\n", - " :param pd.DataFrame users_df: DataFrame with users and their features defined by user_id and the user feature columns.\n", - " :param pd.DataFrame items_df: DataFrame with items and their features defined by item_id and the item feature columns.\n", - " \"\"\"\n", - " pass\n", - " \n", - " def recommend(self, users_df, items_df, n_recommendations=1):\n", - " \"\"\"\n", - " Serving of recommendations. 
Scores items in items_df for each user in users_df and returns \n", - " top n_recommendations for each user.\n", - " \n", - " :param pd.DataFrame users_df: DataFrame with users and their features for which recommendations should be generated.\n", - " :param pd.DataFrame items_df: DataFrame with items and their features which should be scored.\n", - " :param int n_recommendations: Number of recommendations to be returned for each user.\n", - " :return: DataFrame with user_id, item_id and score as columns returning n_recommendations top recommendations \n", - " for each user.\n", - " :rtype: pd.DataFrame\n", - " \"\"\"\n", - " \n", - " recommendations = pd.DataFrame(columns=['user_id', 'item_id', 'score'])\n", - " \n", - " for ix, user in users_df.iterrows():\n", - " user_recommendations = pd.DataFrame({'user_id': user['user_id'],\n", - " 'item_id': [-1] * n_recommendations,\n", - " 'score': [3.0] * n_recommendations})\n", - "\n", - " recommendations = pd.concat([recommendations, user_recommendations])\n", - "\n", - " return recommendations" - ] - }, - { - "cell_type": "markdown", - "id": "cleared-warehouse", - "metadata": {}, - "source": [ - "# Evaluation measures" - ] - }, - { - "cell_type": "markdown", - "id": "overall-perspective", - "metadata": {}, - "source": [ - "## Explicit feedback - ratings" - ] - }, - { - "cell_type": "markdown", - "id": "tamil-anderson", - "metadata": {}, - "source": [ - "### RMSE - Root Mean Squared Error\n", - "\n", - "
\n", - "$$\n", - " RMSE = \\sqrt{\\frac{\\sum_{i}^N (\\hat{r}_i - r_i)^2}{N}}\n", - "$$\n", - "
\n", - "\n", - "where $\\hat{r}_i$ are the predicted ratings and $r_i$ are the real ratings and $N$ is the number of items in the test set.\n", - "\n", - " + Very well-behaved analytically and therefore extensively used to train models, especially neural networks.\n", - " - The scale of errors depends on the data, which reduces comparability between different datasets." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "entitled-snake", - "metadata": {}, - "outputs": [], - "source": [ - "def rmse(r_pred, r_real):\n", - " return np.sqrt(np.sum(np.power(r_pred - r_real, 2)) / len(r_pred))\n", - "\n", - "# Test\n", - "\n", - "print(\"RMSE = {:.2f}\".format(rmse(np.array([2.1, 1.2, 3.8, 4.2, 3.6]), np.array([3, 2, 4, 5, 1]))))" - ] - }, - { - "cell_type": "markdown", - "id": "unknown-arrival", - "metadata": {}, - "source": [ - "### MRE - Mean Relative Error\n", - "\n", - "
\n", - "$$\n", - " MRE = \\frac{1}{N} \\sum_{i}^N \\frac{|\\hat{r}_i - r_i|}{|r_i|}\n", - "$$\n", - "
\n", - "\n", - "where $\\hat{r}_i$ are the predicted ratings and $r_i$ are the real ratings and $N$ is the number of items in the test set.\n", - "\n", - " + Easily interpretable (average percentage error) and with a meaning understandable for business.\n", - " - Blows up when there are values close to zero among the real values." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "dying-course", - "metadata": {}, - "outputs": [], - "source": [ - "def mre(r_pred, r_real):\n", - " return 1 / len(r_pred) * np.sum(np.abs(r_pred - r_real) / np.abs(r_real))\n", - "\n", - "# Test\n", - "\n", - "print(\"MRE = {:.4f}\".format(mre(np.array([2.1, 1.2, 3.8, 4.2, 3.6]), np.array([3, 2, 4, 5, 1]))))" - ] - }, - { - "cell_type": "markdown", - "id": "imported-contribution", - "metadata": {}, - "source": [ - "### TRE - Total Relative Error\n", - "\n", - "
\n", - "$$\n", - " TRE = \\frac{\\sum_{i}^N |\\hat{r}_i - r_i|}{\\sum_{i}^N |r_i|}\n", - "$$\n", - "
\n", - "\n", - "where $\\hat{r}_i$ are the predicted ratings and $r_i$ are the real ratings and $N$ is the number of items in the test set.\n", - "\n", - " + Easily interpretable (total percentage error) and with a meaning understandable for business.\n", - " + Reliable even when some real values are very small.\n", - " - Does not distinguish between a case when one prediction is very bad and the others are very good and a case when all predictions are mediocre." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "premium-trouble", - "metadata": {}, - "outputs": [], - "source": [ - "def tre(r_pred, r_real):\n", - " return np.sum(np.abs(r_pred - r_real)) / np.sum(np.abs(r_real))\n", - "\n", - "# Test\n", - "\n", - "print(\"TRE = {:.4f}\".format(tre(np.array([2.1, 1.2, 3.8, 4.2, 3.6]), np.array([3, 2, 4, 5, 1]))))" - ] - }, - { - "cell_type": "markdown", - "id": "quantitative-navigation", - "metadata": {}, - "source": [ - "## Implicit feedback - binary indicators of interactions" - ] - }, - { - "cell_type": "markdown", - "id": "obvious-egypt", - "metadata": {}, - "source": [ - "### HR@n - Hit Ratio \n", - "How many hits did we score in the first n recommendations.\n", - "
\n", - "
\n", - "
\n", - "$$\n", - " \\text{HR@}n = \\frac{\\sum_{u} \\sum_{i \\in I_u} r_{u, i} \\cdot 1_{\\hat{D}_n(u)}(i)}{M}\n", - "$$\n", - "
\n", - "\n", - "where:\n", - " * $r_{u, i}$ is $1$ if there was an interaction between user $u$ and item $i$ in the test set and $0$ otherwise, \n", - " * $\\hat{D}_n$ is the set of the first $n$ recommendations for user $u$, \n", - " * $1_{\\hat{D}_n}(i)$ is $1$ if and only if $i \\in \\hat{D}_n$, otherwise it's equal to $0$,\n", - " * $M$ is the number of users.\n", - "\n", - "\n", - " + Easily interpretable.\n", - " - Does not take the rank of each recommendation into account." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "found-amazon", - "metadata": {}, - "outputs": [], - "source": [ - "def hr(recommendations, real_interactions, n=1):\n", - " \"\"\"\n", - " Assumes recommendations are ordered by user_id and then by score.\n", - " \"\"\"\n", - " # Transform real_interactions to a dict for a large speed-up\n", - " rui = defaultdict(lambda: 0)\n", - " \n", - " for idx, row in real_interactions.iterrows():\n", - " rui[(row['user_id'], row['item_id'])] = 1\n", - " \n", - " hr = 0.0\n", - " \n", - " previous_user_id = -1\n", - " rank = 0\n", - " for idx, row in recommendations.iterrows():\n", - " if previous_user_id == row['user_id']:\n", - " rank += 1\n", - " else:\n", - " rank = 1\n", - " \n", - " if rank <= n:\n", - " hr += rui[(row['user_id'], row['item_id'])]\n", - " \n", - " previous_user_id = row['user_id']\n", - " \n", - " hr /= len(recommendations['user_id'].unique())\n", - " \n", - " return hr\n", - "\n", - " \n", - "recommendations = pd.DataFrame(\n", - " [\n", - " [1, 13, 0.9],\n", - " [1, 45, 0.8],\n", - " [1, 22, 0.71],\n", - " [1, 77, 0.55],\n", - " [1, 9, 0.52],\n", - " [2, 11, 0.85],\n", - " [2, 13, 0.69],\n", - " [2, 25, 0.64],\n", - " [2, 6, 0.60],\n", - " [2, 77, 0.53]\n", - " \n", - " ], columns=['user_id', 'item_id', 'score'])\n", - "\n", - "display(HTML(recommendations.to_html()))\n", - "\n", - "real_interactions = pd.DataFrame(\n", - " [\n", - " [1, 45],\n", - " [1, 22],\n", - " [1, 77],\n", - " [2, 13],\n", - " [2, 
77]\n", - " \n", - " ], columns=['user_id', 'item_id'])\n", - "\n", - "display(HTML(real_interactions.to_html()))\n", - " \n", - "print(\"HR@3 = {:.4f}\".format(hr(recommendations, real_interactions, n=3)))" - ] - }, - { - "cell_type": "markdown", - "id": "behind-munich", - "metadata": {}, - "source": [ - "### NDCG@n - Normalized Discounted Cumulative Gain\n", - "\n", - "How many hits did we score in the first n recommendations discounted by the position of each recommendation.\n", - "
\n", - "
\n", - "
\n", - "$$\n", - " \\text{NDCG@}n = \\frac{\\sum_{u} \\sum_{i \\in I_u} \\frac{r_{u, i}}{\\log_2\\left(1 + v_{\\hat{D}_n(u)}(i)\\right)}}{M}\n", - "$$\n", - "
\n", - "\n", - "where:\n", - " * $r_{u, i}$ is $1$ if there was an interaction between user $u$ and item $i$ in the test set and $0$ otherwise, \n", - " * $\\hat{D}_n(u)$ is the set of the first $n$ recommendations for user $u$, \n", - " * $v_{\\hat{D}_n(u)}(i)$ is the position of item $i$ in recommendations $\\hat{D}_n$,\n", - " * $M$ is the number of users.\n", - "\n", - "\n", - " - Takes the rank of each recommendation into account." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "floral-anatomy", - "metadata": {}, - "outputs": [], - "source": [ - "def ndcg(recommendations, real_interactions, n=1):\n", - " \"\"\"\n", - " Assumes recommendations are ordered by user_id and then by score.\n", - " \"\"\"\n", - " # Transform real_interactions to a dict for a large speed-up\n", - " rui = defaultdict(lambda: 0)\n", - " \n", - " for idx, row in real_interactions.iterrows():\n", - " rui[(row['user_id'], row['item_id'])] = 1\n", - " \n", - " ndcg = 0.0\n", - " \n", - " previous_user_id = -1\n", - " rank = 0\n", - " for idx, row in recommendations.iterrows():\n", - " if previous_user_id == row['user_id']:\n", - " rank += 1\n", - " else:\n", - " rank = 1\n", - " \n", - " if rank <= n:\n", - " ndcg += rui[(row['user_id'], row['item_id'])] / np.log2(1 + rank)\n", - " \n", - " previous_user_id = row['user_id']\n", - " \n", - " ndcg /= len(recommendations['user_id'].unique())\n", - " \n", - " return ndcg\n", - "\n", - " \n", - "recommendations = pd.DataFrame(\n", - " [\n", - " [1, 13, 0.9],\n", - " [1, 45, 0.8],\n", - " [1, 22, 0.71],\n", - " [1, 77, 0.55],\n", - " [1, 9, 0.52],\n", - " [2, 11, 0.85],\n", - " [2, 13, 0.69],\n", - " [2, 25, 0.64],\n", - " [2, 6, 0.60],\n", - " [2, 77, 0.53]\n", - " \n", - " ], columns=['user_id', 'item_id', 'score'])\n", - "\n", - "display(HTML(recommendations.to_html()))\n", - "\n", - "real_interactions = pd.DataFrame(\n", - " [\n", - " [1, 45],\n", - " [1, 22],\n", - " [1, 77],\n", - " [2, 13],\n", - " [2, 77]\n", - " 
\n", - " ], columns=['user_id', 'item_id'])\n", - "\n", - "display(HTML(real_interactions.to_html()))\n", - " \n", - "print(\"NDCG@3 = {:.4f}\".format(ndcg(recommendations, real_interactions, n=3)))" - ] - }, - { - "cell_type": "markdown", - "id": "appointed-baltimore", - "metadata": {}, - "source": [ - "# Testing routines (offline)" - ] - }, - { - "cell_type": "markdown", - "id": "bizarre-elevation", - "metadata": {}, - "source": [ - "## Train and test set split" - ] - }, - { - "cell_type": "markdown", - "id": "fatty-blackjack", - "metadata": {}, - "source": [ - "### Explicit feedback" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "negative-cigarette", - "metadata": {}, - "outputs": [], - "source": [ - "def evaluate_train_test_split_explicit(recommender, interactions_df, items_df, seed=6789):\n", - " rng = np.random.RandomState(seed=seed)\n", - " \n", - " # Split the dataset into train and test\n", - " \n", - " shuffle = np.arange(len(interactions_df))\n", - " rng.shuffle(shuffle)\n", - " shuffle = list(shuffle)\n", - "\n", - " train_test_split = 0.8\n", - " split_index = int(len(interactions_df) * train_test_split)\n", - "\n", - " interactions_df_train = interactions_df.iloc[shuffle[:split_index]]\n", - " interactions_df_test = interactions_df.iloc[shuffle[split_index:]]\n", - " \n", - " # Train the recommender\n", - " \n", - " recommender.fit(interactions_df_train, None, items_df)\n", - " \n", - " # Gather predictions\n", - " \n", - " r_pred = []\n", - " \n", - " for idx, row in interactions_df_test.iterrows():\n", - " users_df = pd.DataFrame([row['user_id']], columns=['user_id'])\n", - " eval_items_df = pd.DataFrame([row['item_id']], columns=['item_id'])\n", - " eval_items_df = pd.merge(eval_items_df, items_df, on='item_id')\n", - " recommendations = recommender.recommend(users_df, eval_items_df, n_recommendations=1)\n", - " \n", - " r_pred.append(recommendations.iloc[0]['score'])\n", - " \n", - " # Gather real ratings\n", - " \n", - " 
r_real = np.array(interactions_df_test['rating'].tolist())\n", - " \n", - " # Return evaluation metrics\n", - " \n", - " return rmse(r_pred, r_real), mre(r_pred, r_real), tre(r_pred, r_real)\n", - "\n", - "recommender = Recommender()\n", - "\n", - "results = [['BaseRecommender'] + list(evaluate_train_test_split_explicit(\n", - " recommender, ml_ratings_df.loc[:, ['user_id', 'item_id', 'rating']], ml_movies_df))]\n", - "\n", - "results = pd.DataFrame(results, \n", - " columns=['Recommender', 'RMSE', 'MRE', 'TRE'])\n", - "\n", - "display(HTML(results.to_html()))" - ] - }, - { - "cell_type": "markdown", - "id": "naval-croatia", - "metadata": {}, - "source": [ - "### Implicit feedback" - ] - }, - { - "cell_type": "markdown", - "id": "separated-enclosure", - "metadata": {}, - "source": [ - "**Task 1.** Implement the following method for train-test split evaluation for implicit feedback." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "considerable-sunrise", - "metadata": {}, - "outputs": [], - "source": [ - "def evaluate_train_test_split_implicit(recommender, interactions_df, items_df, seed=6789):\n", - " # Write your code here\n", - " pass" - ] - }, - { - "cell_type": "markdown", - "id": "muslim-tunisia", - "metadata": {}, - "source": [ - "## Leave-one-out, leave-k-out, cross-validation" - ] - }, - { - "cell_type": "markdown", - "id": "adjusted-spirit", - "metadata": {}, - "source": [ - "### Explicit feedback" - ] - }, - { - "cell_type": "markdown", - "id": "alpine-luxembourg", - "metadata": {}, - "source": [ - "**Task 2.** Implement the following method for leave-one-out evaluation for explicit feedback." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "flexible-runner", - "metadata": {}, - "outputs": [], - "source": [ - "def evaluate_leave_one_out_explicit(recommender, interactions_df, items_df, max_evals=100, seed=6789):\n", - " # Write your code here\n", - " pass" - ] - }, - { - "cell_type": "markdown", - "id": "engaged-lloyd", - "metadata": {}, - "source": [ - "### Implicit feedback" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "surrounded-newton", - "metadata": {}, - "outputs": [], - "source": [ - "def evaluate_leave_one_out_implicit(recommender, interactions_df, items_df, max_evals=10, seed=6789):\n", - " rng = np.random.RandomState(seed=seed)\n", - " \n", - " # Prepare splits of the datasets\n", - " kf = KFold(n_splits=len(interactions_df), random_state=rng, shuffle=True)\n", - " \n", - " hr_1 = []\n", - " hr_3 = []\n", - " hr_5 = []\n", - " hr_10 = []\n", - " ndcg_1 = []\n", - " ndcg_3 = []\n", - " ndcg_5 = []\n", - " ndcg_10 = []\n", - " \n", - " # For each split of the dataset train the recommender, generate recommendations and evaluate\n", - " \n", - " n_eval = 1\n", - " for train_index, test_index in kf.split(interactions_df.index):\n", - " interactions_df_train = interactions_df.loc[interactions_df.index[train_index]]\n", - " interactions_df_test = interactions_df.loc[interactions_df.index[test_index]]\n", - " \n", - " recommender.fit(interactions_df_train, None, items_df)\n", - " recommendations = recommender.recommend(interactions_df_test.loc[:, ['user_id']], items_df, n_recommendations=10)\n", - " \n", - " hr_1.append(hr(recommendations, interactions_df_test, n=1))\n", - " hr_3.append(hr(recommendations, interactions_df_test, n=3))\n", - " hr_5.append(hr(recommendations, interactions_df_test, n=5))\n", - " hr_10.append(hr(recommendations, interactions_df_test, n=10))\n", - " ndcg_1.append(ndcg(recommendations, interactions_df_test, n=1))\n", - " ndcg_3.append(ndcg(recommendations, interactions_df_test, 
n=3))\n", - " ndcg_5.append(ndcg(recommendations, interactions_df_test, n=5))\n", - " ndcg_10.append(ndcg(recommendations, interactions_df_test, n=10))\n", - " \n", - " if n_eval == max_evals:\n", - " break\n", - " n_eval += 1\n", - " \n", - " hr_1 = np.mean(hr_1)\n", - " hr_3 = np.mean(hr_3)\n", - " hr_5 = np.mean(hr_5)\n", - " hr_10 = np.mean(hr_10)\n", - " ndcg_1 = np.mean(ndcg_1)\n", - " ndcg_3 = np.mean(ndcg_3)\n", - " ndcg_5 = np.mean(ndcg_5)\n", - " ndcg_10 = np.mean(ndcg_10)\n", - " \n", - " return hr_1, hr_3, hr_5, hr_10, ndcg_1, ndcg_3, ndcg_5, ndcg_10\n", - "\n", - "recommender = Recommender()\n", - "\n", - "results = [['BaseRecommender'] + list(evaluate_leave_one_out_implicit(\n", - " recommender, ml_ratings_df.loc[:, ['user_id', 'item_id']], ml_movies_df))]\n", - "\n", - "results = pd.DataFrame(results, \n", - " columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n", - "\n", - "display(HTML(results.to_html()))" - ] - }, - { - "cell_type": "markdown", - "id": "optional-chain", - "metadata": {}, - "source": [ - "# Linear Regression Recommender\n", - "\n", - "For every movie we transform its genres into one-hot encoded features and then fit a linear regression model to those features and actual ratings." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "sonic-horror", - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.linear_model import LinearRegression\n", - "from sklearn.preprocessing import MultiLabelBinarizer\n", - "\n", - "class LinearRegressionRecommender(object):\n", - " \"\"\"\n", - " Base recommender class.\n", - " \"\"\"\n", - " \n", - " def __init__(self):\n", - " \"\"\"\n", - " Initialize base recommender params and variables.\n", - " \"\"\"\n", - " self.model = None\n", - " self.mlb = None\n", - " \n", - " def fit(self, interactions_df, users_df, items_df):\n", - " \"\"\"\n", - " Training of the recommender.\n", - " \n", - " :param pd.DataFrame interactions_df: DataFrame with recorded interactions between users and items \n", - " defined by user_id, item_id and features of the interaction.\n", - " :param pd.DataFrame users_df: DataFrame with users and their features defined by user_id and the user feature columns.\n", - " :param pd.DataFrame items_df: DataFrame with items and their features defined by item_id and the item feature columns.\n", - " \"\"\"\n", - " \n", - " interactions_df = pd.merge(interactions_df, items_df, on='item_id')\n", - " interactions_df.loc[:, 'genres'] = interactions_df['genres'].str.replace(\"-\", \"_\", regex=False)\n", - " interactions_df.loc[:, 'genres'] = interactions_df['genres'].str.replace(\" \", \"_\", regex=False)\n", - " interactions_df.loc[:, 'genres'] = interactions_df['genres'].str.lower()\n", - " interactions_df.loc[:, 'genres'] = interactions_df['genres'].str.split(\"|\")\n", - " \n", - " self.mlb = MultiLabelBinarizer()\n", - " interactions_df = interactions_df.join(\n", - " pd.DataFrame(self.mlb.fit_transform(interactions_df.pop('genres')),\n", - " columns=self.mlb.classes_,\n", - " index=interactions_df.index))\n", - " \n", - "# print(interactions_df.head())\n", - " \n", - " x = interactions_df.loc[:, self.mlb.classes_].values\n", - " y = 
interactions_df['rating'].values\n", - " \n", - " self.model = LinearRegression().fit(x, y)\n", - " \n", - " def recommend(self, users_df, items_df, n_recommendations=1):\n", - " \"\"\"\n", - " Serving of recommendations. Scores items in items_df for each user in users_df and returns \n", - " top n_recommendations for each user.\n", - " \n", - " :param pd.DataFrame users_df: DataFrame with users and their features for which recommendations should be generated.\n", - " :param pd.DataFrame items_df: DataFrame with items and their features which should be scored.\n", - " :param int n_recommendations: Number of recommendations to be returned for each user.\n", - " :return: DataFrame with user_id, item_id and score as columns returning n_recommendations top recommendations \n", - " for each user.\n", - " :rtype: pd.DataFrame\n", - " \"\"\"\n", - " \n", - " # Transform the item to be scored into proper features\n", - " \n", - " items_df = items_df.copy()\n", - " items_df.loc[:, 'genres'] = items_df['genres'].str.replace(\"-\", \"_\", regex=False)\n", - " items_df.loc[:, 'genres'] = items_df['genres'].str.replace(\" \", \"_\", regex=False)\n", - " items_df.loc[:, 'genres'] = items_df['genres'].str.lower()\n", - " items_df.loc[:, 'genres'] = items_df['genres'].str.split(\"|\")\n", - " \n", - " items_df = items_df.join(\n", - " pd.DataFrame(self.mlb.transform(items_df.pop('genres')),\n", - " columns=self.mlb.classes_,\n", - " index=items_df.index))\n", - " \n", - "# print(items_df)\n", - " \n", - " # Score the item\n", - " \n", - " recommendations = pd.DataFrame(columns=['user_id', 'item_id', 'score'])\n", - " \n", - " for ix, user in users_df.iterrows():\n", - " score = self.model.predict(items_df.loc[:, self.mlb.classes_].values)[0]\n", - " \n", - " user_recommendations = pd.DataFrame({'user_id': [user['user_id']],\n", - " 'item_id': items_df.iloc[0]['item_id'],\n", - " 'score': score})\n", - "\n", - " recommendations = pd.concat([recommendations, 
user_recommendations])\n", - "\n", - " return recommendations" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "colored-favorite", - "metadata": {}, - "outputs": [], - "source": [ - "# Quick test of the recommender\n", - "\n", - "lr_recommender = LinearRegressionRecommender()\n", - "lr_recommender.fit(ml_ratings_df, None, ml_movies_df)\n", - "recommendations = lr_recommender.recommend(pd.DataFrame([[1], [2]], columns=['user_id']), ml_movies_df, 1)\n", - "\n", - "recommendations = pd.merge(recommendations, ml_movies_df, on='item_id')\n", - "display(HTML(recommendations.to_html()))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "national-sight", - "metadata": {}, - "outputs": [], - "source": [ - "lr_recommender = LinearRegressionRecommender()\n", - "\n", - "results = [['LinearRegressionRecommender'] + list(evaluate_train_test_split_explicit(\n", - " lr_recommender, ml_ratings_df.loc[:, ['user_id', 'item_id', 'rating']], ml_movies_df, seed=6789))]\n", - "\n", - "results = pd.DataFrame(results, \n", - " columns=['Recommender', 'RMSE', 'MRE', 'TRE'])\n", - "\n", - "display(HTML(results.to_html()))" - ] - }, - { - "cell_type": "markdown", - "id": "static-mozambique", - "metadata": {}, - "source": [ - "# TF-IDF Recommender\n", - "TF-IDF stands for term frequency–inverse document frequency. Typically, the TF-IDF method is used to assign keywords (words describing the gist of a document) to documents in a corpus of documents.\n", - "\n", - "In our case we will treat users as documents and genres as words.\n", - "\n", - "Term-frequency is given by the following formula:\n", - "
\n", - "$$\n", - " \\text{tf}(g, u) = f_{g, u}\n", - "$$\n", - "
\n", - "where $f_{g, u}$ is the number of times genre $g$ appears among the movies watched by user $u$.\n", - "\n", - "Inverse document frequency is defined as follows:\n", - "
\n", - "$$\n", - " \\text{idf}(g) = \\log \\frac{N}{n_g}\n", - "$$\n", - "
\n", - "where $N$ is the number of users and $n_g$ is the number of users with $g$ in their genres list.\n", - "\n", - "Finally, tf-idf is defined as follows:\n", - "
\n", - "$$\n", - " \\text{tfidf}(g, u) = \\text{tf}(g, u) \\cdot \\text{idf}(g)\n", - "$$\n", - "
\n", - "\n", - "In our case we will measure how often a given genre appears for movies watched by a given user vs how often it appears for all users. To obtain a movie score we will take the average of its genres' scores for this user." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "infrared-southwest", - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.feature_extraction.text import TfidfVectorizer\n", - "\n", - "class TFIDFRecommender(object):\n", - " \"\"\"\n", - " Recommender based on the TF-IDF method.\n", - " \"\"\"\n", - " \n", - " def __init__(self):\n", - " \"\"\"\n", - " Initialize base recommender params and variables.\n", - " \"\"\"\n", - " self.tfidf_scores = None\n", - " \n", - " def fit(self, interactions_df, users_df, items_df):\n", - " \"\"\"\n", - " Training of the recommender.\n", - " \n", - " :param pd.DataFrame interactions_df: DataFrame with recorded interactions between users and items \n", - " defined by user_id, item_id and features of the interaction.\n", - " :param pd.DataFrame users_df: DataFrame with users and their features defined by user_id and the user feature columns.\n", - " :param pd.DataFrame items_df: DataFrame with items and their features defined by item_id and the item feature columns.\n", - " \"\"\"\n", - " \n", - " self.tfidf_scores = defaultdict(lambda: 0.0)\n", - "\n", - " # Prepare the corpus for tfidf calculation\n", - " \n", - " interactions_df = pd.merge(interactions_df, items_df, on='item_id')\n", - " user_genres = interactions_df.loc[:, ['user_id', 'genres']]\n", - " user_genres.loc[:, 'genres'] = user_genres['genres'].str.replace(\"-\", \"_\", regex=False)\n", - " user_genres.loc[:, 'genres'] = user_genres['genres'].str.replace(\" \", \"_\", regex=False)\n", - " user_genres = user_genres.groupby('user_id').aggregate(lambda x: \"|\".join(x))\n", - " user_genres.loc[:, 'genres'] = user_genres['genres'].str.replace(\"|\", \" \", regex=False)\n", - "# print(user_genres)\n", - " 
user_ids = user_genres.index.tolist()\n", - " genres_corpus = user_genres['genres'].tolist()\n", - " \n", - " # Calculate tf-idf scores\n", - " \n", - " vectorizer = TfidfVectorizer()\n", - " tfidf_scores = vectorizer.fit_transform(genres_corpus)\n", - " \n", - " # Transform results into a dict {(user_id, genre): score}\n", - " \n", - " for u in range(tfidf_scores.shape[0]):\n", - " for g in range(tfidf_scores.shape[1]):\n", - " self.tfidf_scores[(user_ids[u], vectorizer.get_feature_names()[g])] = tfidf_scores[u, g]\n", - " \n", - "# print(self.tfidf_scores)\n", - " \n", - " def recommend(self, users_df, items_df, n_recommendations=1):\n", - " \"\"\"\n", - " Serving of recommendations. Scores items in items_df for each user in users_df and returns \n", - " top n_recommendations for each user.\n", - " \n", - " :param pd.DataFrame users_df: DataFrame with users and their features for which recommendations should be generated.\n", - " :param pd.DataFrame items_df: DataFrame with items and their features which should be scored.\n", - " :param int n_recommendations: Number of recommendations to be returned for each user.\n", - " :return: DataFrame with user_id, item_id and score as columns returning n_recommendations top recommendations \n", - " for each user.\n", - " :rtype: pd.DataFrame\n", - " \"\"\"\n", - " \n", - " recommendations = pd.DataFrame(columns=['user_id', 'item_id', 'score'])\n", - " \n", - " # Transform genres to a unified form used by the vectorizer\n", - " \n", - " items_df = items_df.copy()\n", - " items_df.loc[:, 'genres'] = items_df['genres'].str.replace(\"-\", \"_\", regex=False)\n", - " items_df.loc[:, 'genres'] = items_df['genres'].str.replace(\" \", \"_\", regex=False)\n", - " items_df.loc[:, 'genres'] = items_df['genres'].str.lower()\n", - " items_df.loc[:, 'genres'] = items_df['genres'].str.split(\"|\")\n", - " \n", - " # Score items \n", - " \n", - " for uix, user in users_df.iterrows():\n", - " items = []\n", - " for iix, item in 
items_df.iterrows():\n", - " score = 0.0\n", - " for genre in item['genres']:\n", - " score += self.tfidf_scores[(user['user_id'], genre)]\n", - " score /= len(item['genres'])\n", - " items.append((item['item_id'], score))\n", - " \n", - " items = sorted(items, key=lambda x: x[1], reverse=True)\n", - " user_recommendations = pd.DataFrame({'user_id': user['user_id'],\n", - " 'item_id': [item[0] for item in items][:n_recommendations],\n", - " 'score': [item[1] for item in items][:n_recommendations]})\n", - "\n", - " recommendations = pd.concat([recommendations, user_recommendations])\n", - "\n", - " return recommendations" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "oriented-service", - "metadata": {}, - "outputs": [], - "source": [ - "# Quick test of the recommender\n", - "\n", - "tfidf_recommender = TFIDFRecommender()\n", - "tfidf_recommender.fit(ml_ratings_df, None, ml_movies_df)\n", - "recommendations = tfidf_recommender.recommend(pd.DataFrame([[1], [2]], columns=['user_id']), ml_movies_df, 3)\n", - "\n", - "recommendations = pd.merge(recommendations, ml_movies_df, on='item_id')\n", - "display(HTML(recommendations.to_html()))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "qualified-westminster", - "metadata": {}, - "outputs": [], - "source": [ - "tfidf_recommender = TFIDFRecommender()\n", - "\n", - "results = [['TFIDFRecommender'] + list(evaluate_leave_one_out_implicit(\n", - " tfidf_recommender, ml_ratings_df.loc[:, ['user_id', 'item_id']], ml_movies_df, max_evals=300, seed=6789))]\n", - "\n", - "results = pd.DataFrame(results, \n", - " columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n", - "\n", - "display(HTML(results.to_html()))" - ] - }, - { - "cell_type": "markdown", - "id": "beautiful-snapshot", - "metadata": {}, - "source": [ - "# Tasks" - ] - }, - { - "cell_type": "markdown", - "id": "growing-maria", - "metadata": {}, - "source": [ - "**Task 3.** 
Implement the MostPopularRecommender (check the slides for class 1), evaluate it with leave-one-out procedure for implicit feedback, print HR@1, HR@3, HR@5, HR@10, NDCG@1, NDCG@3, NDCG@5, NDCG@10." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "strategic-commons", - "metadata": {}, - "outputs": [], - "source": [ - "# Write your code here" - ] - }, - { - "cell_type": "markdown", - "id": "black-schedule", - "metadata": {}, - "source": [ - "**Task 4.** Implement the HighestRatedRecommender (check the slides for class 1), evaluate it with leave-one-out procedure for implicit feedback, print HR@1, HR@3, HR@5, HR@10, NDCG@1, NDCG@3, NDCG@5, NDCG@10." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "likely-vatican", - "metadata": {}, - "outputs": [], - "source": [ - "# Write your code here" - ] - }, - { - "cell_type": "markdown", - "id": "handy-palmer", - "metadata": {}, - "source": [ - "**Task 5.** Implement the RandomRecommender (check the slides for class 1), evaluate it with leave-one-out procedure for implicit feedback, print HR@1, HR@3, HR@5, HR@10, NDCG@1, NDCG@3, NDCG@5, NDCG@10." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "minute-cliff", - "metadata": {}, - "outputs": [], - "source": [ - "# Write your code here" - ] - }, - { - "cell_type": "markdown", - "id": "animal-heart", - "metadata": {}, - "source": [ - "**Task 6.** Gather the results for TFIDFRecommender, MostPopularRecommender, HighestRatedRecommender, RandomRecommender in one DataFrame and print it." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "optical-creator", - "metadata": {}, - "outputs": [], - "source": [ - "# Write your code here" - ] - }, - { - "cell_type": "markdown", - "id": "visible-burlington", - "metadata": {}, - "source": [ - "**Task 7\\*.** Implement an SVRRecommender - one-hot encode genres and fit an SVR model to \n", - "\n", - "(genre_1, genre_2, ..., genre_N) -> rating\n", - "\n", - "Tune params of the SVR model to obtain as good results as you can. \n", - "\n", - "To do tuning properly (although in practice people are often happy with leave-one-out and do not bother with dividing the set into training, validation and test sets):\n", - " - divide the set into training, validation and test sets (randomly divide the dataset in proportions 60%-20%-20%),\n", - " - train the model with different sets of tunable parameters on the training set, \n", - " - choose the best tunable params based on results on the validation set, \n", - " - provide the final evaluation metrics on the test set for the best model obtained during tuning.\n", - "\n", - "Recommended method of tuning: use hyperopt. Install the package using the following command: `pip install hyperopt`\n", - " \n", - "Print the RMSE and MAE on the test set generated with numpy with seed 6789."
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "promotional-gregory", - "metadata": {}, - "outputs": [], - "source": [ - "# Write your code here" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/class_5_amazon_recommender.ipynb b/class_5_amazon_recommender.ipynb deleted file mode 100644 index 4ddc7a3..0000000 --- a/class_5_amazon_recommender.ipynb +++ /dev/null @@ -1,1719 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 112, - "id": "verified-accommodation", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The autoreload extension is already loaded. 
To reload it, use:\n", - " %reload_ext autoreload\n" - ] - } - ], - "source": [ - "%matplotlib inline\n", - "%load_ext autoreload\n", - "%autoreload 2\n", - "\n", - "import numpy as np\n", - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "from IPython.display import Markdown, display, HTML\n", - "from collections import defaultdict\n", - "from sklearn.model_selection import KFold\n", - "import scipy.special as scisp\n", - "\n", - "# Fix the dying kernel problem (only a problem in some installations - you can remove it, if it works without it)\n", - "import os\n", - "os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'" - ] - }, - { - "cell_type": "markdown", - "id": "educated-tourist", - "metadata": {}, - "source": [ - "# Load data" - ] - }, - { - "cell_type": "code", - "execution_count": 113, - "id": "looking-feeling", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
item_idtitlegenres
01Toy Story (1995)Adventure|Animation|Children|Comedy|Fantasy
12Jumanji (1995)Adventure|Children|Fantasy
23Grumpier Old Men (1995)Comedy|Romance
34Waiting to Exhale (1995)Comedy|Drama|Romance
45Father of the Bride Part II (1995)Comedy
56Heat (1995)Action|Crime|Thriller
67Sabrina (1995)Comedy|Romance
78Tom and Huck (1995)Adventure|Children
89Sudden Death (1995)Action
910GoldenEye (1995)Action|Adventure|Thriller
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Number of interactions left: 1170\n" - ] - } - ], - "source": [ - "ml_ratings_df = pd.read_csv(os.path.join(\"data\", \"movielens_small\", \"ratings.csv\")).rename(columns={'userId': 'user_id', 'movieId': 'item_id'})\n", - "ml_movies_df = pd.read_csv(os.path.join(\"data\", \"movielens_small\", \"movies.csv\")).rename(columns={'movieId': 'item_id'})\n", - "ml_df = pd.merge(ml_ratings_df, ml_movies_df, on='item_id')\n", - "ml_df.head(10)\n", - "\n", - "display(HTML(ml_movies_df.head(10).to_html()))\n", - "\n", - "# Filter the data to reduce the number of movies\n", - "seed = 6789\n", - "rng = np.random.RandomState(seed=seed)\n", - "left_ids = rng.choice(ml_movies_df['item_id'], size=100, replace=False)\n", - "\n", - "ml_ratings_df = ml_ratings_df.loc[ml_ratings_df['item_id'].isin(left_ids)]\n", - "ml_movies_df = ml_movies_df.loc[ml_movies_df['item_id'].isin(left_ids)]\n", - "ml_df = ml_df.loc[ml_df['item_id'].isin(left_ids)]\n", - "\n", - "print(\"Number of interactions left: {}\".format(len(ml_ratings_df)))" - ] - }, - { - "cell_type": "markdown", - "id": "protecting-recognition", - "metadata": {}, - "source": [ - "# Inner workings of the Amazon recommender fit method" - ] - }, - { - "cell_type": "markdown", - "id": "plastic-brooklyn", - "metadata": {}, - "source": [ - "## Shift item ids and user ids so that they are consecutive" - ] - }, - { - "cell_type": "code", - "execution_count": 71, - "id": "valuable-modem", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Item mapping\n", - "{780: 0, 1500: 1, 3479: 2, 171: 3, 1914: 4, 4896: 5, 145: 6, 267: 7, 355: 8, 435: 9, 6502: 10, 73323: 11, 112421: 12, 1783: 13, 2806: 14, 3040: 15, 3551: 16, 2135: 17, 39715: 18, 41566: 19, 5673: 20, 7064: 21, 481: 22, 6537: 23, 44761: 24, 2690: 25, 228: 26, 4890: 27, 3614: 28, 
3507: 29, 3628: 30, 5954: 31, 8605: 32, 3786: 33, 6755: 34, 3468: 35, 50601: 36, 3089: 37, 55444: 38, 118270: 39, 124404: 40, 3768: 41, 233: 42, 3687: 43, 171749: 44, 104218: 45, 182749: 46, 3342: 47, 65130: 48, 84952: 49, 152970: 50, 3067: 51, 4031: 52, 1107: 53, 47382: 54, 3801: 55, 5155: 56, 5612: 57, 5214: 58, 67295: 59, 3165: 60, 1752: 61, 31223: 62, 6713: 63, 66783: 64, 2043: 65, 2903: 66, 3313: 67, 4009: 68, 91842: 69, 2190: 70, 7282: 71, 4483: 72, 2275: 73, 3567: 74, 190207: 75, 4505: 76, 95147: 77, 4552: 78, 6033: 79, 2521: 80, 4397: 81, 151315: 82, 156706: 83, 151311: 84, 959: 85, 3714: 86, 4164: 87, 4796: 88, 31260: 89, 6927: 90, 126142: 91, 73804: 92, 26357: 93, 82684: 94, 6342: 95, 32799: 96, 31921: 97, 2892: 98, 2737: 99}\n", - "\n", - "Item reverse mapping\n", - "{0: 780, 1: 1500, 2: 3479, 3: 171, 4: 1914, 5: 4896, 6: 145, 7: 267, 8: 355, 9: 435, 10: 6502, 11: 73323, 12: 112421, 13: 1783, 14: 2806, 15: 3040, 16: 3551, 17: 2135, 18: 39715, 19: 41566, 20: 5673, 21: 7064, 22: 481, 23: 6537, 24: 44761, 25: 2690, 26: 228, 27: 4890, 28: 3614, 29: 3507, 30: 3628, 31: 5954, 32: 8605, 33: 3786, 34: 6755, 35: 3468, 36: 50601, 37: 3089, 38: 55444, 39: 118270, 40: 124404, 41: 3768, 42: 233, 43: 3687, 44: 171749, 45: 104218, 46: 182749, 47: 3342, 48: 65130, 49: 84952, 50: 152970, 51: 3067, 52: 4031, 53: 1107, 54: 47382, 55: 3801, 56: 5155, 57: 5612, 58: 5214, 59: 67295, 60: 3165, 61: 1752, 62: 31223, 63: 6713, 64: 66783, 65: 2043, 66: 2903, 67: 3313, 68: 4009, 69: 91842, 70: 2190, 71: 7282, 72: 4483, 73: 2275, 74: 3567, 75: 190207, 76: 4505, 77: 95147, 78: 4552, 79: 6033, 80: 2521, 81: 4397, 82: 151315, 83: 156706, 84: 151311, 85: 959, 86: 3714, 87: 4164, 88: 4796, 89: 31260, 90: 6927, 91: 126142, 92: 73804, 93: 26357, 94: 82684, 95: 6342, 96: 32799, 97: 31921, 98: 2892, 99: 2737}\n", - "\n", - "User mapping\n", - "{1: 0, 4: 1, 6: 2, 7: 3, 11: 4, 15: 5, 17: 6, 18: 7, 19: 8, 20: 9, 21: 10, 22: 11, 23: 12, 24: 13, 27: 14, 28: 15, 29: 16, 31: 17, 32: 18, 33: 19, 34: 
20, 36: 21, 38: 22, 39: 23, 40: 24, 41: 25, 42: 26, 43: 27, 44: 28, 45: 29, 46: 30, 48: 31, 50: 32, 51: 33, 53: 34, 57: 35, 58: 36, 59: 37, 61: 38, 62: 39, 63: 40, 64: 41, 66: 42, 67: 43, 68: 44, 70: 45, 71: 46, 72: 47, 73: 48, 74: 49, 75: 50, 76: 51, 78: 52, 80: 53, 82: 54, 83: 55, 84: 56, 86: 57, 88: 58, 89: 59, 90: 60, 91: 61, 94: 62, 95: 63, 96: 64, 99: 65, 100: 66, 101: 67, 103: 68, 104: 69, 105: 70, 106: 71, 108: 72, 109: 73, 111: 74, 112: 75, 113: 76, 114: 77, 115: 78, 116: 79, 117: 80, 120: 81, 121: 82, 122: 83, 125: 84, 129: 85, 132: 86, 133: 87, 135: 88, 136: 89, 137: 90, 139: 91, 140: 92, 141: 93, 142: 94, 144: 95, 148: 96, 149: 97, 150: 98, 151: 99, 153: 100, 154: 101, 156: 102, 158: 103, 160: 104, 161: 105, 162: 106, 164: 107, 165: 108, 166: 109, 167: 110, 169: 111, 170: 112, 171: 113, 173: 114, 174: 115, 175: 116, 176: 117, 177: 118, 178: 119, 179: 120, 181: 121, 182: 122, 184: 123, 186: 124, 187: 125, 190: 126, 194: 127, 195: 128, 198: 129, 199: 130, 200: 131, 201: 132, 202: 133, 203: 134, 204: 135, 205: 136, 206: 137, 210: 138, 212: 139, 213: 140, 214: 141, 215: 142, 216: 143, 217: 144, 219: 145, 220: 146, 221: 147, 222: 148, 223: 149, 226: 150, 229: 151, 230: 152, 232: 153, 233: 154, 234: 155, 235: 156, 236: 157, 239: 158, 240: 159, 243: 160, 244: 161, 246: 162, 247: 163, 249: 164, 254: 165, 256: 166, 257: 167, 260: 168, 262: 169, 263: 170, 264: 171, 265: 172, 266: 173, 269: 174, 270: 175, 271: 176, 273: 177, 274: 178, 275: 179, 276: 180, 277: 181, 279: 182, 280: 183, 282: 184, 283: 185, 284: 186, 287: 187, 288: 188, 290: 189, 291: 190, 292: 191, 294: 192, 297: 193, 298: 194, 301: 195, 302: 196, 303: 197, 304: 198, 305: 199, 306: 200, 307: 201, 308: 202, 310: 203, 312: 204, 313: 205, 314: 206, 318: 207, 321: 208, 322: 209, 325: 210, 328: 211, 330: 212, 331: 213, 332: 214, 333: 215, 334: 216, 335: 217, 337: 218, 338: 219, 339: 220, 340: 221, 341: 222, 345: 223, 347: 224, 349: 225, 352: 226, 353: 227, 354: 228, 356: 229, 357: 230, 359: 231, 361: 232, 
364: 233, 365: 234, 366: 235, 367: 236, 368: 237, 369: 238, 370: 239, 373: 240, 374: 241, 376: 242, 380: 243, 381: 244, 382: 245, 383: 246, 384: 247, 385: 248, 386: 249, 387: 250, 389: 251, 391: 252, 395: 253, 399: 254, 402: 255, 408: 256, 409: 257, 410: 258, 411: 259, 412: 260, 413: 261, 414: 262, 415: 263, 417: 264, 419: 265, 420: 266, 422: 267, 423: 268, 425: 269, 426: 270, 427: 271, 428: 272, 431: 273, 432: 274, 434: 275, 436: 276, 437: 277, 438: 278, 440: 279, 445: 280, 446: 281, 447: 282, 448: 283, 451: 284, 452: 285, 453: 286, 455: 287, 456: 288, 460: 289, 462: 290, 463: 291, 464: 292, 465: 293, 466: 294, 467: 295, 469: 296, 474: 297, 475: 298, 477: 299, 479: 300, 480: 301, 482: 302, 483: 303, 484: 304, 486: 305, 489: 306, 490: 307, 491: 308, 492: 309, 495: 310, 500: 311, 501: 312, 503: 313, 504: 314, 505: 315, 509: 316, 510: 317, 511: 318, 513: 319, 514: 320, 517: 321, 521: 322, 522: 323, 524: 324, 525: 325, 527: 326, 529: 327, 533: 328, 534: 329, 536: 330, 537: 331, 540: 332, 542: 333, 543: 334, 544: 335, 552: 336, 553: 337, 555: 338, 556: 339, 557: 340, 558: 341, 559: 342, 560: 343, 561: 344, 562: 345, 563: 346, 564: 347, 566: 348, 567: 349, 570: 350, 573: 351, 577: 352, 579: 353, 580: 354, 581: 355, 584: 356, 585: 357, 586: 358, 587: 359, 589: 360, 590: 361, 592: 362, 593: 363, 594: 364, 595: 365, 596: 366, 597: 367, 599: 368, 600: 369, 602: 370, 603: 371, 604: 372, 605: 373, 606: 374, 607: 375, 608: 376, 610: 377}\n", - "\n", - "User reverse mapping\n", - "{0: 1, 1: 4, 2: 6, 3: 7, 4: 11, 5: 15, 6: 17, 7: 18, 8: 19, 9: 20, 10: 21, 11: 22, 12: 23, 13: 24, 14: 27, 15: 28, 16: 29, 17: 31, 18: 32, 19: 33, 20: 34, 21: 36, 22: 38, 23: 39, 24: 40, 25: 41, 26: 42, 27: 43, 28: 44, 29: 45, 30: 46, 31: 48, 32: 50, 33: 51, 34: 53, 35: 57, 36: 58, 37: 59, 38: 61, 39: 62, 40: 63, 41: 64, 42: 66, 43: 67, 44: 68, 45: 70, 46: 71, 47: 72, 48: 73, 49: 74, 50: 75, 51: 76, 52: 78, 53: 80, 54: 82, 55: 83, 56: 84, 57: 86, 58: 88, 59: 89, 60: 90, 61: 91, 62: 94, 63: 95, 64: 96, 
65: 99, 66: 100, 67: 101, 68: 103, 69: 104, 70: 105, 71: 106, 72: 108, 73: 109, 74: 111, 75: 112, 76: 113, 77: 114, 78: 115, 79: 116, 80: 117, 81: 120, 82: 121, 83: 122, 84: 125, 85: 129, 86: 132, 87: 133, 88: 135, 89: 136, 90: 137, 91: 139, 92: 140, 93: 141, 94: 142, 95: 144, 96: 148, 97: 149, 98: 150, 99: 151, 100: 153, 101: 154, 102: 156, 103: 158, 104: 160, 105: 161, 106: 162, 107: 164, 108: 165, 109: 166, 110: 167, 111: 169, 112: 170, 113: 171, 114: 173, 115: 174, 116: 175, 117: 176, 118: 177, 119: 178, 120: 179, 121: 181, 122: 182, 123: 184, 124: 186, 125: 187, 126: 190, 127: 194, 128: 195, 129: 198, 130: 199, 131: 200, 132: 201, 133: 202, 134: 203, 135: 204, 136: 205, 137: 206, 138: 210, 139: 212, 140: 213, 141: 214, 142: 215, 143: 216, 144: 217, 145: 219, 146: 220, 147: 221, 148: 222, 149: 223, 150: 226, 151: 229, 152: 230, 153: 232, 154: 233, 155: 234, 156: 235, 157: 236, 158: 239, 159: 240, 160: 243, 161: 244, 162: 246, 163: 247, 164: 249, 165: 254, 166: 256, 167: 257, 168: 260, 169: 262, 170: 263, 171: 264, 172: 265, 173: 266, 174: 269, 175: 270, 176: 271, 177: 273, 178: 274, 179: 275, 180: 276, 181: 277, 182: 279, 183: 280, 184: 282, 185: 283, 186: 284, 187: 287, 188: 288, 189: 290, 190: 291, 191: 292, 192: 294, 193: 297, 194: 298, 195: 301, 196: 302, 197: 303, 198: 304, 199: 305, 200: 306, 201: 307, 202: 308, 203: 310, 204: 312, 205: 313, 206: 314, 207: 318, 208: 321, 209: 322, 210: 325, 211: 328, 212: 330, 213: 331, 214: 332, 215: 333, 216: 334, 217: 335, 218: 337, 219: 338, 220: 339, 221: 340, 222: 341, 223: 345, 224: 347, 225: 349, 226: 352, 227: 353, 228: 354, 229: 356, 230: 357, 231: 359, 232: 361, 233: 364, 234: 365, 235: 366, 236: 367, 237: 368, 238: 369, 239: 370, 240: 373, 241: 374, 242: 376, 243: 380, 244: 381, 245: 382, 246: 383, 247: 384, 248: 385, 249: 386, 250: 387, 251: 389, 252: 391, 253: 395, 254: 399, 255: 402, 256: 408, 257: 409, 258: 410, 259: 411, 260: 412, 261: 413, 262: 414, 263: 415, 264: 417, 265: 419, 266: 420, 267: 422, 268: 
423, 269: 425, 270: 426, 271: 427, 272: 428, 273: 431, 274: 432, 275: 434, 276: 436, 277: 437, 278: 438, 279: 440, 280: 445, 281: 446, 282: 447, 283: 448, 284: 451, 285: 452, 286: 453, 287: 455, 288: 456, 289: 460, 290: 462, 291: 463, 292: 464, 293: 465, 294: 466, 295: 467, 296: 469, 297: 474, 298: 475, 299: 477, 300: 479, 301: 480, 302: 482, 303: 483, 304: 484, 305: 486, 306: 489, 307: 490, 308: 491, 309: 492, 310: 495, 311: 500, 312: 501, 313: 503, 314: 504, 315: 505, 316: 509, 317: 510, 318: 511, 319: 513, 320: 514, 321: 517, 322: 521, 323: 522, 324: 524, 325: 525, 326: 527, 327: 529, 328: 533, 329: 534, 330: 536, 331: 537, 332: 540, 333: 542, 334: 543, 335: 544, 336: 552, 337: 553, 338: 555, 339: 556, 340: 557, 341: 558, 342: 559, 343: 560, 344: 561, 345: 562, 346: 563, 347: 564, 348: 566, 349: 567, 350: 570, 351: 573, 352: 577, 353: 579, 354: 580, 355: 581, 356: 584, 357: 585, 358: 586, 359: 587, 360: 589, 361: 590, 362: 592, 363: 593, 364: 594, 365: 595, 366: 596, 367: 597, 368: 599, 369: 600, 370: 602, 371: 603, 372: 604, 373: 605, 374: 606, 375: 607, 376: 608, 377: 610}\n", - "\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user_iditem_idratingtimestamp
42003.0964984086
97014.0964980985
216024.0964981725
310133.0945078428
398114.0964622830
416144.0964622714
513154.01007574532
616264.0845553966
629233.0845555402
# %%
# Re-index raw item and user ids to consecutive integers 0..n-1 (numbered in order of first
# appearance) so that they can be used directly as matrix row/column indices later on.
unique_item_ids = ml_ratings_df['item_id'].unique()
item_id_mapping = {raw_id: idx for idx, raw_id in enumerate(unique_item_ids)}
item_id_reverse_mapping = dict(enumerate(unique_item_ids))
unique_user_ids = ml_ratings_df['user_id'].unique()
user_id_mapping = {raw_id: idx for idx, raw_id in enumerate(unique_user_ids)}
user_id_reverse_mapping = dict(enumerate(unique_user_ids))

# Series.map is a plain per-value dictionary lookup. DataFrame.replace with a large dict whose
# keys (raw ids) and values (0..n-1) overlap is much slower and relies on replace's handling of
# chained substitutions, so it is avoided here. assign() returns a new frame, so ml_ratings_df
# is left untouched and the cell can be re-run safely.
interactions_df = ml_ratings_df.assign(
    item_id=ml_ratings_df['item_id'].map(item_id_mapping),
    user_id=ml_ratings_df['user_id'].map(user_id_mapping),
)

for mapping_title, mapping in [
    ("Item mapping", item_id_mapping),
    ("Item reverse mapping", item_id_reverse_mapping),
    ("User mapping", user_id_mapping),
    ("User reverse mapping", user_id_reverse_mapping),
]:
    print(mapping_title)
    print(mapping)
    print()

display(HTML(interactions_df.head(10).to_html()))

# %% [markdown]
# ## Get the number of items and users

# %%
# Ids are consecutive and start at 0, so the largest id plus one is the count.
n_items = np.max(interactions_df['item_id']) + 1
n_users = np.max(interactions_df['user_id']) + 1

print("n_items={}\nn_users={}".format(n_items, n_users))

# %% [markdown]
# ## Get the maximal number of interactions

# %%
# Number of interaction rows per user; the column is renamed purely for readability.
n_user_interactions = (
    interactions_df[['user_id', 'item_id']]
    .groupby("user_id")
    .count()
    .rename(columns={'item_id': 'n_items'})
)
max_interactions = n_user_interactions['n_items'].max()

print("max_interaction={}".format(max_interactions))

# %% [markdown]
# ## Calculate P_Y's
# %%
# P_Y for an item is the share of all interaction rows that belong to that item.
n_interactions = len(interactions_df)

# Compute the ratio directly from the grouped counts. Writing floats back into the integer count
# column with `.loc[:, col] = ...` is a silent upcast that newer pandas versions deprecate.
item_interaction_counts = interactions_df.groupby("item_id")['user_id'].count()
p_y = dict(zip(item_interaction_counts.index.tolist(),
               (item_interaction_counts / n_interactions).tolist()))

print(p_y)

# %% [markdown]
# ## For every X calculate the E[Y|X]
# %%
# e_xy[x][y] holds the expected number of users who bought both X and Y.
# -1e100 is only a placeholder: the loops below write every entry.
e_xy = np.full((n_items, n_items), -1e100)

items = interactions_df['item_id'].unique()

# p_y_powers[y][k - 1] == P_Y ** k for k = 1..max_interactions
p_y_powers = {}
for y in items:
    p_y_powers[y] = np.array([p_y[y]**k for k in range(1, max_interactions + 1)])

print("p_y_powers for the first item")
print(p_y_powers[0])

for x in items:
    bought_x = interactions_df['item_id'] == x

    # Get users who bought X
    c_x = interactions_df.loc[bought_x, 'user_id'].unique()

    # Calculate the number of non-X interactions for each user who bought X.
    # reindex keeps the users in c_x order and gives 0 to users who bought only X
    # (they have no rows left after X is filtered out).
    n_non_x_interactions = (
        interactions_df.loc[~bought_x, ['user_id', 'item_id']]
        .groupby("user_id")
        .count()
        .rename(columns={'item_id': 'n_items'})
        .reindex(index=c_x, fill_value=0)
    )
    non_x_counts = n_non_x_interactions["n_items"].to_numpy()

    # Calculate the expected numbers of Y products bought by clients who bought X.
    # alpha_k[k - 1] = (-1)^(k + 1) * sum over users of binom(|c|, k); binom is evaluated on the
    # whole array of per-user counts at once instead of once per user.
    alpha_k = np.array([(-1)**(k + 1) * np.sum(scisp.binom(non_x_counts, k))
                        for k in range(1, max_interactions + 1)])

    if x == 0:
        print("alpha_k")
        print(alpha_k)
        print()

    for y in items:  # Optimize to use only those Y's which have at least one client who bought both X and Y
        if y != x:
            e_xy[x][y] = np.sum(alpha_k * p_y_powers[y])
        else:
            e_xy[x][y] = n_users * p_y[x]

print("E[Y|X]")
print(np.around(e_xy[:10, :10], 3))

# %% [markdown]
# ## Get the user-item interaction matrix
# %%
# Binary user-item matrix: r[u][i] == 1 when user u has at least one interaction with item i.
# A single fancy-index assignment replaces the row-by-row iterrows loop; the explicit int cast
# keeps the ids usable as indices regardless of the column dtype.
r = np.zeros(shape=(n_users, n_items))
r[interactions_df['user_id'].to_numpy().astype(int),
  interactions_df['item_id'].to_numpy().astype(int)] = 1

print(r[:10, :10])

# %% [markdown]
# ## Calculate the number of users who bought both X and Y
# %%
# Simple and slow method (kept for reference only):
#
# n_xy = np.zeros(shape=(n_items, n_items))
# for x in items:
#     for y in items:
#         users_x = set(interactions_df.loc[interactions_df['item_id'] == x]['user_id'].tolist())
#         users_y = set(interactions_df.loc[interactions_df['item_id'] == y]['user_id'].tolist())
#         n_xy[x][y] = len(users_x & users_y)

# Optimized method (can be further optimized by using sparse matrices):
# entry (x, y) of r^T r counts the users whose rows of r have a 1 in both column x and column y.
n_xy = r.T @ r

print(n_xy[:10, :10])

# %% [markdown]
# ## Calculate the scores

# %%
# score = (N(X, Y) - E[Y|X]) / sqrt(E[Y|X]), defined only where the expectation is positive.
# Guarding with `e_xy > 0` for both the square root and the division leaves the score at 0 for a
# zero expectation and also keeps a negative value from producing NaN and a RuntimeWarning.
has_positive_expectation = e_xy > 0
sqrt_e_xy = np.sqrt(e_xy, out=np.ones_like(e_xy), where=has_positive_expectation)
scores = np.divide(n_xy - e_xy, sqrt_e_xy, out=np.zeros_like(n_xy), where=has_positive_expectation)

print(np.around(scores[:10, :10], 3))

# %% [markdown]
# ## Final comparison
"cell_type": "code", - "execution_count": 103, - "id": "prepared-fraction", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "E[Y|X]\n", - "[[65.262 26.076 9.065 3.154 4.68 41.571 23.082 8.592 19.542 27.522]\n", - " [28.303 19.062 4.288 1.5 2.223 18.99 10.768 4.066 9.15 12.778]\n", - " [10.216 5.074 5.815 0.712 1.046 7.386 4.577 1.872 3.964 5.308]\n", - " [ 2.315 0.859 0.283 1.938 0.144 1.433 0.754 0.267 0.631 0.911]\n", - " [ 4.526 2.47 0.999 0.366 2.908 3.453 2.245 0.951 1.962 2.574]\n", - " [47.984 20.534 7.279 2.549 3.776 34.569 18.241 6.902 15.507 21.636]\n", - " [25.303 11.206 4.05 1.429 2.112 17.265 16.477 3.843 8.524 11.789]\n", - " [ 9.094 4.124 1.561 0.561 0.826 6.205 3.701 5.492 3.186 4.326]\n", - " [21.633 9.823 3.601 1.276 1.884 14.955 8.776 3.417 13.569 10.322]\n", - " [25.03 10.257 3.571 1.243 1.844 16.332 9.082 3.385 7.691 20.354]]\n", - "\n", - "N(X, Y)\n", - "[[202. 34. 15. 3. 3. 66. 36. 10. 25. 34.]\n", - " [ 34. 59. 6. 2. 5. 24. 12. 4. 8. 12.]\n", - " [ 15. 6. 18. 1. 2. 7. 3. 4. 6. 5.]\n", - " [ 3. 2. 1. 6. 1. 1. 1. 1. 2. 2.]\n", - " [ 3. 5. 2. 1. 9. 3. 2. 1. 1. 0.]\n", - " [ 66. 24. 7. 1. 3. 107. 20. 5. 16. 18.]\n", - " [ 36. 12. 3. 1. 2. 20. 51. 8. 16. 17.]\n", - " [ 10. 4. 4. 1. 1. 5. 8. 17. 8. 10.]\n", - " [ 25. 8. 6. 2. 1. 16. 16. 8. 42. 23.]\n", - " [ 34. 12. 5. 2. 0. 18. 17. 10. 23. 
# %%
# Show the top-left 10x10 corner of the expected counts, the observed counts and the resulting
# scores one after another so they can be compared by eye.
comparison_sections = [
    ("E[Y|X]", np.around(e_xy[:10, :10], 3)),
    ("N(X, Y)", n_xy[:10, :10]),
    ("Scores", np.around(scores[:10, :10], 3)),
]
for section_title, section_values in comparison_sections:
    print(section_title)
    print(section_values)
    print()

# %% [markdown]
# # Inner workings of the Amazon recommender recommend method
# %%
# Step-by-step version of the scoring done for a single known user.
user_id = 1
should_recommend_already_bought = False
n_recommendations = 10

mapped_user_id = user_id_mapping[user_id]

# Every item the user interacted with contributes its row of item-to-item scores.
x_list = interactions_df.loc[interactions_df['user_id'] == mapped_user_id]['item_id'].tolist()
final_scores = np.sum(scores[x_list], axis=0)

# Choose n recommendations based on highest scores
if not should_recommend_already_bought:
    final_scores[x_list] = -1e100  # push already bought items to the very end of the ranking

chosen_ids = np.argsort(-final_scores)[:n_recommendations]

for item_id in chosen_ids:
    print("Recommendation: {}, {}, {}".format(user_id_reverse_mapping[mapped_user_id],
                                              ml_movies_df.loc[ml_movies_df['item_id'] == item_id_reverse_mapping[item_id],
                                                               'title'].iloc[0],
                                              final_scores[item_id]))

# %% [markdown]
# # Amazon recommender

# %%
from recommenders.recommender import Recommender


class AmazonRecommender(Recommender):
    """
    Basic item-to-item collaborative filtering algorithm used in Amazon.com as described in:
    - Linden G., Smith B., York Y., Amazon.com Recommendations. Item-to-Item Collaborative Filtering,
        IEEE Internet Computing, 2003,
    - Smith B., Linden G., Two Decades of Recommender Systems at Amazon.com, IEEE Internet Computing, 2017.
    """

    def __init__(self):
        super().__init__()
        # Recommendations produced by the last call to recommend()
        self.recommender_df = pd.DataFrame(columns=['user_id', 'item_id', 'score'])
        # Training interactions with ids already mapped to consecutive integers
        self.interactions_df = None
        # raw id -> consecutive id and back
        self.item_id_mapping = None
        self.user_id_mapping = None
        self.item_id_reverse_mapping = None
        self.user_id_reverse_mapping = None
        # Expected co-purchase counts, observed co-purchase counts and the resulting scores
        self.e_xy = None
        self.n_xy = None
        self.scores = None
        # Mapped item ids ordered from the most to the least frequently bought
        self.most_popular_items = None
        self.should_recommend_already_bought = False

    def initialize(self, **params):
        """
        Sets optional parameters of the recommender.

        :param params: Recognized key: should_recommend_already_bought (bool); other keys are ignored.
        """
        if 'should_recommend_already_bought' in params:
            self.should_recommend_already_bought = params['should_recommend_already_bought']

    def fit(self, interactions_df, users_df, items_df):
        """
        Training of the recommender.

        :param pd.DataFrame interactions_df: DataFrame with recorded interactions between users and items
            defined by user_id, item_id and features of the interaction.
        :param pd.DataFrame users_df: DataFrame with users and their features defined by
            user_id and the user feature columns. Not used by this recommender.
        :param pd.DataFrame items_df: DataFrame with items and their features defined
            by item_id and the item feature columns. Not used by this recommender.
        """

        # Shift item ids and user ids so that they are consecutive (numbered in order of first appearance)

        unique_item_ids = interactions_df['item_id'].unique()
        self.item_id_mapping = dict(zip(unique_item_ids, list(range(len(unique_item_ids)))))
        self.item_id_reverse_mapping = dict(zip(list(range(len(unique_item_ids))), unique_item_ids))
        unique_user_ids = interactions_df['user_id'].unique()
        self.user_id_mapping = dict(zip(unique_user_ids, list(range(len(unique_user_ids)))))
        self.user_id_reverse_mapping = dict(zip(list(range(len(unique_user_ids))), unique_user_ids))

        # Series.map is a plain dictionary lookup; DataFrame.replace with a dict whose keys and values
        # overlap is slow and depends on how replace resolves chained substitutions.
        # assign() returns a new frame, so the caller's DataFrame is not modified.
        interactions_df = interactions_df.assign(
            item_id=interactions_df['item_id'].map(self.item_id_mapping),
            user_id=interactions_df['user_id'].map(self.user_id_mapping),
        )

        # Get the number of items and users

        self.interactions_df = interactions_df
        n_items = len(unique_item_ids)
        n_users = len(unique_user_ids)

        # Get maximal number of interactions of a single user

        max_interactions = interactions_df.groupby("user_id")['item_id'].count().max()

        # Calculate P_Y's (share of all interactions that belongs to each item)

        n_interactions = len(interactions_df)
        item_interaction_counts = interactions_df.groupby("item_id")['user_id'].count()
        p_y = dict(zip(item_interaction_counts.index.tolist(),
                       (item_interaction_counts / n_interactions).tolist()))

        # Mapped item ids are exactly 0..n_items-1

        items = range(n_items)

        # For every X calculate the E[Y|X]

        # -1e100 is only a placeholder: the loops below write every entry
        e_xy = np.full((n_items, n_items), -1e100)

        # p_y_powers[y][k - 1] == P_Y ** k for k = 1..max_interactions
        p_y_powers = {}
        for y in items:
            p_y_powers[y] = np.array([p_y[y]**k for k in range(1, max_interactions + 1)])

        for x in items:
            bought_x = interactions_df['item_id'] == x

            # Get users who bought X
            c_x = interactions_df.loc[bought_x, 'user_id'].unique()

            # Calculate the number of non-X interactions for each user who bought X.
            # reindex keeps the users in c_x order and gives 0 to users who bought only X.
            non_x_counts = (
                interactions_df.loc[~bought_x]
                .groupby("user_id")['item_id']
                .count()
                .reindex(c_x, fill_value=0)
                .to_numpy()
            )

            # Calculate the expected numbers of Y products bought by clients who bought X.
            # binom is evaluated on the whole array of per-user counts instead of once per user.
            alpha_k = np.array([(-1)**(k + 1) * np.sum(scisp.binom(non_x_counts, k))
                                for k in range(1, max_interactions + 1)])

            for y in items:  # Optimize to use only those Y's which have at least one client who bought both X and Y
                if y != x:
                    e_xy[x][y] = np.sum(alpha_k * p_y_powers[y])
                else:
                    e_xy[x][y] = n_users * p_y[x]

        self.e_xy = e_xy

        # Calculate the number of users who bought both X and Y
        # (can be further optimized by using sparse matrices)

        # Binary user-item interaction matrix filled with one fancy-index assignment
        r = np.zeros(shape=(n_users, n_items))
        r[interactions_df['user_id'].to_numpy().astype(int),
          interactions_df['item_id'].to_numpy().astype(int)] = 1

        # Entry (x, y) of r^T r counts the users with a 1 in both column x and column y
        n_xy = r.T @ r

        self.n_xy = n_xy

        # Calculate the scores: (N(X, Y) - E[Y|X]) / sqrt(E[Y|X]) where the expectation is positive,
        # 0 elsewhere. Guarding the square root as well keeps a non-positive expectation from
        # producing NaN scores, which would otherwise propagate into every sum in recommend().

        has_positive_expectation = e_xy > 0
        sqrt_e_xy = np.sqrt(e_xy, out=np.ones_like(e_xy), where=has_positive_expectation)
        self.scores = np.divide(n_xy - e_xy, sqrt_e_xy, out=np.zeros_like(n_xy),
                                where=has_positive_expectation)

        # Find the most popular items for the cold start problem

        offers_count = interactions_df.loc[:, ['item_id', 'user_id']].groupby(by='item_id').count()
        offers_count = offers_count.sort_values('user_id', ascending=False)
        self.most_popular_items = offers_count.index

    def recommend(self, users_df, items_df, n_recommendations=1):
        """
        Serving of recommendations. Scores the items seen during fit for each user in users_df and
        returns top n_recommendations for each user. Users unknown from fit receive the most popular
        items with a constant score of 1.0.

        :param pd.DataFrame users_df: DataFrame with users and their features for which
            recommendations should be generated.
        :param pd.DataFrame items_df: DataFrame with items and their features. Accepted for interface
            compatibility; the current implementation does not restrict scoring to these items.
        :param int n_recommendations: Number of recommendations to be returned for each user.
        :return: DataFrame with user_id, item_id and score as columns returning n_recommendations top recommendations
            for each user.
        :rtype: pd.DataFrame
        """

        # Start from an empty frame with the expected columns (cleans previous recommendations).
        # Per-user frames are collected and concatenated once at the end instead of growing the
        # DataFrame inside the loop.
        # NOTE: the former remapping of items_df['item_id'] was dropped - it used user_id_mapping
        # (the wrong mapping) and its result was never read.
        user_frames = [self.recommender_df[:0]]

        # Generate recommendations

        for user_id in users_df['user_id']:
            recommendations = []

            if user_id in self.user_id_mapping:
                mapped_user_id = self.user_id_mapping[user_id]

                # Every item the user interacted with contributes its row of item-to-item scores
                x_list = self.interactions_df.loc[
                    self.interactions_df['user_id'] == mapped_user_id]['item_id'].tolist()
                final_scores = np.sum(self.scores[x_list], axis=0)

                # Choose n recommendations based on highest scores
                if not self.should_recommend_already_bought:
                    final_scores[x_list] = -1e100

                chosen_ids = np.argsort(-final_scores)[:n_recommendations]

                for item_id in chosen_ids:
                    recommendations.append(
                        {
                            'user_id': self.user_id_reverse_mapping[mapped_user_id],
                            'item_id': self.item_id_reverse_mapping[item_id],
                            'score': final_scores[item_id]
                        }
                    )
            else:  # For new users recommend most popular items
                # Slicing returns fewer items instead of raising IndexError when n_recommendations
                # exceeds the number of known items.
                for popular_item_id in self.most_popular_items[:n_recommendations]:
                    recommendations.append(
                        {
                            'user_id': user_id,
                            'item_id': self.item_id_reverse_mapping[popular_item_id],
                            'score': 1.0
                        }
                    )

            user_frames.append(pd.DataFrame(recommendations))

        self.recommender_df = pd.concat(user_frames)

        return self.recommender_df
" \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
# %%
# Quick test of the recommender: fit on all ratings, then ask for 10 recommendations for three users
# and attach the movie metadata so the result is readable.

amazon_recommender = AmazonRecommender()
amazon_recommender.fit(ml_ratings_df, None, ml_movies_df)

test_users_df = pd.DataFrame({'user_id': [1, 4, 6]})
recommendations = amazon_recommender.recommend(test_users_df, ml_movies_df, 10)
recommendations = recommendations.merge(ml_movies_df, on='item_id', how='left')

print("Recommendations")
display(HTML(recommendations.to_html()))

# %% [markdown]
# # Training-test split evaluation
# %%
from evaluation_and_testing.testing import evaluate_train_test_split_implicit

# Evaluate the Amazon recommender on a train/test split of the implicit interactions and show
# the returned metrics as a one-row table.
amazon_recommender = AmazonRecommender()

amazon_metrics = evaluate_train_test_split_implicit(
    amazon_recommender, ml_ratings_df.loc[:, ['user_id', 'item_id']], ml_movies_df)

amazon_tts_results = pd.DataFrame(
    [['AmazonRecommender', *amazon_metrics]],
    columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])

display(HTML(amazon_tts_results.to_html()))
RecommenderHR@1HR@3HR@5HR@10NDCG@1NDCG@3NDCG@5NDCG@10
0TFIDFRecommender0.0259740.0909090.1363640.3181820.0259740.0643930.0836850.140799
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "from recommenders.tfidf_recommender import TFIDFRecommender\n", - "\n", - "tfidf_recommender = TFIDFRecommender()\n", - "\n", - "tfidf_tts_results = [['TFIDFRecommender'] + list(evaluate_train_test_split_implicit(\n", - " tfidf_recommender, ml_ratings_df.loc[:, ['user_id', 'item_id']], ml_movies_df))]\n", - "\n", - "tfidf_tts_results = pd.DataFrame(\n", - " tfidf_tts_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n", - "\n", - "display(HTML(tfidf_tts_results.to_html()))" - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "id": "random-source", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
RecommenderHR@1HR@3HR@5HR@10NDCG@1NDCG@3NDCG@5NDCG@10
0AmazonRecommender0.1818180.3116880.4025970.5519480.1818180.2578060.2946820.341470
1TFIDFRecommender0.0259740.0909090.1363640.3181820.0259740.0643930.0836850.140799
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "tts_results = pd.concat([amazon_tts_results, tfidf_tts_results]).reset_index(drop=True)\n", - "display(HTML(tts_results.to_html()))" - ] - }, - { - "cell_type": "markdown", - "id": "continued-harassment", - "metadata": {}, - "source": [ - "# Leave-one-out evaluation" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "id": "prerequisite-lounge", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
RecommenderHR@1HR@3HR@5HR@10NDCG@1NDCG@3NDCG@5NDCG@10
0AmazonRecommender0.1666670.2566670.320.4266670.1666670.2190860.2454860.279978
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "from evaluation_and_testing.testing import evaluate_leave_one_out_implicit\n", - "\n", - "amazon_recommender = AmazonRecommender()\n", - "\n", - "amazon_loo_results = [['AmazonRecommender'] + list(evaluate_leave_one_out_implicit(\n", - " amazon_recommender, ml_ratings_df.loc[:, ['user_id', 'item_id']], ml_movies_df, max_evals=300, seed=6789))]\n", - "\n", - "amazon_loo_results = pd.DataFrame(\n", - " amazon_loo_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n", - "\n", - "display(HTML(amazon_loo_results.to_html()))" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "id": "behind-cambodia", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
RecommenderHR@1HR@3HR@5HR@10NDCG@1NDCG@3NDCG@5NDCG@10
0TFIDFRecommender0.0066670.0533330.1233330.2333330.0066670.0334910.0621780.096151
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "tfidf_recommender = TFIDFRecommender()\n", - "\n", - "tfidf_loo_results = [['TFIDFRecommender'] + list(evaluate_leave_one_out_implicit(\n", - " tfidf_recommender, ml_ratings_df.loc[:, ['user_id', 'item_id']], ml_movies_df, max_evals=300, seed=6789))]\n", - "\n", - "tfidf_loo_results = pd.DataFrame(\n", - " tfidf_loo_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n", - "\n", - "display(HTML(tfidf_loo_results.to_html()))" - ] - }, - { - "cell_type": "code", - "execution_count": 63, - "id": "lightweight-password", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
RecommenderHR@1HR@3HR@5HR@10NDCG@1NDCG@3NDCG@5NDCG@10
0AmazonRecommender0.1666670.2566670.3200000.4266670.1666670.2190860.2454860.279978
1TFIDFRecommender0.0066670.0533330.1233330.2333330.0066670.0334910.0621780.096151
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "loo_results = pd.concat([amazon_loo_results, tfidf_loo_results]).reset_index(drop=True)\n", - "display(HTML(loo_results.to_html()))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "mediterranean-residence", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/class_6_collaborative_filtering.ipynb b/class_6_collaborative_filtering.ipynb deleted file mode 100644 index 86f232d..0000000 --- a/class_6_collaborative_filtering.ipynb +++ /dev/null @@ -1,2599 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "verified-accommodation", - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib inline\n", - "%load_ext autoreload\n", - "%autoreload 2\n", - "\n", - "import numpy as np\n", - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "from IPython.display import Markdown, display, HTML\n", - "from collections import defaultdict\n", - "\n", - "# Fix the dying kernel problem (only a problem in some installations - you can remove it, if it works without it)\n", - "import os\n", - "os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'" - ] - }, - { - "cell_type": "markdown", - "id": "educated-tourist", - "metadata": {}, - "source": [ - "# Load data" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "looking-feeling", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - 
" \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
item_idtitlegenres
01Toy Story (1995)Adventure|Animation|Children|Comedy|Fantasy
277318Shawshank Redemption, The (1994)Crime|Drama
8961193One Flew Over the Cuckoo's Nest (1975)Drama
9091208Apocalypse Now (1979)Action|Drama|War
9151214Alien (1979)Horror|Sci-Fi
12911721Titanic (1997)Drama|Romance
22262959Fight Club (1999)Action|Crime|Drama|Thriller
26743578Gladiator (2000)Action|Adventure|Drama
31944306Shrek (2001)Adventure|Animation|Children|Comedy|Fantasy|Romance
8376109487Interstellar (2014)Sci-Fi|IMAX
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Number of interactions left: 1689\n" - ] - } - ], - "source": [ - "ml_ratings_df = pd.read_csv(os.path.join(\"data\", \"movielens_small\", \"ratings.csv\")).rename(columns={'userId': 'user_id', 'movieId': 'item_id'})\n", - "ml_movies_df = pd.read_csv(os.path.join(\"data\", \"movielens_small\", \"movies.csv\")).rename(columns={'movieId': 'item_id'})\n", - "ml_df = pd.merge(ml_ratings_df, ml_movies_df, on='item_id')\n", - "\n", - "# Filter the data to reduce the number of movies\n", - "seed = 6789\n", - "rng = np.random.RandomState(seed=seed)\n", - "left_ids = [1, 318, 1193, 1208, 1214, 1721, 2959, 3578, 4306, 109487]\n", - "\n", - "ml_ratings_df = ml_ratings_df.loc[ml_ratings_df['item_id'].isin(left_ids)]\n", - "ml_movies_df = ml_movies_df.loc[ml_movies_df['item_id'].isin(left_ids)]\n", - "ml_df = ml_df.loc[ml_df['item_id'].isin(left_ids)]\n", - "\n", - "display(HTML(ml_movies_df.head(10).to_html()))\n", - "\n", - "print(\"Number of interactions left: {}\".format(len(ml_ratings_df)))" - ] - }, - { - "cell_type": "markdown", - "id": "plastic-brooklyn", - "metadata": {}, - "source": [ - "# Shift item ids and user ids so that they are consecutive" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "valuable-modem", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user_iditem_idratingtimestamp
0004.0964982703
72014.0964983250
75024.0964981855
192035.0964983282
219045.0964980668
232153.01445714835
235144.01445714885
255163.01445715145
458232.0945078528
516304.0847434962
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "interactions_df = ml_ratings_df.copy()\n", - "\n", - "unique_item_ids = interactions_df['item_id'].unique()\n", - "item_id_mapping = dict(zip(unique_item_ids, list(range(len(unique_item_ids)))))\n", - "item_id_reverse_mapping = dict(zip(list(range(len(unique_item_ids))), unique_item_ids))\n", - "unique_user_ids = interactions_df['user_id'].unique()\n", - "user_id_mapping = dict(zip(unique_user_ids, list(range(len(unique_user_ids)))))\n", - "user_id_reverse_mapping = dict(zip(list(range(len(unique_user_ids))), unique_user_ids))\n", - "\n", - "interactions_df.replace({'item_id': item_id_mapping, 'user_id': user_id_mapping}, inplace=True)\n", - "\n", - "display(HTML(interactions_df.head(10).to_html()))" - ] - }, - { - "cell_type": "markdown", - "id": "basic-meeting", - "metadata": {}, - "source": [ - "# Get the number of items and users" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "close-massachusetts", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "n_items=10\n", - "n_users=521\n" - ] - } - ], - "source": [ - "n_items = np.max(interactions_df['item_id']) + 1\n", - "n_users = np.max(interactions_df['user_id']) + 1\n", - "\n", - "print(\"n_items={}\\nn_users={}\".format(n_items, n_users))" - ] - }, - { - "cell_type": "markdown", - "id": "acknowledged-threshold", - "metadata": {}, - "source": [ - "# Get the user-item interaction matrix" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "extraordinary-mexico", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[1. 1. 1. 1. 1. 0. 0. 0. 0. 0.]\n", - " [0. 0. 0. 0. 1. 1. 1. 0. 0. 0.]\n", - " [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]\n", - " [1. 0. 0. 0. 0. 1. 0. 0. 0. 0.]\n", - " [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]\n", - " [1. 1. 0. 0. 1. 0. 0. 1. 0. 0.]\n", - " [0. 0. 0. 0. 0. 1. 0. 
# Get the user-item interaction matrix: r[u, i] == 1 when user u interacted
# with item i, 0 otherwise. Rows are users, columns are items.
user_idx = interactions_df['user_id'].to_numpy(dtype=int)
item_idx = interactions_df['item_id'].to_numpy(dtype=int)

r = np.zeros(shape=(n_users, n_items))
# One vectorized assignment replaces the row-by-row iterrows() loop (which also
# needed an int cast per row because iterrows upcasts mixed rows to float).
r[user_idx, item_idx] = 1

print(r[:10, :10])
\n", - "$$\n", - " \\text{Sim}(\\vec{u}, \\vec{v}) = \\text{Cos}(\\vec{u}, \\vec{v}) = \\frac{\\vec{u} \\cdot \\vec{v}}{\\lVert u \\rVert \\lVert v \\rVert} = \\frac{\\sum_{i = 1}^n u_i v_i}{\\sqrt{\\sum_{i = 1}^n u_i^2} \\sqrt{\\sum_{i = 1}^n v_i^2}}\n", - "$$\n", - "
def cosine(u, v):
    """
    Cosine similarity of two equal-length vectors.

    For binary interaction vectors the value ranges from 0 to 1: 1 means both
    vectors are identical, 0 means they have no 1's in common.

    :param u: First vector (array-like of numbers).
    :param v: Second vector (array-like of numbers), same length as u.
    :return: u . v / (||u|| * ||v||), or 0.0 when either vector has zero norm.
    :rtype: float
    """
    u = np.asarray(u, dtype=float)
    v = np.asarray(v, dtype=float)

    denominator = np.sqrt(np.sum(u * u) * np.sum(v * v))
    if denominator == 0:
        # An all-zero vector has no direction; treat it as "nothing in common"
        # rather than returning NaN from 0 / 0.
        return 0.0

    return np.sum(u * v) / denominator
\n", - "$$\n", - " \\text{Sim}(\\vec{u}, \\vec{v}) = \\text{Pearson}(\\vec{u}, \\vec{v}) = \\frac{\\sum_{i = 1}^n (u_i - \\bar{u}) (v_i - \\bar{v})}{\\sqrt{\\sum_{i = 1}^n (u_i - \\bar{u})^2} \\sqrt{\\sum_{i = 1}^n (v_i - \\bar{v})^2}}\n", - "$$\n", - "
def pearson(u, v):
    """
    Pearson correlation of two equal-length vectors.

    The value ranges from -1 to 1. For binary interaction vectors, 1 means the
    vectors are identical and -1 means they are exact opposites (complements).

    :param u: First vector (array-like of numbers).
    :param v: Second vector (array-like of numbers), same length as u.
    :return: Correlation coefficient, or 0.0 when either vector is constant
        (zero variance), for which the correlation is undefined.
    :rtype: float
    """
    u = np.asarray(u, dtype=float)
    v = np.asarray(v, dtype=float)

    u_centered = u - np.mean(u)
    v_centered = v - np.mean(v)

    denominator = np.sqrt(np.sum(u_centered ** 2) * np.sum(v_centered ** 2))
    if denominator == 0:
        # A constant vector has zero variance; return 0.0 instead of NaN.
        return 0.0

    return np.sum(u_centered * v_centered) / denominator
# Show the interaction rows of users 0 and 5 followed by their Pearson similarity.
print("Pearson similarity between user 0 and 5")
for interaction_row in (r[0], r[5]):
    print(interaction_row)
print(pearson(r[0], r[5]))
# All pairwise cosine similarities between users in one shot:
# n_uv[u, v] is the scalar product of rows u and v, and its diagonal holds the
# squared norms.
n_uv = np.matmul(r, r.T)

norms = np.sqrt(np.diag(n_uv))

# Guard against users with no interactions (zero norm), mirroring the guard
# used for the Pearson similarities; their similarities become 0 instead of NaN.
safe_norms = np.where(norms == 0, 0.000001, norms)

cos_sim = n_uv / safe_norms[:, np.newaxis] / safe_norms[np.newaxis, :]

print("Scalar products")
print(n_uv[:10, :10])
print()

print("Norms")
print(np.around(norms[:10], 3))
print()

print("Cosine similarities")
print(np.around(cos_sim[:10, :10], 3))
# All pairwise Pearson similarities between users: centre every row on its own
# mean, then proceed exactly as for cosine similarity.
r_shifted = r - np.mean(r, axis=1).reshape(-1, 1)

n_uv = np.matmul(r_shifted, r_shifted.T)

norms = np.sqrt(np.diag(n_uv))

# A constant row has zero norm after centring; use a tiny norm so the division
# yields 0 instead of NaN.
norms[norms == 0] = 0.000001

pearson_sim = n_uv / norms[:, np.newaxis] / norms[np.newaxis, :]
# Backward-compatible alias for the original (misspelled) variable name.
person_sim = pearson_sim

print("Scalar products")
print(np.around(n_uv[:10, :10], 3))
print()

print("Norms")
print(np.around(norms[:10], 3))
print()

print("Pearson similarities")
print(np.around(pearson_sim[:10, :10], 3))
# Find the n closest neighbors of a user by cosine similarity.
# Setting the diagonal to -1 makes every user the least similar to themselves,
# so a user never ends up among their own nearest neighbors.
np.fill_diagonal(cos_sim, -1)

user_id = 0
n_neighbors = 10

neighbor_ids = np.argsort(-cos_sim[user_id])[:n_neighbors]

print("Nearest neighbors")
print(neighbor_ids)
print()

print("User {}".format(user_id))
print(r[user_id])

# Show the interaction rows of the three closest neighbors. The ids are taken
# from neighbor_ids rather than hard-coded, so the printout stays correct if
# the data, the user or the similarity measure changes.
for shown_neighbor_id in neighbor_ids[:3]:
    print()
    print("User {}".format(shown_neighbor_id))
    print(r[shown_neighbor_id])
\n", - "$$\n", - " \\text{score(i)} = \\frac{\\sum_{v \\in N(u)} \\text{Sim}(u, v) \\cdot v(i)}{\\sum_{v \\in N(u)} |\\text{Sim}(u, v)|}\n", - "$$\n", - "
" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "documented-discipline", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Interactions for nearest neighbors\n", - "[[1. 1. 1. 1. 1. 0. 0. 1. 0. 0.]\n", - " [1. 1. 1. 1. 1. 0. 1. 0. 0. 1.]\n", - " [1. 1. 1. 1. 1. 1. 1. 0. 0. 0.]\n", - " [1. 1. 1. 1. 1. 1. 0. 0. 1. 0.]\n", - " [1. 1. 1. 1. 1. 0. 0. 0. 1. 1.]\n", - " [1. 1. 1. 1. 1. 1. 0. 1. 0. 0.]\n", - " [1. 1. 1. 1. 0. 1. 0. 0. 0. 0.]\n", - " [1. 1. 1. 1. 0. 1. 0. 0. 0. 0.]\n", - " [0. 1. 1. 1. 1. 1. 0. 0. 0. 0.]\n", - " [0. 1. 1. 1. 1. 1. 0. 0. 0. 0.]]\n", - "\n", - "similarities\n", - "[0.91287093 0.84515425 0.84515425 0.84515425 0.84515425 0.84515425\n", - " 0.8 0.8 0.8 0.8 ]\n", - "\n", - "v_i\n", - "[0. 1. 1. 0. 0. 0. 0. 0. 0. 0.]\n", - "\n", - "score for user_id=0 and item_id=6\n", - "0.202707883171415\n" - ] - } - ], - "source": [ - "def score(similarities, v_i):\n", - " return np.sum(similarities * v_i) / np.sum(similarities)\n", - "\n", - "item_id = 6\n", - "\n", - "print(\"Interactions for nearest neighbors\")\n", - "print(r[neighbor_ids])\n", - "print()\n", - "\n", - "similarities = cos_sim[user_id][neighbor_ids]\n", - "print(\"similarities\")\n", - "print(similarities)\n", - "print()\n", - "\n", - "v_i = r[neighbor_ids][:, item_id]\n", - "print(\"v_i\")\n", - "print(v_i)\n", - "print()\n", - "\n", - "print(\"score for user_id={} and item_id={}\".format(user_id, item_id))\n", - "print(score(similarities, v_i))" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "tribal-brown", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[1. 1. 1. 1. 1. 0. 0. 0. 0. 
# Score the first ten items for the chosen user, one item at a time.
print(r[user_id])

# Both the neighbor similarities and the neighbors' interaction rows are the
# same for every item, so compute them once outside the loop.
similarities = cos_sim[user_id][neighbor_ids]
neighbor_rows = r[neighbor_ids]

for i in range(10):
    v_i = neighbor_rows[:, i]
    print("score for user_id={} and item_id={}".format(user_id, i))
    print(round(score(similarities, v_i), 2))
# The same scoring with a single operation

item_ids = list(range(10))

# Recompute the neighbor similarities here instead of relying on the variable
# left over from the per-item loop, so this cell works on its own.
similarities = cos_sim[user_id][neighbor_ids]

v_i = r[neighbor_ids][:, item_ids]

# Normalise by the sum of absolute similarities, as in the scoring formula.
scores = np.matmul(similarities, v_i) / np.sum(np.abs(similarities))

print(scores)
item_idtitlegenres
118145Bad Boys (1995)Action|Comedy|Crime|Drama|Thriller
143171Jeffrey (1995)Comedy|Drama
194228Destiny Turns on the Radio (1995)Comedy
199233Exotica (1994)Drama
230267Major Payne (1995)Comedy
313355Flintstones, The (1994)Children|Comedy|Fantasy
379435Coneheads (1993)Comedy|Sci-Fi
419481Kalifornia (1993)Drama|Thriller
615780Independence Day (a.k.a. ID4) (1996)Action|Adventure|Sci-Fi|Thriller
737959Of Human Bondage (1934)Drama
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Number of interactions left: 1170\n" - ] - } - ], - "source": [ - "ml_ratings_df = pd.read_csv(os.path.join(\"data\", \"movielens_small\", \"ratings.csv\")).rename(columns={'userId': 'user_id', 'movieId': 'item_id'})\n", - "ml_movies_df = pd.read_csv(os.path.join(\"data\", \"movielens_small\", \"movies.csv\")).rename(columns={'movieId': 'item_id'})\n", - "ml_df = pd.merge(ml_ratings_df, ml_movies_df, on='item_id')\n", - "\n", - "# Filter the data to reduce the number of movies\n", - "seed = 6789\n", - "rng = np.random.RandomState(seed=seed)\n", - "left_ids = rng.choice(ml_movies_df['item_id'], size=100, replace=False)\n", - "\n", - "ml_ratings_df = ml_ratings_df.loc[ml_ratings_df['item_id'].isin(left_ids)]\n", - "ml_movies_df = ml_movies_df.loc[ml_movies_df['item_id'].isin(left_ids)]\n", - "ml_df = ml_df.loc[ml_df['item_id'].isin(left_ids)]\n", - "\n", - "display(HTML(ml_movies_df.head(10).to_html()))\n", - "\n", - "print(\"Number of interactions left: {}\".format(len(ml_ratings_df)))" - ] - }, - { - "cell_type": "markdown", - "id": "opponent-prediction", - "metadata": {}, - "source": [ - "# User-based neighborhood recommender" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "fancy-return", - "metadata": {}, - "outputs": [], - "source": [ - "from recommenders.recommender import Recommender\n", - "\n", - "class NearestNeighborsRecommender(Recommender):\n", - " \"\"\"\n", - " Nearest neighbors recommender allowing to do user-based or item-based collaborative filtering.\n", - "\n", - " Possible similarity measures:\n", - " - 'cosine',\n", - " - 'pearson'.\n", - " \"\"\"\n", - "\n", - " def __init__(self):\n", - " super().__init__()\n", - " self.recommender_df = pd.DataFrame(columns=['user_id', 'item_id', 'score'])\n", - " self.interactions_df = None\n", - " self.item_id_mapping = None\n", - 
" self.user_id_mapping = None\n", - " self.item_id_reverse_mapping = None\n", - " self.user_id_reverse_mapping = None\n", - " self.r = None\n", - " self.similarities = None\n", - " self.most_popular_items = None\n", - "\n", - " self.collaboration_type = 'user'\n", - " self.similarity_measure = 'cosine'\n", - " self.n_neighbors = 10\n", - " self.should_recommend_already_bought = False\n", - "\n", - " def initialize(self, **params):\n", - " if 'n_neighbors' in params:\n", - " self.n_neighbors = params['n_neighbors']\n", - " if 'should_recommend_already_bought' in params:\n", - " self.should_recommend_already_bought = params['should_recommend_already_bought']\n", - "\n", - " def fit(self, interactions_df, users_df, items_df):\n", - " \"\"\"\n", - " Training of the recommender.\n", - "\n", - " :param pd.DataFrame interactions_df: DataFrame with recorded interactions between users and items\n", - " defined by user_id, item_id and features of the interaction.\n", - " :param pd.DataFrame users_df: DataFrame with users and their features defined by\n", - " user_id and the user feature columns.\n", - " :param pd.DataFrame items_df: DataFrame with items and their features defined\n", - " by item_id and the item feature columns.\n", - " \"\"\"\n", - "\n", - " del users_df, items_df\n", - "\n", - " # Shift item ids and user ids so that they are consecutive\n", - "\n", - " unique_item_ids = interactions_df['item_id'].unique()\n", - " self.item_id_mapping = dict(zip(unique_item_ids, list(range(len(unique_item_ids)))))\n", - " self.item_id_reverse_mapping = dict(zip(list(range(len(unique_item_ids))), unique_item_ids))\n", - " unique_user_ids = interactions_df['user_id'].unique()\n", - " self.user_id_mapping = dict(zip(unique_user_ids, list(range(len(unique_user_ids)))))\n", - " self.user_id_reverse_mapping = dict(zip(list(range(len(unique_user_ids))), unique_user_ids))\n", - "\n", - " interactions_df = interactions_df.copy()\n", - " interactions_df.replace({'item_id': 
self.item_id_mapping, 'user_id': self.user_id_mapping}, inplace=True)\n", - "\n", - " # Get the number of items and users\n", - "\n", - " self.interactions_df = interactions_df\n", - " n_items = np.max(interactions_df['item_id']) + 1\n", - " n_users = np.max(interactions_df['user_id']) + 1\n", - "\n", - " # Get the user-item interaction matrix (mapping to int is necessary because of how iterrows works)\n", - " r = np.zeros(shape=(n_users, n_items))\n", - " for idx, interaction in interactions_df.iterrows():\n", - " r[int(interaction['user_id'])][int(interaction['item_id'])] = 1\n", - "\n", - " if self.collaboration_type == 'item':\n", - " r = r.T\n", - "\n", - " self.r = r\n", - "\n", - " # Calculate all similarities\n", - "\n", - " similarities = None\n", - " if self.similarity_measure == 'cosine':\n", - " n_uv = np.matmul(r, r.T)\n", - " norms = np.sqrt(np.diag(n_uv))\n", - " similarities = n_uv / norms[:, np.newaxis] / norms[np.newaxis, :]\n", - " elif self.similarity_measure == 'pearson':\n", - " r_shifted = r - np.mean(r, axis=1).reshape(-1, 1)\n", - " n_uv = np.matmul(r_shifted, r_shifted.T)\n", - " norms = np.sqrt(np.diag(n_uv))\n", - " norms[norms == 0] = 0.000001\n", - " similarities = n_uv / norms[:, np.newaxis] / norms[np.newaxis, :]\n", - "\n", - " np.fill_diagonal(similarities, -1000)\n", - "\n", - " self.similarities = similarities\n", - "\n", - " # Find the most popular items for the cold start problem\n", - "\n", - " offers_count = interactions_df.loc[:, ['item_id', 'user_id']].groupby(by='item_id').count()\n", - " offers_count = offers_count.sort_values('user_id', ascending=False)\n", - " self.most_popular_items = offers_count.index\n", - "\n", - " def recommend(self, users_df, items_df, n_recommendations=1):\n", - " \"\"\"\n", - " Serving of recommendations. 
Scores items in items_df for each user in users_df and returns\n", - " top n_recommendations for each user.\n", - "\n", - " :param pd.DataFrame users_df: DataFrame with users and their features for which\n", - " recommendations should be generated.\n", - " :param pd.DataFrame items_df: DataFrame with items and their features which should be scored.\n", - " :param int n_recommendations: Number of recommendations to be returned for each user.\n", - " :return: DataFrame with user_id, item_id and score as columns returning n_recommendations top recommendations\n", - " for each user.\n", - " :rtype: pd.DataFrame\n", - " \"\"\"\n", - "\n", - " # Clean previous recommendations (iloc could be used alternatively)\n", - " self.recommender_df = self.recommender_df[:0]\n", - "\n", - " # Handle users not in the training data\n", - "\n", - " # Map item ids\n", - "\n", - " items_df = items_df.copy()\n", - " items_df = items_df.loc[items_df['item_id'].isin(self.item_id_mapping)]\n", - " items_df.replace({'item_id': self.item_id_mapping}, inplace=True)\n", - "\n", - " # Generate recommendations\n", - "\n", - " for idx, user in users_df.iterrows():\n", - " recommendations = []\n", - "\n", - " user_id = user['user_id']\n", - "\n", - " if user_id in self.user_id_mapping:\n", - " chosen_ids = []\n", - " scores = []\n", - " mapped_user_id = self.user_id_mapping[user_id]\n", - "\n", - " if self.collaboration_type == 'user':\n", - " neighbor_ids = np.argsort(-self.similarities[mapped_user_id])[:self.n_neighbors]\n", - " user_similarities = self.similarities[mapped_user_id][neighbor_ids]\n", - "\n", - " item_ids = items_df['item_id'].tolist()\n", - "\n", - " v_i = self.r[neighbor_ids][:, item_ids]\n", - "\n", - " scores = np.matmul(user_similarities, v_i) / np.sum(user_similarities)\n", - "\n", - " # Choose n recommendations based on highest scores\n", - " if not self.should_recommend_already_bought:\n", - " x_list = self.interactions_df.loc[\n", - " self.interactions_df['user_id'] == 
mapped_user_id]['item_id'].tolist()\n", - " scores[x_list] = -1e100\n", - "\n", - " chosen_ids = np.argsort(-scores)[:n_recommendations]\n", - "\n", - " elif self.collaboration_type == 'item':\n", - " x_list = self.interactions_df.loc[\n", - " self.interactions_df['user_id'] == mapped_user_id]['item_id'].tolist()\n", - " scores = np.sum(self.similarities[x_list], axis=0)\n", - "\n", - " # Choose n recommendations based on highest scores\n", - " if not self.should_recommend_already_bought:\n", - " scores[x_list] = -1e100\n", - "\n", - " chosen_ids = np.argsort(-scores)[:n_recommendations]\n", - "\n", - " for item_id in chosen_ids:\n", - " recommendations.append(\n", - " {\n", - " 'user_id': self.user_id_reverse_mapping[mapped_user_id],\n", - " 'item_id': self.item_id_reverse_mapping[item_id],\n", - " 'score': scores[item_id]\n", - " }\n", - " )\n", - " else: # For new users recommend most popular items\n", - " for i in range(n_recommendations):\n", - " recommendations.append(\n", - " {\n", - " 'user_id': user['user_id'],\n", - " 'item_id': self.item_id_reverse_mapping[self.most_popular_items[i]],\n", - " 'score': 1.0\n", - " }\n", - " )\n", - "\n", - " user_recommendations = pd.DataFrame(recommendations)\n", - "\n", - " self.recommender_df = pd.concat([self.recommender_df, user_recommendations])\n", - "\n", - " return self.recommender_df\n", - " \n", - "\n", - "class UserBasedCosineNearestNeighborsRecommender(NearestNeighborsRecommender):\n", - " \n", - " def __init__(self):\n", - " super().__init__()\n", - " \n", - " self.collaboration_type = 'user'\n", - " self.similarity_measure = 'cosine'\n", - " \n", - " \n", - "class UserBasedPearsonNearestNeighborsRecommender(NearestNeighborsRecommender):\n", - " \n", - " def __init__(self):\n", - " super().__init__()\n", - " \n", - " self.collaboration_type = 'user'\n", - " self.similarity_measure = 'pearson'\n", - " \n", - " \n", - "class ItemBasedCosineNearestNeighborsRecommender(NearestNeighborsRecommender):\n", - " \n", 
- " def __init__(self):\n", - " super().__init__()\n", - " \n", - " self.collaboration_type = 'item'\n", - " self.similarity_measure = 'cosine'\n", - " \n", - "\n", - "class ItemBasedPearsonNearestNeighborsRecommender(NearestNeighborsRecommender):\n", - " \n", - " def __init__(self):\n", - " super().__init__()\n", - " \n", - " self.collaboration_type = 'item'\n", - " self.similarity_measure = 'pearson'" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "nonprofit-roads", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Recommendations\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user_iditem_idscoretitlegenres
013550.955688Flintstones, The (1994)Children|Comedy|Fantasy
11733230.291761Girl Who Kicked the Hornet's Nest, The (Luftslottet som sprängdes) (2009)Action|Crime|Mystery
2186050.280261Taxi 3 (2003)Action|Comedy
3136280.059450Flying Tigers (1942)Action|Drama|Romance|War
4167550.059450Bubba Ho-tep (2002)Comedy|Horror
5131650.051167Boiling Point (1993)Action|Drama
6140310.044312All the Pretty Horses (2000)Drama|Romance|Western
7119140.044312Smoke Signals (1998)Comedy|Drama
8172820.000000Hip Hop Witch, Da (2000)Comedy|Horror|Thriller
9121900.000000Why Do Fools Fall In Love? (1998)Drama
10440310.556855All the Pretty Horses (2000)Drama|Romance|Western
114733230.556855Girl Who Kicked the Hornet's Nest, The (Luftslottet som sprängdes) (2009)Action|Crime|Mystery
1243550.098477Flintstones, The (1994)Children|Comedy|Fantasy
13428060.098477Teaching Mrs. Tingle (1999)Comedy|Thriller
14456730.056855Punch-Drunk Love (2002)Comedy|Drama|Romance
15435670.056855Bossa Nova (2000)Comedy|Drama|Romance
1641450.049238Bad Boys (1995)Action|Comedy|Crime|Drama|Thriller
17431650.049238Boiling Point (1993)Action|Drama
18422750.000000Six-String Samurai (1998)Action|Adventure|Sci-Fi
19444830.000000Caddyshack II (1988)Comedy
20648960.653889Harry Potter and the Sorcerer's Stone (a.k.a. Harry Potter and the Philosopher's Stone) (2001)Adventure|Children|Fantasy
21619140.242896Smoke Signals (1998)Comedy|Drama
22640310.045903All the Pretty Horses (2000)Drama|Romance|Western
23622750.045903Six-String Samurai (1998)Action|Adventure|Sci-Fi
24635670.045903Bossa Nova (2000)Comedy|Drama|Romance
256733230.045543Girl Who Kicked the Hornet's Nest, The (Luftslottet som sprängdes) (2009)Action|Crime|Mystery
26615000.042938Grosse Pointe Blank (1997)Comedy|Crime|Romance
27644830.000000Caddyshack II (1988)Comedy
28672820.000000Hip Hop Witch, Da (2000)Comedy|Horror|Thriller
29621900.000000Why Do Fools Fall In Love? (1998)Drama
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Quick test of the recommender\n", - "\n", - "nearest_neighbors_recommender = NearestNeighborsRecommender()\n", - "nearest_neighbors_recommender.initialize(n_neighbors=20)\n", - "nearest_neighbors_recommender.fit(ml_ratings_df, None, ml_movies_df)\n", - "recommendations = nearest_neighbors_recommender.recommend(pd.DataFrame([[1], [4], [6]], columns=['user_id']), ml_movies_df, 10)\n", - "\n", - "recommendations = pd.merge(recommendations, ml_movies_df, on='item_id', how='left')\n", - "print(\"Recommendations\")\n", - "display(HTML(recommendations.to_html()))" - ] - }, - { - "cell_type": "markdown", - "id": "framed-negative", - "metadata": {}, - "source": [ - "# Training-test split evaluation" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "nominated-balloon", - "metadata": {}, - "outputs": [], - "source": [ - "from evaluation_and_testing.testing import evaluate_train_test_split_implicit" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "romantic-music", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:146: RuntimeWarning: invalid value encountered in true_divide\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
RecommenderHR@1HR@3HR@5HR@10NDCG@1NDCG@3NDCG@5NDCG@10
0UserBasedCosineNearestNeighborsRecommender0.1038960.1428570.2272730.4090910.1038960.1267770.1611410.219215
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "ub_cos_nn_recommender = UserBasedCosineNearestNeighborsRecommender()\n", - "ub_cos_nn_recommender.initialize(n_neighbors=30)\n", - "\n", - "ub_cos_nn_tts_results = [['UserBasedCosineNearestNeighborsRecommender'] + list(evaluate_train_test_split_implicit(\n", - " ub_cos_nn_recommender, ml_ratings_df.loc[:, ['user_id', 'item_id']], ml_movies_df))]\n", - "\n", - "ub_cos_nn_tts_results = pd.DataFrame(\n", - " ub_cos_nn_tts_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n", - "\n", - "display(HTML(ub_cos_nn_tts_results.to_html()))" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "historical-moses", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
RecommenderHR@1HR@3HR@5HR@10NDCG@1NDCG@3NDCG@5NDCG@10
0UserBasedPearsonNearestNeighborsRecommender0.1038960.1493510.2272730.4155840.1038960.1300240.1621610.221924
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "ub_pearson_nn_recommender = UserBasedPearsonNearestNeighborsRecommender()\n", - "ub_pearson_nn_recommender.initialize(n_neighbors=30)\n", - "\n", - "ub_pearson_nn_tts_results = [['UserBasedPearsonNearestNeighborsRecommender'] + list(evaluate_train_test_split_implicit(\n", - " ub_pearson_nn_recommender, ml_ratings_df.loc[:, ['user_id', 'item_id']], ml_movies_df))]\n", - "\n", - "ub_pearson_nn_tts_results = pd.DataFrame(\n", - " ub_pearson_nn_tts_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n", - "\n", - "display(HTML(ub_pearson_nn_tts_results.to_html()))" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "official-department", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
RecommenderHR@1HR@3HR@5HR@10NDCG@1NDCG@3NDCG@5NDCG@10
0ItemBasedCosineNearestNeighborsRecommender0.259740.5454550.6948050.9545450.259740.4213020.4824930.566174
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "ib_cos_nn_recommender = ItemBasedCosineNearestNeighborsRecommender()\n", - "ib_cos_nn_recommender.initialize(n_neighbors=30)\n", - "\n", - "ib_cos_nn_tts_results = [['ItemBasedCosineNearestNeighborsRecommender'] + list(evaluate_train_test_split_implicit(\n", - " ib_cos_nn_recommender, ml_ratings_df.loc[:, ['user_id', 'item_id']], ml_movies_df))]\n", - "\n", - "ib_cos_nn_tts_results = pd.DataFrame(\n", - " ib_cos_nn_tts_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n", - "\n", - "display(HTML(ib_cos_nn_tts_results.to_html()))" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "african-python", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
RecommenderHR@1HR@3HR@5HR@10NDCG@1NDCG@3NDCG@5NDCG@10
0ItemBasedPearsonNearestNeighborsRecommender0.1753250.3506490.4480520.5584420.1753250.277440.3173970.352948
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "ib_pearson_nn_recommender = ItemBasedPearsonNearestNeighborsRecommender()\n", - "ib_pearson_nn_recommender.initialize(n_neighbors=30)\n", - "\n", - "ib_pearson_nn_tts_results = [['ItemBasedPearsonNearestNeighborsRecommender'] + list(evaluate_train_test_split_implicit(\n", - " ib_pearson_nn_recommender, ml_ratings_df.loc[:, ['user_id', 'item_id']], ml_movies_df))]\n", - "\n", - "ib_pearson_nn_tts_results = pd.DataFrame(\n", - " ib_pearson_nn_tts_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n", - "\n", - "display(HTML(ib_pearson_nn_tts_results.to_html()))" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "electronic-criticism", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
RecommenderHR@1HR@3HR@5HR@10NDCG@1NDCG@3NDCG@5NDCG@10
0AmazonRecommender0.1818180.3116880.4025970.5519480.1818180.2578060.2946820.34147
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "from recommenders.amazon_recommender import AmazonRecommender\n", - "\n", - "amazon_recommender = AmazonRecommender()\n", - "\n", - "amazon_tts_results = [['AmazonRecommender'] + list(evaluate_train_test_split_implicit(\n", - " amazon_recommender, ml_ratings_df.loc[:, ['user_id', 'item_id']], ml_movies_df))]\n", - "\n", - "amazon_tts_results = pd.DataFrame(\n", - " amazon_tts_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n", - "\n", - "display(HTML(amazon_tts_results.to_html()))" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "saving-harrison", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
RecommenderHR@1HR@3HR@5HR@10NDCG@1NDCG@3NDCG@5NDCG@10
0TFIDFRecommender0.0259740.0909090.1363640.3181820.0259740.0643930.0836850.140799
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "from recommenders.tfidf_recommender import TFIDFRecommender\n", - "\n", - "tfidf_recommender = TFIDFRecommender()\n", - "\n", - "tfidf_tts_results = [['TFIDFRecommender'] + list(evaluate_train_test_split_implicit(\n", - " tfidf_recommender, ml_ratings_df.loc[:, ['user_id', 'item_id']], ml_movies_df))]\n", - "\n", - "tfidf_tts_results = pd.DataFrame(\n", - " tfidf_tts_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n", - "\n", - "display(HTML(tfidf_tts_results.to_html()))" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "random-source", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
RecommenderHR@1HR@3HR@5HR@10NDCG@1NDCG@3NDCG@5NDCG@10
0UserBasedCosineNearestNeighborsRecommender0.1038960.1428570.2272730.4090910.1038960.1267770.1611410.219215
1UserBasedPearsonNearestNeighborsRecommender0.1038960.1493510.2272730.4155840.1038960.1300240.1621610.221924
2ItemBasedCosineNearestNeighborsRecommender0.2597400.5454550.6948050.9545450.2597400.4213020.4824930.566174
3ItemBasedPearsonNearestNeighborsRecommender0.1753250.3506490.4480520.5584420.1753250.2774400.3173970.352948
4AmazonRecommender0.1818180.3116880.4025970.5519480.1818180.2578060.2946820.341470
5TFIDFRecommender0.0259740.0909090.1363640.3181820.0259740.0643930.0836850.140799
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "tts_results = pd.concat([ub_cos_nn_tts_results, ub_pearson_nn_tts_results, ib_cos_nn_tts_results, \n", - " ib_pearson_nn_tts_results, amazon_tts_results, tfidf_tts_results]).reset_index(drop=True)\n", - "display(HTML(tts_results.to_html()))" - ] - }, - { - "cell_type": "markdown", - "id": "continued-harassment", - "metadata": {}, - "source": [ - "# Leave-one-out evaluation" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "external-realtor", - "metadata": {}, - "outputs": [], - "source": [ - "from evaluation_and_testing.testing import evaluate_leave_one_out_implicit" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "prerequisite-lounge", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
RecommenderHR@1HR@3HR@5HR@10NDCG@1NDCG@3NDCG@5NDCG@10
0UserBasedCosineNearestNeighborsRecommender0.0966670.1466670.1866670.3066670.0966670.1242850.1407820.178962
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "ub_cos_nn_recommender = UserBasedCosineNearestNeighborsRecommender()\n", - "ub_cos_nn_recommender.initialize(n_neighbors=30)\n", - "\n", - "ub_cos_nn_loo_results = [['UserBasedCosineNearestNeighborsRecommender'] + list(evaluate_leave_one_out_implicit(\n", - " ub_cos_nn_recommender, ml_ratings_df.loc[:, ['user_id', 'item_id']], ml_movies_df, max_evals=300, seed=6789))]\n", - "\n", - "ub_cos_nn_loo_results = pd.DataFrame(\n", - " ub_cos_nn_loo_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n", - "\n", - "display(HTML(ub_cos_nn_loo_results.to_html()))" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "alternate-seller", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
RecommenderHR@1HR@3HR@5HR@10NDCG@1NDCG@3NDCG@5NDCG@10
0UserBasedPearsonNearestNeighborsRecommender0.10.150.180.3133330.10.1271820.1395180.181748
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "ub_pearson_nn_recommender = UserBasedPearsonNearestNeighborsRecommender()\n", - "ub_pearson_nn_recommender.initialize(n_neighbors=30)\n", - "\n", - "ub_pearson_nn_loo_results = [['UserBasedPearsonNearestNeighborsRecommender'] + list(evaluate_leave_one_out_implicit(\n", - " ub_pearson_nn_recommender, ml_ratings_df.loc[:, ['user_id', 'item_id']], ml_movies_df, max_evals=300, seed=6789))]\n", - "\n", - "ub_pearson_nn_loo_results = pd.DataFrame(\n", - " ub_pearson_nn_loo_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n", - "\n", - "display(HTML(ub_pearson_nn_loo_results.to_html()))" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "regulation-economy", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
RecommenderHR@1HR@3HR@5HR@10NDCG@1NDCG@3NDCG@5NDCG@10
0ItemBasedCosineNearestNeighborsRecommender0.2666670.420.5133330.650.2666670.3577360.3960330.440599
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "ib_cos_nn_recommender = ItemBasedCosineNearestNeighborsRecommender()\n", - "ib_cos_nn_recommender.initialize(n_neighbors=30)\n", - "\n", - "ib_cos_nn_loo_results = [['ItemBasedCosineNearestNeighborsRecommender'] + list(evaluate_leave_one_out_implicit(\n", - " ib_cos_nn_recommender, ml_ratings_df.loc[:, ['user_id', 'item_id']], ml_movies_df, max_evals=300, seed=6789))]\n", - "\n", - "ib_cos_nn_loo_results = pd.DataFrame(\n", - " ib_cos_nn_loo_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n", - "\n", - "display(HTML(ib_cos_nn_loo_results.to_html()))" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "id": "changed-affair", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
RecommenderHR@1HR@3HR@5HR@10NDCG@1NDCG@3NDCG@5NDCG@10
0ItemBasedPearsonNearestNeighborsRecommender0.1733330.280.3366670.420.1733330.2345220.2577590.284723
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "ib_pearson_nn_recommender = ItemBasedPearsonNearestNeighborsRecommender()\n", - "ib_pearson_nn_recommender.initialize(n_neighbors=30)\n", - "\n", - "ib_pearson_nn_loo_results = [['ItemBasedPearsonNearestNeighborsRecommender'] + list(evaluate_leave_one_out_implicit(\n", - " ib_pearson_nn_recommender, ml_ratings_df.loc[:, ['user_id', 'item_id']], ml_movies_df, max_evals=300, seed=6789))]\n", - "\n", - "ib_pearson_nn_loo_results = pd.DataFrame(\n", - " ib_pearson_nn_loo_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n", - "\n", - "display(HTML(ib_pearson_nn_loo_results.to_html()))" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "id": "varying-customs", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
RecommenderHR@1HR@3HR@5HR@10NDCG@1NDCG@3NDCG@5NDCG@10
0AmazonRecommender0.1666670.2566670.320.4266670.1666670.2190860.2454860.279978
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "from recommenders.amazon_recommender import AmazonRecommender\n", - "\n", - "amazon_recommender = AmazonRecommender()\n", - "\n", - "amazon_loo_results = [['AmazonRecommender'] + list(evaluate_leave_one_out_implicit(\n", - " amazon_recommender, ml_ratings_df.loc[:, ['user_id', 'item_id']], ml_movies_df, max_evals=300, seed=6789))]\n", - "\n", - "amazon_loo_results = pd.DataFrame(\n", - " amazon_loo_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n", - "\n", - "display(HTML(amazon_loo_results.to_html()))" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "id": "behind-cambodia", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
RecommenderHR@1HR@3HR@5HR@10NDCG@1NDCG@3NDCG@5NDCG@10
0TFIDFRecommender0.0066670.0533330.1233330.2333330.0066670.0334910.0621780.096151
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "tfidf_recommender = TFIDFRecommender()\n", - "\n", - "tfidf_loo_results = [['TFIDFRecommender'] + list(evaluate_leave_one_out_implicit(\n", - " tfidf_recommender, ml_ratings_df.loc[:, ['user_id', 'item_id']], ml_movies_df, max_evals=300, seed=6789))]\n", - "\n", - "tfidf_loo_results = pd.DataFrame(\n", - " tfidf_loo_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n", - "\n", - "display(HTML(tfidf_loo_results.to_html()))" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "id": "lightweight-password", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
RecommenderHR@1HR@3HR@5HR@10NDCG@1NDCG@3NDCG@5NDCG@10
0UserBasedCosineNearestNeighborsRecommender0.0966670.1466670.1866670.3066670.0966670.1242850.1407820.178962
1UserBasedPearsonNearestNeighborsRecommender0.1000000.1500000.1800000.3133330.1000000.1271820.1395180.181748
2ItemBasedCosineNearestNeighborsRecommender0.2666670.4200000.5133330.6500000.2666670.3577360.3960330.440599
3ItemBasedPearsonNearestNeighborsRecommender0.1733330.2800000.3366670.4200000.1733330.2345220.2577590.284723
4AmazonRecommender0.1666670.2566670.3200000.4266670.1666670.2190860.2454860.279978
5TFIDFRecommender0.0066670.0533330.1233330.2333330.0066670.0334910.0621780.096151
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "loo_results = pd.concat([ub_cos_nn_loo_results, ub_pearson_nn_loo_results, ib_cos_nn_loo_results, \n", - " ib_pearson_nn_loo_results, amazon_loo_results, tfidf_loo_results]).reset_index(drop=True)\n", - "display(HTML(loo_results.to_html()))" - ] - }, - { - "cell_type": "markdown", - "id": "ordinary-milwaukee", - "metadata": {}, - "source": [ - "# Tasks" - ] - }, - { - "cell_type": "markdown", - "id": "resident-empire", - "metadata": {}, - "source": [ - "**Task 1.** Add euclidean distance as eligible similarity measure in the nearest neighbors recommender and compare the results of such a recommender to other recommenders tested in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "studied-heart", - "metadata": {}, - "outputs": [], - "source": [ - "# Write your code in the original class and tests here" - ] - }, - { - "cell_type": "markdown", - "id": "vocal-istanbul", - "metadata": {}, - "source": [ - "**Task 2.** Find the optimal number of neighbors for the User-Based Cosine Nearest Neighbors Recommender for $1 \\leq \\text{n_neighbors} \\leq 100$ and the train-test split testing scheme." 
- ] - }, - { - "cell_type": "code", - "execution_count": 38, - "id": "commercial-belly", - "metadata": {}, - "outputs": [], - "source": [ - "# Write your code here" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/environment.yml b/environment.yml index c0114cc..601a1e5 100644 --- a/environment.yml +++ b/environment.yml @@ -84,3 +84,4 @@ dependencies: - urllib3==1.26.4 - wcwidth==0.2.5 - webencodings==0.5.1 + - hyperopt==0.2.5 diff --git a/jupyter_test.ipynb b/jupyter_test.ipynb deleted file mode 100644 index 9db331f..0000000 --- a/jupyter_test.ipynb +++ /dev/null @@ -1,224 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "spread-happiness", - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib inline\n", - "%load_ext autoreload\n", - "%autoreload 2\n", - "\n", - "import numpy as np\n", - "import pandas as pd\n", - "import torch\n", - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "from IPython.display import Markdown, display, HTML\n", - "\n", - "# Fix the dying kernel problem (only a problem in some installations - you can remove it, if it works without it)\n", - "import os\n", - "os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "adult-compensation", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Numpy array\n", - "[[1. 2. 3.]\n", - " [4. 5. 6.]\n", - " [7. 8. 
9.]]\n", - "\n", - "Pandas DataFrame\n", - " A B C\n", - "0 1.0 2.0 3.0\n", - "1 4.0 5.0 6.0\n", - "2 7.0 8.0 9.0\n", - "\n", - "Pretty display of pandas DataFrame\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ABC
1.02.03.0
4.05.06.0
7.08.09.0
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "PyTorch tensor\n", - "tensor([[1., 2., 3.],\n", - " [4., 5., 6.],\n", - " [7., 8., 9.]], dtype=torch.float64)\n", - "\n", - "Matplolib chart\n" - ] - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXAAAAD4CAYAAAD1jb0+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAhNElEQVR4nO3deWAU9d3H8fcPCOQAwpFwhhBuyAGI4VaLiBUVRcCz9UTFtmprn1aIKAiCFahaaWtVtIpWq48lQRARUcSjXggeOTlCuEICCQRy3/t7/ki0lEdFsptMdvfz+iebyWbnM4Z8nMzOfMdYaxEREe/TwukAIiLSMCpwEREvpQIXEfFSKnARES+lAhcR8VKtmnJlYWFhNioqqilXKSLi9bZt23bEWht+8vImLfCoqCi2bt3alKsUEfF6xph937Vch1BERLyUClxExEupwEVEvFSTHgP/LtXV1WRnZ1NRUeF0lCYXGBhIREQEAQEBTkcRES/keIFnZ2fTrl07oqKiMMY4HafJWGs5evQo2dnZ9OnTx+k4IuKFTnkIxRjzrDEmzxiTesKyTsaYt40xu+o/dmxogIqKCjp37uxX5Q1gjKFz585++ZeHiHjGjzkGvhKYfNKyBGCTtXYAsKn+8wbzt/L+hr9ut4h4xikL3Fr7AVBw0uKpwPP1j58HLvNsLBER33DweDkLX0+jptbl8ddu6FkoXa21ufWPDwFdv++JxphZxpitxpit+fn5DVxd42rbti0AOTk5XH755Q6nERFf4HJZ/vHJXn766Pu8suUA6blFHl+H229iWmutMeZ77wphrV0BrACIj49v1neP6NGjB6tWrWrUddTU1NCqlePvHYtII8rKLyEhMYUtews4e0AYf5gWR69OwR5fT0P3wA8bY7oD1H/M81wk5+zdu5fY2FgAVq5cyfTp05k8eTIDBgxg9uzZ3z5v48aNjB07lhEjRnDFFVdQUlICwAMPPMDIkSOJjY1l1qxZfHO3owkTJnDXXXcRHx/P8uXLm37DRKRJ1NS6eOK93Uxe/iHbDxWx7PKhvDBzVKOUNzR8D3wtcAOwpP7jGk+EWfh6Guk5nv0zI7pHe+6/JKZB3/vVV1/x5Zdf0qZNGwYNGsSdd95JUFAQixcv5p133iEkJISlS5fy6KOPMn/+fO644w7mz58PwHXXXce6deu45JJLAKiqqtIcGBEflpZTyJzEZFIPFjE5phsPTI2hS/vARl3nKQvcGPMyMAEIM8ZkA/dTV9yvGmNuBvYBVzZmSKecd955hIaGAhAdHc2+ffs4fvw46enpjB8/Hqgr5rFjxwKwefNmli1bRllZGQUFBcTExHxb4FdddZUzGyEijaqiupa/vLuLJ9/PomNwa574+QgujOveJOs+ZYFba6/5ni+d5+EsDd5Tbixt2rT59nHLli2pqanBWsv555/Pyy+//F/Praio4Fe/+hVbt26lV69eLFiw4L/O8Q4JCWmy3CLSNLbuLWB2YjJZ+aXMGBHBvClD6BDcusnWr1kop2nMmDF89NFHZGZmAlBaWsrOnTu/LeuwsDBKSkoa/c1QEXFOaWUNC9amccVTn1BZ7eKFmaN45MphTVre0Awupfc24eHhrFy5kmuuuYbKykoAFi9ezMCBA7n11luJjY2
lW7dujBw50uGkItIYPtiZzz1JKeQUlnP9mN7MnjyYkDbOVKn55kyJphAfH29PfiMvIyODIUOGNFmG5sbft1/EWxwvq2LxGxms2pZN3/AQls0YSnxUpyZZtzFmm7U2/uTl2gMXETmFN1NymbcmjWNlVdx+bj/unDiAwICWTsdSgYuIfJ+8ogrmr0ljQ9ohYnq05/mZI4npEep0rG81iwK31vrlYKemPHwlIj+etZZV27JZtC6dihoXsycPYtbZfWnVsnmd9+F4gQcGBnL06FG/Gyn7zTzwwMDGPdFfRE7PgYIy5q5O4cNdRxgV1YmHZsTRL7yt07G+k+MFHhERQXZ2Ns110FVj+uaOPCLivNr64VPL3tqBARZNjeHno3vTokXz3bF0vMADAgJ0RxoRcVRmXglzEpPZtu8YPxkYzh+mx9GzQ5DTsU7J8QIXEXFKda2LFR9ksfydXQS3acmjVw5j2hk9veZwrgpcRPxS6sFC7l6VTEZuERfHdWfBpTGEt2tz6m9sRlTgIuJXKqpreeydXTz9YRadQlrz5LVnMjm2m9OxGkQFLiJ+Y8ueAhISk8k6UsqV8RHce1E0ocEBTsdqMBW4iPi8ksoalr65nX98uo+IjkG8ePNozhoQ5nQst6nARcSnbd6Rx71JKeQWVTBzfB9+f8FAglv7RvX5xlaIiJzkWGkVi9alk/TlQfp3aUviL8cxIrKj07E8SgUuIj7FWsv6lEPcvzaV42XV/Hpif26f2J82rZwfPuVpKnAR8RmHiyqY91oqG9MPE9czlH/cPJoh3ds7HavRqMBFxOtZa3l16wEWv5FBVY2LuRcNZub4Ps1u+JSnqcBFxKvtP1rGPauT+SjzKKP7dGLpjKFEhfnHPWhV4CLilWpdlpUf7+Xht3bQsoXhwWmxXDMyslkPn/I0FbiIeJ1dh4uZnZjMl/uPM3FwFx6cFkv30OY/fMrTVOAi4jWqalw8+f5u/vpuJiFtWvLYVcOZOryH1wyf8jQVuIh4heTs48xelcz2Q8VcMqwH918STVhb7xo+5WkqcBFp1sqrannsnZ08/WEW4e3a8PT18Zwf3dXpWM2CClxEmq1Ps46SkJjM3qNlXDOqF/dcNIT2gd47fMrTVOAi0uwUV1Sz5M3tvPTZfiI7BfPPW0Yzrr/3D5/yNBW4iDQr724/zL2rUzlcVMGtZ/fhf84fRFBr37sM3hNU4CLSLBwtqeSBdems+SqHgV3b8sS14xneq4PTsZo1FbiIOMpay+vJuSxYm0ZxRTV3TRrAryb0p3Ur374M3hNU4CLimEOFFdz3WgrvZOQxrFcHls0YyqBu7ZyO5TXcKnBjzG+BWwALpAA3WWsrPBFMRHyXy2V55fMDPLQ+g2qXi/suHsJN4/vQ0o8ug/eEBhe4MaYn8Gsg2lpbbox5FbgaWOmhbCLig/YeKSUhKZlPswoY27czS2bE0buzfwyf8jR3D6G0AoKMMdVAMJDjfiQR8UW1Lsuz/97DI2/vIKBFC5ZMj+Oqkb389jJ4T2hwgVtrDxpjHgb2A+XARmvtxpOfZ4yZBcwCiIyMbOjqRMSL7ThUzOxVX/N1diGThnRh8WVxdAsNdDqW12vw27zGmI7AVKAP0AMIMcZce/LzrLUrrLXx1tr48PDwhicVEa9TWVPLn97eyZS/fMiBY+X8+ZozePr6eJW3h7hzCGUSsMdamw9gjEkCxgEveiKYiHi3L/cfY05iMjsPl3DZ8B7MvySGTiGtnY7lU9wp8P3AGGNMMHWHUM4DtnoklYh4rfKqWh7ZuIO/f7SHbu0DefbGeCYO1vCpxuDOMfDPjDGrgC+AGuBLYIWngomI9/l49xESElPYX1DGz0ZHcs+Fg2mn4VONxq2zUKy19wP3eyiLiHipoopqHlqfwctbDhDVOZhXZo1hTN/OTsfyeboSU0Tc8k76Ye59LYX84kpmndOX304aqOFTTUQFLiINcrSkkgWvp/P61zkM7taOp6+PZ2hEB6dj+RU
VuIicFmsta77KYeHraZRU1vA/5w/kFz/pp+FTDlCBi8iPlnO8nPteS+Xd7XmcEdmBpTOGMrCrhk85RQUuIqfkcln+uWU/S97cTq3LMn9KNDeMi9LwKYepwEXkB+05UsqcxGS27CngrP5hPDQ9jl6dgp2OJajAReR71NS6eObfe/jT2ztp3aoFy2YM5Yr4CA2fakZU4CLy/2TkFjEnMZnk7EJ+Gt2VRZfF0rW95pc0NypwEflWZU0tj7+byd/e201oUACP/2wEF8V10153M6UCFxEAtu2rGz6VmVfC9DN6Mm9KNB01fKpZU4GL+Lmyqhr++NYOVn68lx6hQTx300jOHdTF6VjyI6jARfzYv3cdISEpmexj5Vw/tjezJw+mbRvVgrfQT0rEDxWWVfPg+nRe3ZpN37AQXr1tLKP6dHI6lpwmFbiIn9mQeoh5a1IpKK3iVxP68evzBhAYoOFT3kgFLuIn8osrWbA2jTdSconu3p7nbhxJbM9Qp2OJG1TgIj7OWkvSFwd5YF065dW13H3BIGad05eAlho+5e1U4CI+LPtYGXNXp/LBznzO7N2RpTOG0r9LW6djiYeowEV8kMtlefGzfSx9czsWWHhpDNeN6U0LDZ/yKSpwER+zO7+EOauS2brvGGcPqBs+FdFRw6d8kQpcxEdU17pY8UEWyzftIiigJQ9fMYwZI3rqMngfpgIX8QGpBwuZk5hMWk4RF8Z2Y+HUGLq00/ApX6cCF/FiFdW1/OXdXTz5fhYdg1vz5LUjmBzb3elY0kRU4CJeauveAmYnJpOVX8rlZ0Yw7+JoQoMDnI4lTUgFLuJlSivrhk89/0nd8KkXZo7inIHhTscSB6jARbzI+zvzmZuUQk5hOTeMjeLuCwYRouFTfks/eREvcLysikXrMkj8Ipt+4SH867axxEdp+JS/U4GLNHNvpuQyb00ax8uquP3cftw5UcOnpI4KXKSZyiuqYP6aNDakHSK2Z3uenzmSmB4aPiX/oQIXaWastfxrWzaL16VTUeNizuTB3Hp2H1pp+JScRAUu0owcKChj7uoUPtx1hFFRnVgyI46+4Ro+Jd/NrQI3xnQAngFiAQvMtNZ+4oFcIn6l1mV54ZO9/PGtHRhg0dQYfj5aw6fkh7m7B74c2GCtvdwY0xrQxByR05SZV8zsVcl8sf84EwaF8+C0OHp2CHI6lniBBhe4MSYUOAe4EcBaWwVUeSaWiO+rrnXx1Pu7+fOmTILbtORPVw3jsuEaPiU/njt74H2AfOA5Y8wwYBvwG2tt6YlPMsbMAmYBREZGurE6Ed+Rkl3I7MRkMnKLuHhodxZeGkNY2zZOxxIv487b2q2AEcAT1tozgFIg4eQnWWtXWGvjrbXx4eG63Ff8W0V1LUve3M5lf/uIIyWVPHXdmTz+sxEqb2kQd/bAs4Fsa+1n9Z+v4jsKXETqbNlTQEJiMllHSrkqvhdzLxqi4VPilgYXuLX2kDHmgDFmkLV2B3AekO65aCK+obiimmUbdvCPT/cR0TGIF28ezVkDwpyOJT7A3bNQ7gReqj8DJQu4yf1IIr5j84487k1KIbeogpnj+/D7CwYS3FqXX4hnuPUvyVr7FRDvmSgivuNYaRWL1qWT9OVB+ndpS+IvxzEisqPTscTHaFdAxIOstbyRksv9a9IoLK/m1xP7c/vE/rRppeFT4nkqcBEPOVxUwX2vpfJ2+mGGRoTy4i2jGdK9vdOxxIepwEXcZK3l1a0HWPxGBlU1LuZeNJiZ4zV8ShqfClzEDfuPlpGQlMzHu48yuk8nls4YSlRYiNOxxE+owEUaoNZlWfnxXh5+awctWxgenBbLNSMjNXxKmpQKXOQ07TxcN3zqqwPHmTi4Cw9Oi6V7qIZPSdNTgYv8SFU1Lp54bzd/3byLdoEBLL96OJcO66HhU+IYFbjIj/D1gePMSUxm+6FiLhnWgwWXRNNZ80vEYSpwkR9QXlXLY+/s5OkPs+jSLpBnro9nUnRXp2OJACp
wke/1adZREhKT2Xu0jGtG9eKei4bQPlDDp6T5UIGLnKS4opolb27npc/2E9kpmH/eMppx/TV8SpofFbjICd7dfpi5SankFVdwy1l9+N1PBxHUWpfBS/OkAhcBjpZU8sC6dNZ8lcOgru148rozGd6rg9OxRH6QClz8mrWW15NzWbA2jeKKan5z3gBuP7c/rVvpMnhp/lTg4rdyC8uZ91oq72TkMaxXB5bNGMqgbu2cjiXyo6nAxe+4XJZXPj/AQ+szqHa5uO/iIdw0vg8tdRm8eBkVuPiVvUdKSUhK5tOsAsb27cySGXH07qzhU+KdVODiF2pqXTz70R4e2biT1i1b8ND0OK4e2UuXwYtXU4GLz9t+qIg5q5L5OruQSUO6sviyWLqFBjodS8RtKnDxWZU1tTy+eTd/25xJaFAAf7nmDKYM7a69bvEZKnDxSV/uP8bsVcnsyivhsuE9mH9JDJ1CWjsdS8SjVODiU8qqanhk406e/WgPXdsF8uyN8UwcrOFT4ptU4OIzPs48QkJSCvsLyvj56EgSLhxMOw2fEh+mAhevV1hezUPrM3jl8wNEdQ7mlVljGNO3s9OxRBqdCly82tvph7nvtRTyiyu57Sd9+e2kgQQGaPiU+AcVuHilIyWVLFibxrrkXAZ3a8fT18czNKKD07FEmpQKXLyKtZY1X+Ww8PU0Sitr+d35A/nFhH4EtNTwKfE/KnDxGjnHy7l3dQqbd+RzRmTd8KkBXTV8SvyXClyaPZfL8tKW/SxZn4HLwvwp0dwwLkrDp8TvqcClWcvKLyEhKYUtewo4q38YD02Po1enYKdjiTQLKnBplmpqXTzz7z386e2dtGnVgmWXD+WKMyN0GbzICdwucGNMS2ArcNBaO8X9SOLv0nOKmJ34NakHi7ggpiuLpsbSpb2GT4mczBN74L8BMoD2Hngt8WOVNbX8ZVMmT76/mw7BATz+sxFcFNdNe90i38OtAjfGRAAXAw8C/+ORROKXtu07xpzEZDLzSpg+oifzLo6mo4ZPifwgd/fAHwNmA997LpcxZhYwCyAyMtLN1YmvKa2s4eGNO1j58V56hAbx/MxR/GRguNOxRLxCgwvcGDMFyLPWbjPGTPi+51lrVwArAOLj421D1ye+58Nd+dyTlEL2sXKuH9ub2ZMH07aN3lcX+bHc+W0ZD1xqjLkICATaG2NetNZe65lo4qsKy6p5cH06r27Npm9YCK/eNpZRfTo5HUvE6zS4wK219wD3ANTvgf9e5S2nsiH1EPPWpFJQWsUvJ/TjN+cN0PApkQbS36vSJPKLK7l/bSrrUw4R3b09z904ktieoU7HEvFqHilwa+17wHueeC3xLdZakr44yAPr0imvruXuCwYx65y+Gj4l4gHaA5dGk32sjLmrU/lgZz7xvTuyZMZQ+ndp63QsEZ+hAhePc7ksL362j6VvbscCCy+N4boxvWmh4VMiHqUCF4/anV/CnFXJbN13jHMGhvOHabFEdNTwKZHGoAIXj6iudbHigyyWb9pFUEBLHr5iGDNG9NRl8CKNSAUubks9WMicxGTScoq4KK4bCy6NoUs7DZ8SaWwqcGmwiupa/rxpF099kEXH4NY8ee0IJsd2dzqWiN9QgUuDbN1bwOzEZLLyS7nizAjuuzia0OAAp2OJ+BUVuJyWksoa/rhhOy98uo8eoUG8MHMU52j4lIgjVODyo72/M5+5SSnkFJZzw9go7r5gECEaPiXiGP32ySkdL6ti0boMEr/Ipl94CP+6bSzxURo+JeI0Fbj8oDdTcpm3Jo3jZVXccW5/7pjYX8OnRJoJFbh8p7yiCuavSWND2iFie7bn+Zkjiemh4VMizYkKXP6LtZZ/bctm8bp0KmtcJFw4mFvO6kMrDZ8SaXZU4PKtAwVlzF2dwoe7jjAqqhNLZsTRN1zDp0SaKxW4UOuyvPDJXpZt2EELA4sui+XnoyI1fEqkmVOB+7nMvGJmr0rmi/3HmTAonAenxdGzQ5DTsUT
kR1CB+6nqWhdPvb+bP2/KJLhNS/501TAuG67hUyLeRAXuh1KyC7l71ddsP1TMxUO7s/DSGMLatnE6loicJhW4H6moruWxd3bx9IdZdA5pzVPXnckFMd2cjiUiDaQC9xOfZR0lISmFPUdKuSq+F3MvHkJokIZPiXgzFbiPK66oZtmGHfzj03306hTES7eMZnz/MKdjiYgHqMB92OYdedyblEJuUQUzx/fh9xcMJLi1fuQivkK/zT7oWGkVi9alk/TlQQZ0aUviL8cxIrKj07FExMNU4D7EWssbKbncvyaNwvJqfn3eAG4/tx9tWmn4lIgvUoH7iMNFFdz3Wipvpx9maEQoL94ymiHd2zsdS0QakQrcy1lr+d/PD/Dg+gyqalzcc+FgbtbwKRG/oAL3YvuPlpGQlMzHu48yuk8nls4YSlRYiNOxRKSJqMC9UK3L8txHe3h44w5atWjBH6bFcfXIXho+JeJnVOBeZsehYmYnJvP1geNMHNyFB6fF0j1Uw6dE/JEK3EtU1bj423uZPL45k3aBASy/ejiXDuuh4VMifqzBBW6M6QW8AHQFLLDCWrvcU8HkP74+cJzZq5LZcbiYqcN7MH9KNJ01fErE77mzB14D/M5a+4Uxph2wzRjztrU23UPZ/F55VS2Pvr2Dv/97D13aBfLM9fFMiu7qdCwRaSYaXODW2lwgt/5xsTEmA+gJqMA94JPdR0lISmbf0TJ+NjqShAsH0z5Qw6dE5D88cgzcGBMFnAF89h1fmwXMAoiMjPTE6nxaUUU1D63fzstb9tO7czD/vHU04/pp+JSI/H9uF7gxpi2QCNxlrS06+evW2hXACoD4+Hjr7vp82aaMw9y7OpW84gpmndOX304aSFBrXQYvIt/NrQI3xgRQV94vWWuTPBPJ/xwtqWTh6+ms/TqHQV3b8eR1ZzK8VwenY4lIM+fOWSgG+DuQYa191HOR/Ie1lrVf57Dw9XSKK6r57aSB/HJCP1q30mXwInJq7uyBjweuA1KMMV/VL5trrV3vdio/kFtYzn2rU9m0PY/hvTqw7PKhDOzazulYIuJF3DkL5d+AriI5TS6X5eXP9/PQ+u3UuizzpkRz47goWuoyeBE5TboSswntPVJKQlIyn2YVMK5fZ5ZMH0pk52CnY4mIl1KBN4GaWhfPfrSHRzbupHWrFiydEceV8b10GbyIuEUF3sgycouYk5hMcnYhk4Z05cFpsXRtH+h0LBHxASrwRlJZU8vjm3fzt82ZhAYF8NefncHFcd211y0iHqMCbwRf7D/GnFXJ7MorYdoZPZk/JZqOIa2djiUiPkYF7kFlVTU8snEnz360h27tA3nuxpGcO7iL07FExEepwD3ko8wjJCQlc6CgnGvHRDJn8mDaafiUiDQiFbibCsureWh9Bq98foA+YSH876wxjO7b2elYIuIHVOBu2Jh2iPteS+VoaRW/+Ek/7po0gMAADZ8SkaahAm+A/OJKFryexhvJuQzp3p6/3zCSuIhQp2OJiJ9RgZ8Gay2vfXWQha+nU1ZZy+/OH8gvJvQjoKWGT4lI01OB/0g5x8u5d3UKm3fkMyKybvhU/y4aPiUizlGBn4LLZXlpy36WrM/AZWH+lGhu0PApEWkGVOA/ICu/hITEFLbsLeDsAWH8YVocvTpp+JSINA8q8O9QU+vi6Q/38Kd3dhLYqgV/vHwol58ZocvgRaRZUYGfJD2niNmJX5N6sIgLYrqyaGosXTR8SkSaIRV4vYrqWv76biZPvr+bDsEB/O3nI7gorrvTsUREvpcKHNi27xhzEpPJzCth+oi64VMdgjV8SkSaN78u8NLKGv741g6e/2QvPUKDWHnTSCYM0vApEfEOflvgH+7K556kFA4eL+f6Mb25e/Jg2rbx2/8cIuKF/K6xCsuqWfxGOv/alk3f8BD+ddtY4qM6OR1LROS0+VWBb0jNZd6aNApKq7j93H7cOVHDp0TEe/lFgecVV3D/mjTeTD1ETI/2PHfjSGJ
7aviUiHg3ny5way2JXxxk0bp0yqtrmT15ELee3VfDp0TEJ/hsgWcfK2Pu6lQ+2JlPfO+OLJkxlP5d2jodS0TEY3yuwF0uyz8+3cfSDdsBWHhpDNeN6U0LDZ8SER/jUwWemVdCQmIyW/cd45yB4fxhWiwRHTV8SkR8k08UeHWtixUfZLF80y6CAlryyBXDmD6ip4ZPiYhP8/oCTz1YyOxVyaTnFnFhbDcWTo2hSzsNnxIR3+e1BV5RXcvyTbtY8UEWnUJa8+S1I5gcq+FTIuI/vLLAP99bwJxVyWQdKeXK+AjuvSia0OAAp2OJiDQptwrcGDMZWA60BJ6x1i7xSKrvUVJZw7IN23nhk31EdAzixZtHc9aAsMZcpYhIs9XgAjfGtAQeB84HsoHPjTFrrbXpngp3ovd25HHv6lRyCsu5cVwUd18wiBANnxIRP+ZOA44CMq21WQDGmFeAqYDHC/yepBRe3rKf/l3asuoX4zizd0dPr0JExOu4U+A9gQMnfJ4NjD75ScaYWcAsgMjIyAatKKpzMHdO7M8dE/vTppWGT4mIQBO8iWmtXQGsAIiPj7cNeY3bftLPo5lERHyBO1OdDgK9Tvg8on6ZiIg0AXcK/HNggDGmjzGmNXA1sNYzsURE5FQafAjFWltjjLkDeIu60wiftdameSyZiIj8ILeOgVtr1wPrPZRFREROg+5sICLipVTgIiJeSgUuIuKlVOAiIl7KWNuga2satjJj8oF9Dfz2MOCIB+N4A22zf9A2+z53t7e3tTb85IVNWuDuMMZstdbGO52jKWmb/YO22fc11vbqEIqIiJdSgYuIeClvKvAVTgdwgLbZP2ibfV+jbK/XHAMXEZH/5k174CIicgIVuIiIl/KKAjfGTDbG7DDGZBpjEpzO05iMMb2MMZuNMenGmDRjzG+cztRUjDEtjTFfGmPWOZ2lKRhjOhhjVhljthtjMowxY53O1NiMMb+t/3edaox52RgT6HQmTzPGPGuMyTPGpJ6wrJMx5m1jzK76jx65L2SzL/ATbp58IRANXGOMiXY2VaOqAX5nrY0GxgC3+/j2nug3QIbTIZrQcmCDtXYwMAwf33ZjTE/g10C8tTaWujHUVzubqlGsBCaftCwB2GStHQBsqv/cbc2+wDnh5snW2irgm5sn+yRrba619ov6x8XU/VL3dDZV4zPGRAAXA884naUpGGNCgXOAvwNYa6ustccdDdU0WgFBxphWQDCQ43Aej7PWfgAUnLR4KvB8/ePngcs8sS5vKPDvunmyzxcagDEmCjgD+MzhKE3hMWA24HI4R1PpA+QDz9UfNnrGGBPidKjGZK09CDwM7AdygUJr7UZnUzWZrtba3PrHh4CunnhRbyhwv2SMaQskAndZa4ucztOYjDFTgDxr7TanszShVsAI4Alr7RlAKR76s7q5qj/uO5W6/3n1AEKMMdc6m6rp2bpztz1y/rY3FLjf3TzZGBNAXXm/ZK1NcjpPExgPXGqM2UvdIbKJxpgXnY3U6LKBbGvtN39draKu0H3ZJGCPtTbfWlsNJAHjHM7UVA4bY7oD1H/M88SLekOB+9XNk40xhrrjohnW2kedztMUrLX3WGsjrLVR1P1837XW+vSembX2EHDAGDOoftF5QLqDkZrCfmCMMSa4/t/5efj4G7cnWAvcUP/4BmCNJ17UrXtiNgU/vHnyeOA6IMUY81X9srn19x8V33In8FL9jkkWcJPDeRqVtfYzY8wq4Avqzrb6Eh+8pN4Y8zIwAQgzxmQD9wNLgFeNMTdTN1L7So+sS5fSi4h4J284hCIiIt9BBS4i4qVU4CIiXkoFLiLipVTgIiJeSgUuIuKlVOAiIl7q/wDJI5cVWj3dfwAAAABJRU5ErkJggg==\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Seaborn chart\n" - ] - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAtgAAAFqCAYAAAAgOS3mAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAACIJklEQVR4nO3deZxkZXk2/us5p/bq6m2me2aYAQTBISCbgOwiGGQdcMGA+DJGhbxJNCTEoKIkGhXFJS+iyc9ENIkxuICiiBhAWRJkUEDZgogsw8AwM72vtZ7l/v3xnFN1qrqqu7q7qruq+/p+aHq6q7rqqVPbde66n+coEREQEREREVFDGMs9ACIiIiKilYQBm4iIiIiogRiwiYiIiIgaiAGbiIiIiKiBGLCJiIiIiBqIAZuIiIiIqIEYsIla0M6dO3HkkUcu9zCIiJbc008/jT/8wz/EW9/6VuzcubOp17V582aMjo429TpodWLAJiIiopZx991349hjj8UPf/hDbNq0abmHQ7QgoeUeAFGjpdNpXHXVVdixYwcMw8AhhxyCT37ykzAMA/fccw+++tWvwrIsxGIxfPjDH8aRRx6Jq666CplMBtdffz2effZZbN26Fd/61rdwwAEHFC/3ueeewwc/+MEZ17d161a8/e1vL/vdJZdcgkMOOQS//OUvMTIygq1bt2JkZAQPPfQQstksvvSlL2Hz5s147LHH8IUvfAGFQgFDQ0M44YQT8JnPfGbGdXz1q1/FXXfdBdd1sXHjRnz84x/HunXrGr/xiGhVW+7Xzx//+Mf4zne+A8dxkMvl8A//8A+4+eab8Z3vfAeu66K7uxt/+7d/i1e/+tX4yEc+gmg0iieffBLDw8M466yz0Nvbi3vvvRdDQ0P49Kc/jeOPPx7bt2/HJz/5SWQyGQwODuKggw7Cl770JUSj0bKx1LoeogURohXmhz/8obz3ve8VERHbtuVjH/uYvPjii7J9+3Y599xzZXR0VEREfv/738uJJ54o6XRa0um0vPnNb5ZbbrlFzjnnHPnxj3+8qDH8n//zf+QDH/iAiIg89thj8prXvEbuvvtuERG55ppr5OqrrxYRkSuuuEJ++ctfiojI9PS0HHvssfLkk0/Kyy+/LEcccUTx9vzVX/2VWJYlIiLf/e535dJLL13U+IiIqmmF188vf/nL8vd///ciIvKrX/1KLr74YslkMiIicv/998tZZ50lIiIf/vCH5R3veIcUCgUZHByU17zmNfIf//EfIiLy7//+7/Ke97xHRESuvfZa+dGPfiQiIoVCQc4991y54447RETkNa95jYyMjMx6PUQLwQo2rThHHXUUrrvuOlxyySU44YQT8O53vxv77rsvbrzxRgwODuKP//iPi+dVSuGll17CQQcdhOuuuw5/9Ed/hPPOOw9btmyZcbnzqWADwOmnnw4A2HvvvQEAJ598MgBgn332wUMPPQQAuPbaa/E///M/+Od//me88MILyOVyyGQy6O7uLl7OvffeiyeffLJ4Ha7rIpvNLmzjEBHNolVeP3333XcfduzYgYsuuqj4u4mJCYyPjwMATj31VITDYfT19SGRSJS9zvrnufLKK/HAAw/ghhtuwIsvvojBwUFkMpm6ryf4ekxULwZsWnH23ntv/OxnP8OvfvUr/PKXv8R73vMeXH311XBdF8cffzy+9KUvFc+7e/du9Pf3AwC2b9+O7u5uPP300ygUCohEImWXe8ABB+DWW2+texyVfx8Oh2ec513vehcOOuggnHzyyTjrrLPw+
OOPQ0TKzuO6Li699FJcfPHFAIBCoYCJiYm6x0FEVK9Wef30ua6L888/H1deeWXx58HBQXR1dQGY+TobCs2MNX/9138Nx3Fw1lln4Y1vfCN2795d9XV2tushmi9OcqQV59vf/jauuuoqnHTSSbjyyitx0kkn4dlnn8Vxxx2HBx54AM8//zwA4L//+79x3nnnIZ/PY+fOnbjmmmvwr//6r9h///3xxS9+senjnJiYwP/+7//ib/7mb/DmN78ZAwMDeOmll+C6btn5TjrpJHz/+9/H9PQ0AOD666/Hhz70oaaPj4hWn1Z7/TzxxBNx++23Y3BwEADwne98B+9+97vndRm/+MUv8P73vx9nn302lFJ4/PHH4ThOw6+HKIgVbFpx3vKWt+Chhx7C2WefjXg8jr322gtbt25FV1cXPvnJT+Kv//qvISIIhUL46le/ikgkgg9+8IN43/veh9e85jX4u7/7O2zZsgUnnHAC3vjGNzZtnF1dXfiTP/kTvPWtb0V3dzd6enrwute9Djt27Ci2lQDAO97xDgwMDOCP/uiPoJTChg0bcO211zZtXES0erXa6+fJJ5+Myy67DO9973uhlEJHRwf+8R//EUqpui/jiiuuwPvf/350dXUhHo/jmGOOwUsvvdTw6yEKUlL5OQkRERERES0YW0SIiIiIiBqIAZuIiIiIqIEYsImIiIiIGogBm4iIiIiogRiwiYiIiIgaqK2X6RsZmYbrtu4iKD09CYyNZeY+4zLiGBuDY2wMjrEx+vpSC/o7vqYuHsfYGBxjY3CMjbGQ11RWsJsoFDKXewhz4hgbg2NsDI6RZtMO255jbAyOsTE4xuXDgE1ERERE1EAM2EREREREDcSATURERETUQG09yZFosZ7Z7eD+ZwRjaUFPUuHkzQqbN6zMfrBW5t8PE7lxdMWE9wO1POPhB2E+/hvAKgDhCJzDXwf3mONrnl/t2A7zsUegJicgnV1wjjgasu9+s16mu+9+UJn0rH8zH/4YJtNTCCVTi7682a6jUWOm5uD91HxtH7AXEpAWG6pWQihrhUAz3+3Y6O3+zG4HP3jYRc4GXBeYygt+8LDg7ceg7e7P2bTCfT2bZ3Y7+PGjLkIGkIwqTOUEP35UcB5W1v2wmrTDm/dcAXm2042HH4T5yC8BpQDDAGxL/wwAZ795xnWpHdsRuv8eiGFCojEgnUbo/ntg47TidplxmVYBxrO/g0SjkFRX1b+Zj+AYVMwbw713QmIJKKtQ836a674sOz0cgcplIJFYzdu5kHEv9rE038toh8fvYtTzeKTFUyLSumsyzeHhpydx80M2QgYQNgHLAWwXOO9Io+yN+e6nLDzwLFCwAVMBpgmkYrP/TS3BMDDX3/f1pTA0NFX1MpYzoAdvQzxqIJt357UNaglu50gIOPFA4E2HhOccQz3b8RdPjNd9/srrCW7rVEzwzB49RhFAAIQM/Z4mArgC9HcCl7+5+rhrqXVfL4XZHk/Nuq8b6ev32ZjKCSIhhVDIgG27KNiCVEzh0je2Xg1gOe/rei1mmT7Z/kJZuHD32gRj1876K6+GCZgmJJEEQiHAtqFcB/bJi3/zns+2ny0klYVZ/8kvAufo4+Aec3z1011XB1/vvAD07fO5LhAKo/uqj2BoaKrs+pHP67+xLf1dKSAShUQiUPm83m7+ZfrXV7whCrK2X/87k4ayLCAahXR2QRJJGDu216x4B+875PP6sl0HynUhUIBjl26jYQCGAenoLAZud69NMJ/5LcQwq96Xasd2hO65U1+/65a2jRkCxNWPg3AU6OmBff475n1fDz/yRDEIzvZYmu2+DobJeh6Pasd2hO69U28v/z6PRmGfesaM8zfq8dhM1cYYuvVmIJ0GwoH3OcsCksl530+NsFJfU1vv3WseHn5B4DhAOg84LmAaQCwE3P+MYPMGfZ67n7Jw79OAAmAowHL1V9gEIiGFSAiALWV/M5v7nxGEDP23AOb992WBJ4JlqdYFb4NSS
t+WOm5DZZDbr0+wfUhhLC1wBZjM6m1sKMCygXufBgCrGLKDf5+1gIgJJCKl7VjICb73KxfxsDsjKC5ku1du693jghesmeezvceDUgBcYM8E8IXbrXlV1ZerOjzX42mh9/VSGksL4pHy34VN/XtaWuqVl2EGK1tjYzB37YQkkpB4YvbKqx8KHRtwbKhQCBIOA+EwxALMxx6BXSNQNDp8VK3Q/eyngOMArlMaq+k9V5UCXBfm47+Be8zxemfBD52A3vMGdOAKsu3S30PpoFnl+tXUZPnfiQD5HFQ+Vwy2cJzSaZXnBYBCHiqT1j9bBaipKSjIjIo3oF+EVToN85WXIYkOSCIBNT0187L9y/d3IACo8VFI71ognYb5m4cgsTgQjenzVtyX5oP3A/lcKaT728exi9tL2bbe5jXM9kmB+dgjOhj7QbDKY2muamy9l1HcGcpm9Y6QqXcUIQJkczB/eX/Nx2+lysdzcEelURXj+bYolY1vckKPIygU0refGqatA/bwlCCtX8+goEN2ugA4E6UXkQee9cJ1xXTO6TzQGdf/ns+b+WLDwGIDeiOMpQVKAUNTAsd1YBpAR3T221AZ5IanBC8OA6mYIBkFdo/rajBU6bXWdnTI/s2LFiIhvSMUj+iviayuQodMQTyikLMEUzl9Xb3J8qDY1+eNGcBQVoo7Ux2R2cdcua1zVum8Ct54PVbF6389Oz+t0NpQ7fGUz5Z2VKZypce5r9XCa09SeRXs0u8sR/+elpb52yfhBsKIsnTVUxXyuiJdJZyYjz5SPbhlMkCyQ/97ljfvZnxcXRmqVCEPFPLeiWYpzPoVSkC/aHkBWVeUAbh2fVcoAkCAULjq9c+q8s2pBjU9Xb5j4Id7vyJevA+ktEMAQGWmgcx0fbcD0NtEKT12V6CyGb3tHEdXpGOJ4n2pJsZKL/i1iJTGWmGuVpt6guBcAXquy6i5M+RvV6UAJVDjY8U/n62XvbICrjJpmLtfgcQSQDKwo5Iu6J0+79OI+exUzrbd6gnZ0tk1s4Jt2/r31DBND9jT09O46KKL8M///M/YtGlT2WlPP/00rr76akxPT+Poo4/G3//93yMUqn9IjveaVnx9hA51WatUgcxbOoz5/GAVfD+Yz5v5YsNAK1TromFgcFJXmk1Dv56OZ3RrRC0zwqqtA2/WAjpiCuLFVdfVnxA7bmk7xyPA0JT+XTQMKKUQMvWnD9N5P8zq+8Y04VVaS0HxJ4+PYzqnK82GUdqZGsvo9o5a1ebyHYnyQD0bA6UxzLbzU091uNntQJWPp2xBMJ3X769+G8xYGlBK0OE9ZlstvJ68WeHHjwpgC0xTULAFtqt/T0tLTU+Vtz04jn4TdwJ7oJVh2a7ysRCAsmfcLG/e9VQY5307KkNVNnCUuGAYdN1SoASAsPdkClaU50G8irianIBAQaWnFnQ5ldTIUHn1vDKw1giwi79ilLaPV6FW6SlId2/pPP64Zus2DVSwyyqv1T5JsG2Yj/wSk88+rUNqPg8lbingh6OQnp7SEOcI0HOFyZo7QxVtOsV/7tiO0F23A7YFEYGamEBocAD2m8/RFfNf3g9kc94brFn8dEDls5BkUl9IPgeVywAixfHNZ6dyxicsFZ/A+OOstRPgHHG03qmwUNY24+y1CaFbb16xvedLrakB+/HHH8fVV1+NF198serpV155JT796U/jiCOOwEc/+lHcdNNNuPjii+u+fMvRIdt1Aq1x3ml+BRLQYSwUeP76zxsRKfbyBt/MZwtFwTDg9wJnC4ChpGbQq2yNcFwgFagqLiTwzNauMVeQK95+lG+z2V4fK4Oc473mOt5ra7GNscplDUyWzjea1ttKKR0CxdH3g+29j3dE9fmyBf3phAjQHwXG0/qyncB7DKDbfSay1ScoBnckDAXU+zanAjtks+38zLWztNB2oPmE8sodvsmc3q4KeufJcQAXegcqGWvN8Lp5g4nzAK/VBuiKzf4cateJxe1AQmGogT06N
Jlm6YltBrb1PCpdamhA/yMShX3iKdXP04SPq2eEqmB/c9U/0KeLUgh/6+sLDsXK29mQcARqfHTu6i6gr2uuqVDLNlXKG3vF9lCTE3o7AfWNzdsGxsMPwnz4wZmn+58kBMK6isWArNdGAxQ/eVB2Gs4hhxb/dK4A7RxxtK4U+59gAGWPxxmPPzOkW1wqK+9mSN/m6WndX14cgG7ZMf/757C3XqYr3ap0m4vfA9tQ+Tt8oVDx04LZ2lZmhF2rAECVHjuztCgVJ7QGArzsux9snFZ++Xttgvm/jxf76VU2g9A9d8I+bWbvOdWnqetg33TTTfj4xz+O/v7+Gae98soryOVyOOKIIwAAb3vb23DHHXfM6/L94OQHLxF9g8LFKqhCPOx90ued7r/U9SR0ME7FVNmELz8UTeUkEIpcPLNbPzk2bzBx3pEGUjGFbEGHGIEONdXOX3l50ZCu2k5lBSILCzyVlzk8Jbj3aWBkuvqYKxVsoDvuVa9Ff++O69/X0pNUZW0UprdD4386kAwETcspBW3vaV8kou8r2zuPaej7IRLSlxH3erKn89CfuHr3ZWXrY5ArKPbi3/lk6YyVOxJBs70lBK8rnS99IvL1++yybVq5Tfzb7u8s3f+MwPV2AAYm9XfX1b+vZa7HX6WTNyvYLlCwpbijApQKG6ZZKkKl8zMf743yzG4HX7/Prrqd6rF5g4lL3xjCtZd049I3hmaE6/lsk9XMvx8+8q3xBf29ymdLe7GuVzV0BRKJekHC0pWuI46e3wUX8lCDe6qeJJ1dVSuyi/m42jniaCjX0RO3alQiq1H5HDC9iMlW/ouHgvdi58wd1usIqMVJjkvB36myrPIgGeTYOpTWuSMifuvMbx6ufoYqVXB3zx7d3x0ckzd51ti1szSUyvu64jGqBveUh2ug7PE44/EXjVa/DabXwhTcJsHKdvBxU/k486s2/hi9N0BJJEvnqdK2gnS6rG1K7diuz+tPIi1VC71JpbrSEqzKKz/AGybMxx4p3Z5994N9/jtgXXIp7PPfAeP5Z/X2DrYc5XO6z54WpKkV7GuuuabmaYODg+jr6yv+3NfXh4GBgXldfmdCYSgwd0SgHxddCb0aAQCs7RIMTehya94SRMMKh+1rYjwDDE+6CEcMdHXF0NenE+I3t00iGgaiYf0ECYeBvCX45XYDJx2W8sYKnHSYvs4v3joJKLfG+YFfbjfKLq87DBiGi5wF5B2FtZ0GzjgihkP3rSiFzqJyjPm0A6WAnA10d5hVx/zkjgLufCyH4UkXOVshFgY29JT2r/KWoCtp1Jwpe+7rC/j2/Rk4osNwIiKYzOreY9PULR/AzN5mQH9CUIuhgM//cS+e3FF++bajW1C6k4H+H++CI6GZOwP+dQ5NlWb72jKO3g7dh2w7up2k2lhMo7SzYTv6ukxTYSorxR7mVMJAxgJu+bWLroRCtiCIRwzkbAemqWCKwBEFUQrnvj6Bvr4IhqfHkM7p11b/8qdzeqy1tvM3t00CEEzmdFgOeRNBg/dlUF8fMJZP42ePF5C3SvdBsC3KLz5ee0k3AP1Y+OY2/VhYyOOv0pM7Crj9iQxChkIqoZCxgNufALq6ogu63MptU89zcqktdJWOZgreD8nqGWFORiwG1dMNd3pah6dQCEgmEerpgTs+DqO7G9ETTkD4wAOLfzMRDuvgUI1fVbQshH79K6hnn55xGdYpJyP7X/8F5dr6/JYFUYL4KScjXGU717Xt+w6D1RVHfts2uOPjQHc3ZHxcnxYMtIahnyDB8Qf7m/3bUOv2VfLaa8xctu6WtPou1oATrPA2QM1dDRHdkhKN1m6VEYEaG6n/umwLfX0pTDjBSaEovy+qtLkEx2iuW+f9iUDSU+jxHwcV97XR3Q3zVa9C6KlH4T5wL2RyUl9fRetT6MlH0XX2m2c8/txCvmKCqzf5EwIzbJZ/EipSNsa+vhQm16yBDA+Xbqdf2evsQqi7E+74OCQa0XMbshm9Q2fqS
rOxphfdfSlM//RRSDgEFfFeP8MmpFBA6KlH0XH0YZhIJoCJKp/wJBN6DOkpqFhMh2sA4ZABMSPl263CxOR4+e32bruaHF+S17tWfE1drGWb5FhtdUA110dplZfhSnAuBwC/mimwvSRVsAXRcGmCm20LHn/RRioGRE1gdNLFt+6bLlb1BsdsxCOAbZfGYkAwOOZWXUZmcMyGUsDodGklk44oMDjmFk+vvLx4WPdSfPBMf/PnMTSUn3HZtVRepu2U+s/92x0cc+V6z4CuGk9m9UuFoXRoPeO1qLlUzvoEcM5hKH5M35tUOOpVgu1DwFha7zAkIjrAOm6pCDbXm0zeBj7076PoSSps7CotoQfoan/YEACqLLjPVvBx3NJt6IrpgLy2o7Ttp7KCggPEw3rVE0dKxSZIqeI+lXF1f3lU70Q4jqBgCdI5IFtw0J8C8gWviOC6SOeN4ioi6xP6/izY3uPTAWyU3iwKttTczq8M28gUAm0tDjCREVi2XXPJx7ufcIuPAX+bW3bp0xURwH/tCi53WO3xvxA/eciGEoGplG6TVIAjgp88lMb6RP2Pa6D6ck3zfU42W6suKRW8H1Swz2keHGXCNcNAl9fjKgI1PQXLcqAcF47lIDeRhQRuv3HE0TAf/iVmPNuDy9sBgOvCDUXgjE/C/tGPytZfdg84aMZSgPnu9XqPOWBey6JNZGF645bOLki8A8aeV8pDqt9bGFQZ9OYTar3t7lp2lcmHNfjBb5Y+ast2ofx+XiDQkydl/dF1DxOzvDYrBVnTp/tyc6O1L2Q+bTSOg6GhKRSbOOqo2iulSjlBBJZfHRkdgXJsjP/93wNKwT1gM5zTzwbOfpv+u7Jl+SJQgcdfWdtGPq8fS93roU54Y2kVERFIRycQ020jamSo2Btu2W5ZoC7bjqapl2c85sTypQsNQ7eknHQaCrWWibRtyNQUrM2HIDs0hfDIqK5cBytCyoQaGdXb0RVIPAGVyxYfAxKLQ7n6vSWUTBXbZsIhQ287ywKStZ8/Yf/xFLxvvH83+/WuVV9Tg9pqmb5169Zh2N/LAzA0NFS1lWQ2E9nqvx/PALGw7q+ezOrAVlw+bo5l+uY7ibGyz7dywmAzVkiovEy/1zbYJpnO6/D6hdstTOd1+PYrtf7rouN6LTZqZjWjVs9rrZVOPvNjqzwYzuP2+G0uLwzp8SkAUEDGAkJZQXeH6LDo3cbKnapaqvXLmyZw4dE6TH79Phsj06W+eNMAklFgTYdef/kLt1tlPdbT+UBBwp8ECUEyqnDVO7pnvEBIxcRKCfw+KLitM17fuZ+PvAnsNT8FuOMJF9lC6XXagNfbDq9lygAiYeDMw/QFVlt1ZLblEevR7Im7XGWkPtXuh3mrDHmZjA4KFR9VO4MHlwVi98DNpfWYa1Uli2tKu7q3tlDQk+XSaZjP/LYh62QXr6pyZZLxMRiZNCSeBBIJqNERPXknuIpILfMJ2MVWhzom/s2H6DYdZdvF24DJCb06SmUoWojgyireaioqk178uCtIqnPmkoW+ygp9teUKx0ahHLvsd3ppQuiQjSqTFoM7ev6bpEhpQiu8dgnvsVdcI9rnf8KhlA7bswh/6+t6B/G1h8+6dryxayckntSr9PiTNw0D5uO/gfm7p0prcAdbSAJtU37fuXQEQp9lFSdRBicxihkpts3Ys7R2SXcP1Oho+Y6hoGxCKc1PU3uwZ7Nx40ZEo1H8+te/BgD86Ec/whve8IZ5XUatoOVKqb/aFS/0GeVtUdOBwlowDFT2tM7VIz3XhMH5Xl49Ki8zFtLXGw/rTwamc7pyGzF1ePV7ootjC1zW3mtNrOtUMA3ge79y8YXbLVx/p4UfPDy/nlfbLZ/XERSa41GmlCout+gEiwzQ92M6r5fuS0Z17/y6WVY7Carsl6/sPz55s4JhAF1xfZldcf048e+byh5rxwUg5e0XswVJZZT3oPv/DhYXK/uL/ceQ7c1dcXUBH2aNh8vItBesvexiG
t6/oVtb9u5VuOCY0m0eS+udDZ+/PGLBrj6HoB5z9aIvVjOeQytRtfthvlQhDzUyDDU0oL8y03rpuXC4NBnLcWD+5qGy0G0M7IZ9+tmw/uwKuAcepC+sMiBFdN+KyqRLEwNq9IcuVlnI8pYZhFLFZQelo6NU9Z2rEtsxj8pVsQXCqG+CI1BXQFb5HNDdA+fo44CeHqh8Dmq2cVeuxjXX6lwV41RTUw1tR/GDoHvQIbWvf7YdHaW821xjub/nnimddXJCT4YcHy0PxP529iZkOYe/ruplVfZ0S3BcNe9P/Vjynw/mM7+Fc8TRxf7mGUfHnJworbwA6OUQ8wUdkqMxPdk4kwaK656X95XP1Xcu++4H++TTgGQSksvpA8jMsQPrHHcyEPcme/qPrXhM/54WZMkr2Jdddhkuv/xyHHroofjiF7+Iq6++Gul0GgcffDC2bt3asOu58hy993r19y0YgeeE/5FOrWX6gisa1LNigT9hcLqAsvWZ/TaH+V5ePSovc21K4Zj9S6uI5G3d2pCKB5sGSkvoVb6UV65BXbmkXj1rdZsKsFRpBYug2d46ihOsK+aNKADwWhyuvaS72Ori3+Z6zVZ1n+u+qayAG0q3lKQCE85nC5IhAygEwq8fmIM7HJUV5XBIYHn9JILSwZPWpmZ5o/ZCuE9Bv1f5z4GgympwteUR57sue7VPChoZgJvxHFqJKpc7XBDXnXlQkHwOKMRKATnvLVVTa93hTBoSjUIVKqrZ/uX6S/8Fg0uDD3IxY2WIyuUGozFdqMtM6yphraoqUP1AMTOuUJU+MgK8qmfgyT/bMnp1tHdYl1xa/Ld/zvC/XF9a6QVAcVIlUL7TYNTxPAm2TwRXkGkQ9w9eq4eyayck2aF3eByn7hAvkSjsSy5F+P/7fzXOUHqczVjBpXi/e9t5joOyVK6wAYE+0I7rlB5HABAKwVy7FvbwcOkj5Borgsy8PRFdLfarf/7jw68EJpP6Zd2yACMH6eyCHaiCV1sFxK6okvtV+Z462y9k3/1gn3rGij5E/FJbkoB9zz33FP99ww03FP990EEH4fvf/37Dry/4shAJ6X7UYOvVXMv0zRbKKvmBpS8QgPzDPC/k8uo122VWtjaYRvm61D4/6FWGLNebuDGVA2L+e+gcH/mv61IYnhLk7NKyiLaj/64vpY+OWO0TB6PiNbzsx4rgGLzNf/cDq2rbxFzV8krzCeC9SV1NN1Ttx05Qf6ea2YIS1i0ovsqP9VMxvW41oKvqc13H2pRuUar4VA+12sUqw3Dl8ojA/Ns7liIAN+M5tNJULne4II5dHq68N36VSeuVRIDS5Meg4AoIkxOQVBfEv4x8Tq+wYJf3JUs8Ufr7Bh/kYsbSbX4LRLCPzjQh6/eCff47agc3YO6gGQoV956lW3+cLr1rgPGxUpCsJRyG9K/X226uEF9NcYk2BFpwRFfd/VUtKl9kq13Mmj69ssaknmRdvOzF8CaQSiQKY9dOuPAeG/FEcfWM4jKOs1bxVXH5w5o97cH7yK+iqcD5lQHp7YV9YX1FvGDLSPhbX9c7a/51+NvJdnSPuLd0U60VQapfAUrvbVV6ngHoNqB8rmznqtYYG6UZl7matfWRHGu9bEQDhbsTD9RHEyyule/93l+mb7FhoNnVu4WorFJ2xfX60wpekcV7zUlGUXUNaj+QB6vKc33k72+HrjDK1gdPRkvL8EVD+qA0gK6aj6VLYdW/Tv+9YK6geMpBwN2/rf77RqoMdvNZj7m4TeKo+diovK9iYYVUTH8KUc/j84xDjeIEVsdrJ02E9e9r3Z5gGI6E/Fai0pgW0t7BANwa/PthwTPyg5VMQLc6iFsKx7YNGAYkXLFMSZX+0GK4jcYgtgNlW1D5nO71zGS8j9OkeJCL2fpD56vyQBoSieqdhHC0+nWGwjUOmFOlMjPjyhz9xAt8nF68/mRKh62xkfJt6x0wBT09sM9/BwAg/M9fql7RrdE6UbVnFgqyZg3sC7eWJ
tLNuE+V3ikIVpD9iR/xmJ697bqlFoZ6JzMG+7jNkN7J8C675kFf/BVaKrevP8bgJFkA7gGbiz3XQe4Bm0t/WihAUim9Qoff35xM6E9UFmDGmCNRfeh5y9LtF+EwJBwufsIDYM4dRmUVIMmUPtiMXxWvMtmWR1Zsb20dsP2j+gUfllET2Ku79GLypkPCACw88Kxu24iEgDce6P9+8Vrx4+vK0O9P3kvFgLyFsgPTTOQwI2R1RPVETT3Bfu5KLVB9O5x9eGk7+L3GhUC7RSJSCuBrO/SkVX8VkrmCYrX79cQG3q+z3c56g2Q9j41qO2iGAVx4bH2remzeYOLtx8zv8Re8DZX3SyvsINIy8lfV8AOZoQAxdUjM64+qnQM3w3zmtxDLKjsKnB3oD51xlLiQCfvU08sOJz3bx9uLNeMj9O4eOAcfWjbxLHidzpHVV0KRhD4imMTipYOD+Kt5iOh2B6VmfJxe7fpVJqMroVW2GQA4Rx1b9SAszlHHVr2NznEnlw7JXSXk+y0QxaMm+gG4oiVFUp3F+9Y/+EpxFQ7H0UejFEBSKcAwodLeIdsdW6+77LqlGf5K97ZJR0dpoJUHfanc8QlM3CxOPg2ukhKYaOdPZDSee6b4OC2uIuLfHn8CYPBok4EJgPNV9fFsmrDfeDrWHn0Yhh95wju9+vOhmhlj9Kvi/vOvCTudtPSUVFsvr03c9stx/PQxt3iwlEREPz6bcSCNhVjOpWfqrbT29aXKlm7zQ1amUB7IG7HTMNeYap3eLkv4LHSMS3WUwlpjbKWjJK70+3qpLLSCPfkv/wJ3YLD84/VobMbR3GY9ylwdpy9UM7d92SG8zRBgKEiioxSagsFylj7eWmOsZ5uUjWGOXuF6L7Pa5SurAJnl8ssuNxwBlFcZnuu+DkegchlIpGJHIjDBrnLM7l6bSjs+4YhuJ3L0kpOiFBCNwj61/qMJli/TV30M81VrO/v39Xzvh6pjLOTKlq5sh+dMo7TyGP1aw5o1HfNeSrqtA/bIyDSefsVumXBQqZUfND5/jK0Usiq103ZsZRxjY7TLGBdi9Imnof7nHn24ZwDS1QPn+JNbZqLTUm77he4ktMvjo1ljXOzOlf/3ZnoKTjK1oKDZrB28SovZju0wxqWy1GMs5mQRKH89XXGhvO8i4k1ccyGuC7iCnp4EzN7eWS51prZuEQHY+9ko3I5EJBv3hlPnRLCVjhO+Fmax222+q180YwxLoR3G2E5KoRleaC4PziLujNAMBA5WOFeteQG16LYP2ERERES08lStNrsuFATiepNgXVcHaG+N89KKaTL7OsFNxoBNREREREsmWHEWx4HhOl61WUoVZtepUm1e3tA8HwzYRERERNRQylsbWHmtGbrq7LVq+OFZBK5RgDuVWfza6y2GAZuIiIiI5k15ayWrYAXa0Ue9FMfRAbryKHfVrLBwDTBgExEREVEVZT3QruP1QnuVaNuBuPqARXWF6FWGAZuIiIhoFZvRziEuxG/lmE8lmooYsImIiIhWuGKInlGJthmim4ABm4iIiGgF8cO0FAow7QLEtiG2zXaOJcSATURERNSGyiYZut4BVmwH4nhh2rDgTKeXe5irEgM2ERERUYsrtXi4UH5vtG1DHBeuCKvSLYYBm4iIiKjFKADK1ZVoOA7EsrwWj/Y52MpqxoBNREREtIx0mPbaPFyvMm07rEy3MQZsIiIioiVQ6pn2KtP+Kh4M0ysOAzYRERFRE5Qq0/rohuyZXj0YsImIiIgWqWydaccBLIuV6VWMAZuIiIhonkQERqA6zUmIFMSATURERDSL0qHE/VYPC+6YBXcqzer0SmJZUOlpID0N5X0hkwa2nDPvi2LAJiIiIgooBWq71O7hOOXVaYkwXLcb24Ka9gL09FQpRPuBOp9v2FUxYBMREdGqVjNQuwzQbcVxgEy6FJ6np4Dpaaj0FNT0NFQ+V9fFiGEAiSQk2QFJdixoKAzYREREtKqUt3zYDNTtQgTI5
3WAnp4CvO/Fn7MZqDo+VRDDBJKlAC0dqcDPKSAe9x4kC8eATURERCtecck82wasAgN1q3LdUhV6arI8TE9NQdnWnBchSukKdIcOzNLRAXjfGxWg58KATURERCuOgkD5R0YsWBDbhisuV/hoBY6j+56nJlF4OQ9zYLgYppGe1vfbHCQShXSkdPW5owPS0akDdEcKSCQBw1iCG1IbAzYRERG1PaWgg5nX9iGFgq5QcyLi8nBdr+o8Wf7lh2jvfikAMKv8eakKrUO0pFKQjk4vTKeASHRJb858MWATERFRW6rW9iFs+1g6IrqdY2oSanKiFKInJ8pCdM0/NwwYnZ1wEh1eiO7UXx0pINkBmNWid3tgwCYiIqK2UKpS2/qw4wWLR0pcCoU81KQfoiegJieBKS9QO86sfyqGAXTo6rOkOiGdgRCdSKJ3bQqjo+kluiFLhwGbiIiIWlKt5fNYpW4C19VV58mJ8q+pCajc7MvbiVJAsqNUgU51QlJdkM7OluiHXg4M2ERERNQyFAApFGAU8lztoxls2wvP47qVoximJ3W7zSwkGoN0dnmV6C6vGt0FdKTaup2jGRiwiYiIaFkpQFepbQuSL8A14nAzmeUeVnsr5OHsnoTx8h4dpifGS73Rs/yZGAYk1Ql0dhWr0DpMd7X8xMJWwoBNRERES04p6P5d24Lk83BdLqG3ILlcKUAHv3JZZFE76Ek4AunqgnR2FwO0dHXryYWrsKWj0RiwiYiIaEkUQ7XjQPI5uI7LCYr1yuehJsZ0eB73vk+Oz90fHU9AurqLAdoP1IjFmn6wldWMAZuIiIiapriUnmMDhQJc22Gono1leVXosVKQnhiDymZn/TNJdgSCdA+kqxvd+27AWNpeooFTEAM2ERERNYxeSk9XqWE7EIsHfKnKdfXkwnE/SHvf09Oz/lkxSAe/OruBcHjGeVU0CjBgLwsGbCIiIlqwskBt2RDbgrgCYaDWRIBsFmp81AvT3vfJiVkPCS7xuFeJ7oF0dxer0tWCNLUeBmwiIiKalxlHULQdBmoAcGzd0jHmB2kvTOfzNf9EwmFItx+kS4Ea0djSjZsajgGbiIiI5qQgULalq9QWj6DoptNQu3bqEO0H6smJmocHF6V0f3R3rw7S3hcSSU42XIEYsImIiKiq4qofVkGvTz1LS8OK5brA1CSMsVGosRGoMV2VzuSyqNWsIbF4KUT39OpQ3dnFg7GsIgzYREREBCBwaHLbBhyvUr2altKzbd3S4QfpMa/Nw6l+hMNiVdoP0d53xONLPHBqNQzYREREq5RS0KtZFCcpWqvn0ORWQQfo0RH9NTYye4tHOFwWpFP7bsSEigImoxTNxEcFERHRKqEUII4Dw7YA14FYFrAaAnUhXwrSoyMwxkagpiZrnl3iCR2me9bA7dXfkewo65U2e5PAaHopRk9tiAGbiIhohdKrfdh6tQ/bhtgOXFWAO72Cg2E+r6vRI8MwRkegRodnXVtaOlJwe9dAeryv3l4gxhYPWhwGbCIiohWi2EPteIG64E1MXKkF6kKhGKbVqBeop6eqnlUAoLOrFKZ710J6eoFIZEmHTKtDUwP2bbfdhq9+9auwLAt//Md/jHe9611lpz/11FP4u7/7O1iWhQ0bNuALX/gCOjs7mzkkIiKiFUMBUOLolS4cV6/2sVJbPmy7PEyPDNds8yhNPlwDWbMW0rtGTz7kQVpoiTQtYA8MDOC6667DLbfcgkgkgosuugjHHnssDjjggOJ5rrnmGlx++eU45ZRTcO211+Ib3/gGrrjiimYNiYiIqG2VjpjoAt5BXvwDvKy0g7yI6+oJiCNDOlCPDOtDiVe5naXKtBek13iV6RDDNC2fpgXsbdu24bjjjkN3dzcA4IwzzsAdd9yBD3zgA8XzuK6LdFr3gWWzWXR1dTVrOERERG2j2DvtuvprBYdpiADpaRjDXpgeHUJ6bBRh265+9mQH3DVrvcq0DtUIs82DWouSJj1T/+Vf/gWZT
KZYkb755pvxxBNP4FOf+lTxPI899hje8573IJlMIh6P46abbkJPT08zhkNERNSyxA/RrquXyrOsFbv2tORycAYG4A4MwPG+kMtVPa9KJGD098Nct05/7++H4hrTtAzMNWvmdf6mVbCr5XYVWN4ml8vhYx/7GL75zW/isMMOw7/927/hwx/+ML72ta/VfR0jI9Mt3WfW15fC0FD1yRatgmNsDI6xMTjGxujrSy3o7/iaunizjbH4FugKlLhQjgOxbYhjAa5XmV6Czd/bm8ToUi0v57r6wC0jQ1DDg7pvenKi6lklFNIV6TVrkXzV3piMpoB4ovww4lkXyLbGCihLuh0XiGNsjN7e5Lz/pmkBe926dXjkkUeKPw8ODqK/v7/48+9//3tEo1EcdthhAIALL7wQ119/fbOGQ0RE1FRK6eKSId6RD10HcP3vrg7Qrj7NXaIwveSyGajhIRjDg17/9Ihe0aSCKAXp6oGsXQtZ06e/OrsAwwAAhLjGNLW5pgXsE044AV/5ylcwOjqKeDyOu+66q6w9ZN9998WePXvwwgsvYP/998fdd9+NQw89tFnDISIiWrTiMniunmio4LV3uAI4NlzDgkyll6wavaxcV6/qMexVp4eHaq43LfEEZG0f3DV9kLV9um+akxBpBWtqBfuKK67A1q1bYVkWLrjgAhx22GG47LLLcPnll+PQQw/FZz/7WfzVX/0VRARr1qzBZz7zmWYNh4iIqC6lVg5Xr1rhfRfHgdgW4Lq1K9AikBZus1mUXM4L0oNQw4N6QqLjzDibGKZezWNtH9y1/ZC1fUBi/h+xE7Wzpq6DvWXLFmzZsqXsdzfccEPx36eccgpOOeWUZg6BiIioTDFAi+6FRlmIdgHH1t8hcAUrdrLhrESgJsZ1kB7yQnWtNacTyWKQlrX9eok801ziARO1Fh7JkYiI2l4pNAMQ1wvOgBKv91lErx8tbqmlww/QWAXtHHOxbV2RHh6AMaRDtbIKM84mhqEP3tLX74XqfiCRWIYBE7U2BmwiIloWwcUh6iGuC8N1dFgWt3wCYSA0A1i9led65XJQQwMwhgZ0mB4drn4Ql2isFKb7+nXvtMnoQDQXPkuIiAiAd6TAYOj1qrvFNgpR+ggofsVXdKBVeqqf/s/7HST4VXaBgcufPQAXl3v1LsM1LLhTabDiPE8iwPSUF6YHkB4ZQmR8vPpZO7vg9q0rhmqkOue/J0REDNhERORJTwGWN2lNiv9D+Zw9Kfu2pPzATrNzXX1Y8cGBUpU6my2e7G9BMQy9PF5fvw7Va/uAaGx5xky0wjBgExERAEBs15vcR23FcaBGR6CG9sAYHKjdPx2OQPr6EdtnE9IdvZA1bPcgahY+s4iIiNqJbeuDuAx6gXp4sPpyefEE3P51kD7vq7sHUAqR3iSmeRAXoqZiwCYiImpltqWXyhvco9s+Rob0gW4qSKqzGKjd/vVAsoP900TLhAGbiIiolVh+oN4NNbhHL59Xpffc7eqG9K+H9K+H278OiHO5PKJWwYBNRES0nCxLT0Yc3AM1sKfqknmiFKS7tximpX8dJyS2AqWqf0iw4LnAUvWf1H4YsImIiJaSbetAPbAHamB37UDdswaybj3c/vWQvn4gEl2mAa8yXmLW3xRgGlDKAEwDUAaUoSBQMFIdMBwTgvKErcr+XZm0/Z9rtO6o4Hm8/1VZ5lKV/UqqZ3EBVDwOI+GtEV9cbtM7/2wr8vjLa/pXNFurkdS4jZXDD57WDjsPqvi/BWHAJiIiaibHhhoegjGwW1eoq/RQzwzU64BIZJkGvAJ5YakYmg0FZejA7AdoUdA/K73eu8AADB2w3CqBUEUicFV+xu+rZsdlaoU3Egm46fIJsE1tyw+GbNG7Hqq4bj4C6+hLcefASCZhFrxfo/T74nmKy3OW1sWXWjsHSkH5N1Ap7/70/g3vtMrbH9iR0JeqSgcF8E4zulPz3hQM2ERERI3kurpvemC3DtVDg1BuecgRoBSo1
21goJ6vsiozUAzNfjAqVpwDPysFMQx9XlU9NM/QDpXWeVqapeT1dpfAjzXPGYvBCVn1X/IcOwjNOIirMs15/w0DNhER0WKIQI2PQu3ZjezoIMKvvAJl2zPO5nb3QNZtgLtOT0xky0eFYGhWfoXZ/7cJhEzArzp7IVmHOFWsNNcdrlZgcF4t2uVYUwzYRERE8+EfenzPrlKVOq9bBRyUinXS2QW3GKg3ALFVPClRqeo9zYYBpRQk8HG+KANGTwpww8U/n7Pa3Cahi1YPBmwiIqK5ZLMwBnZB7dmtg3Vm5oFaJJFEeJ+9kevpg7tuA5BILsNAl5Afmg1DV5tN0+tbDUzGM81ieBY1e09z+UWrtqlUElXDgE1ERFTJsvSREvfshhrYBWN8bMZZJBrV/dPrNsBdvxfQkUJqTQcyK+koicUQ7bVpmGbV0FxXPzPASjOtGgzYRERErquXy9u9S1eqh6us9GGGIP3r4K7fC7J+A6S7t/2PlKgA5a+a4K+sYQRCtKEghq5M1wzRDM1EMzBgExHR6jQ1CWPPLhi7X9HL51mFspNFKUjvWsiGveCu2wuytk+3QbSjQJBWpgkjFtMh2tB90OL1PzNEEzUGAzYREa0OhbyelLjbC9Xp6RlnKU5M3LCXnpjYrkvneesBq5AJhMNef7QO0kZ3Cq41NfNvGKKJGoYBm4iIVibXhRoZgrF7F9SeV/Ta1JVHTIxGvZaPvXQfdbJjmQa7OMpQUKFQqUfa0F/u3H9KRE3AgE1ERCvH9BSM3a/olT727J7Z9mEYkD6vj3rDXpCeNe3ZR60ApQyoSBgIhSGhkG5pYRWaqCUwYBMRUfuyLaiBPTpU734Fampyxlncrm5dod6wEdK/DgiFq1xQG1AKyjSgIhEgFIIYofIKNcM1UctgwCYiovYhAjUxDrV7J4xdr0ANDcxc7SMSLVao3Q0b23Y9amUYUOFw2REMxTTrXxKPiJYNAzYREbW2Qh5qz27kHhtAePuLUNlM2cmiFGRNn65Qb9gI6V2jA2kb0r3UYSASgYTCM3uoGa6J2gIDNhERtRYRqLERqF07ddvH8BCUCGwEDkOeSMLdsFGH6vUbgEh0OUe8OH7rRzQGhEJwVXvuHBBRCQM2EREtv3xeh+ldO2HseQUqlys7WQwDoY0bkV+7Xlepu7rbc3JigDIMqGgECIe9FT/a+/YQUQkDNhERLb1glXrXzupL6HWk4O61SVep161Hqr8b2XY/DLlSMMJhIBKGmGG4bb6TQETVMWATEdHSKOSh9uyCscuboJjLlp0spgnpX69D9V6bgFTnMg20CZSCEYkA0ai+neylJlrRGLCJiKg5/BU/du2EsetlqKHB2lXqvTZB+tcDoRX2tqQUjHAEiEXhGt5h1hmuiVa8FfZKRkREy8q29eHIX3lZt35kyls6xDAh/etKVerOrmUaaHMpQ+n1qiOBYE1EqwYDNhERLc70lG77eOVlqIE9UK5TdrIkkqUq9foN7Xugl3ooBSMRB8JhuIrBmmi1YsAmIqL5cV2o4UEdqHfthDExXnayKAVZ2w934ybIXpsgXT1tv+LHrJTSFetYDEZ3N1ynzSdiEtGiMWATEdHc8nkYu3dCveKt+mEVyk6WaBTuBh2o3b02tve61HVQIVMfZdHUR1n0D1uu2vQAN0TUWAzYREQ0kwgwOaF7qV95GWp45gRFt6fXC9R7Q9asbdujJ86HX6mWcJRL7BFRTQzYRESkua5eRm/nS7pKPT1VdrKYJmTdXnA3boK7cW8gkVymgS4DBW81kBgnLRLRnBiwiYgIABC+48eQ4eGy30kiAXevveFu3BuybsPKW0ZvLgowQiEgFoOEwly/mojqsspeKYmIqCbLAgC4a9bqto9Ne0O6e1f2BMVZGCETiMUhoRAEiutXE1HdGLCJiAgA4Bx5NOyetUA8sdxDWVYqZOo+61BYB2sionliwCYiIgCAu89+gO3MfcYVSoVMqGgMEo7AXe7BEFFbY8AmI
qLVTSkYsSgkGoPLijURNQADNhERrVpGyAQSSa4MQkQNxYBNRESrTrAdhHMXiajRGLCJiGjVMMIhIKonMLLPmoiahQGbiIhWPK4MQkRLiQGbiIhWLGUYUPE4VwYhoiVlNPPCb7vtNpx99tk4/fTTceONN844/YUXXsAll1yC8847D+973/swMTHRzOEQEdEqoZSCEY9BdXbCZZ81ES2xpgXsgYEBXHfddfj2t7+NW2+9Fd/73vfw3HPPFU8XEfzZn/0ZLrvsMvz4xz/GH/zBH+BrX/tas4ZDRESrgQKMSASqMwU3Gueye0S0LJoWsLdt24bjjjsO3d3dSCQSOOOMM3DHHXcUT3/qqaeQSCTwhje8AQDwp3/6p3jXu97VrOEQEdEKp0ImjI4OSDIJV3HZPSJaPk0L2IODg+jr6yv+3N/fj4GBgeLPL730EtauXYsPf/jD2LJlCz7+8Y8jkVjdh+clIqL5U6YBI5kEOjrhmmEI+0GIaJk1bZKjVHmFU6r0UZ1t23jooYfwn//5nzj00EPxpS99Cddeey2uvfbauq9jzZqOhoy1mfr6Uss9hDlxjI3BMTYGx7h8ursTgNvaUwF7e5NlP6toFCqRgDKaOqVoXtrh8cExNgbH2BjtMMb5alrAXrduHR555JHiz4ODg+jv7y/+3NfXh3333ReHHnooAODcc8/F5ZdfPq/rGBmZhuu2bqmiry+FoaGp5R7GrDjGxuAYG4NjbIyFvlmNj2fg2k6DR9M4vb1JjI6mAQBGOAzE4/o9IJte5pGVtMvjg2NcPI6xMdpljPPVtF3+E044AQ8++CBGR0eRzWZx1113FfutAeDII4/E6Ogofve73wEA7rnnHhxyyCHNGg4REa0AfjuIJHl4cyJqXU2tYF9xxRXYunUrLMvCBRdcgMMOOwyXXXYZLr/8chx66KH4p3/6J1x99dXIZrNYv349Pv/5zzdrOERE1MaUUlDRKJAKc2UQImp5TT3QzJYtW7Bly5ay391www3Ffx9++OH4/ve/38whEBFRm/PbQYyODki2tT9KJiICeCRHIiJqUco0oGJxSJiHNyei9sKATURELUUZBlQsColE2Q5CRG2JAZuIiFpCsGLNYE1E7WzWgH3aaaeVrV1d6e677274gIiIaHUpBesIWnsVbiKi+swasL/85S8DAL797W8jHA7jwgsvhGmauOWWW2BZ1pIMkIiIViZlKB2sIxFWrIloRZk1YL/2ta8FADz77LO4+eabi7+/6qqrcMEFFzR3ZEREtCIpw1tyLxKFq1rnCIxERI1S1yvb5OQkRkdHiz8PDAxgenq6aYMiIqIVSCkYkShURwpuNM5wTUQrVl2THN/97ndjy5YtOOmkkyAieOCBB3DllVc2e2xERLRCqJAJFY9DQmGILPdoiIiaq66AffHFF+N1r3sdHnzwQSilcOmll+I1r3lNs8dGRERtTikFFY+VltxjuCaiVaDuz+defPFFjI+P44/+6I/w+9//vpljIiKidqcAIxKGSqXgRmI8UAwRrSp1Beyvfe1r+M53voM77rgD+Xwe//iP/4h/+qd/avbYiIioDamQCSOZhCQ74Brmcg+HiGjJ1RWwb7/9dtxwww2Ix+Po6enBTTfdhJ/85CfNHhsREbUTBRiJONDRCTcUYa81Ea1adfVgh0IhRCKR4s+dnZ0IhXgQSCIi8igFI5GAG47MfV4iohWurpS8YcMG3HfffVBKoVAo4Bvf+AY2btzY7LEREVEbUIaCSnbANVl4ISIC6gzYf/u3f4sPfehDeOaZZ3DEEUfg8MMPxz/8wz80e2xERNTijHAISCTgKvZaExH56grYiUQC3/zmN5HNZuE4Djo6Opo9LiIiamHKUFDxBCQc4cp7REQV6prk+KY3vQkf+tCH8NRTTzFcExGtZgowIhG9/B7DNRFRVXUF7LvvvhtHHnkkPve5z+HMM8/EN77xjbJDpxMR0cqnQiaMjg5IMsmWECKiWdQVsFOpFN75z
nfi5ptvxpe+9CXceeedOOWUU5o9NiIiagHKMGAkk3r5PZOHOicimkvdU76feuop/PCHP8R//dd/4dBDD8X111/fzHEREdFyUwpGNAJEY3BV3Qf+JSJa9eoK2Fu2bEE2m8Xb3/523HLLLVi3bl2zx0VERMvICIeBeJxHYiQiWoC6AvZHPvIRnHjiic0eCxERLTNlGFDxOCQchkAt93CIiNrSrAH7hhtuwGWXXYZ77rkH995774zTr7766qYNjIiIlpYRjcANRdgOQkS0SLMG7FQqBQDo6elZksEQEdHycaNxuC5nMBIRLdasAfuiiy4CAKxduxbnnnsu18AmIiIiIppDXZ8DPvTQQ/jDP/xDXHXVVXj00UebPSYiIiIiorZV1yTH//f//h8mJibwk5/8BNdccw3y+TwuuOACvPvd7272+IiIiIiI2krdM1m6urpw4YUX4v/+3/+LeDyOG264oZnjIiIiIiJqS3VVsJ966inccsstuOOOO3DwwQfj0ksvxWmnndbssRERERERtZ26Avb73/9+XHDBBbj55pux1157NXtMRERERERtq66AfdRRR+EDH/hAs8dCRERERNT26urBfu655yDCtVGJiIiIiOZSVwV77dq1OOecc3D44YcjmUwWf88jORIRERERlasrYB955JE48sgjmz0WIiIiIqK2V1fAbtX+a9sFTAWwe4WIiIiIWkVdAXvLli1Vf3/bbbc1dDDzNZ0VmAaQiAJqWUdCRERERKTVFbD/9m//tvhvy7Lw85//HP39/U0bVL0EQLYgsByFZAyImKxmExEREdHyqitgv/71ry/7+YQTTsBFF12EP/uzP2vKoObLdgSTGSAWVqxmExEREdGyqvtQ6UFjY2MYHBxs9FgWRURXs8fSgoID6Po2EREREdHSWlAP9q5du3DhhRc2ZUCL5brAZEYQ9arZJsvZRERERLSE6grYH/vYx/Dyyy9j3333xUMPPYTnn3++ZdpDaslbgoINxCMK8QjbRoiIiIhoadTVInL77bfjySefRG9vL7773e8iFovhYx/7WLPHtmgiQCYvmMgAlgsopmwiIiIiarK6AvZTTz2FT3ziE/j5z3+Ot771rfjsZz+LV155pdljaxg9CVIwnWNnNhERERE1V10BW0RgGAYeeOABHHfccQCAbDbb1IE1mj8JcjwNToIkIiIioqapK2Dvs88+uOyyy7Bz5068/vWvxwc/+EEcdNBBzR5bUziurmZP5RRcZmwiIiIiarC6Jjl+9rOfxc9+9jMcddRRCIfDOProo/GWt7ylyUNrrrwlsGwgHlWIh5d7NERERES0UtRVwU4kEjj//POxadMmAMA73/lOxOPxOf/utttuw9lnn43TTz8dN954Y83z3XfffTjttNPqHHLjuAKkc4JxToIkIiIiogapq4K9EAMDA7juuutwyy23IBKJ4KKLLsKxxx6LAw44oOx8w8PD+NznPtesYdTFPxKkv3b2go6+Q0RERESEJmbJbdu24bjjjkN3dzcSiQTOOOMM3HHHHTPOd/XVV+MDH/hAs4ZRNxEgVxCMpwU5i1MgiYiIiGhhmlbBHhwcRF9fX/Hn/v5+PPHEE2Xn+Y//+A8cfPDBOPzwwxd0HV3dCTjuooZZkzKAjqhCPLq4fZC+vlSDRtQ8HGNjcIyNwTEunzVrOpZ7CHNqh23PMTYGx9gYHOPyaFrAFplZA1aBJuff//73uOuuu/Dv//7v2LNnz4KuY2I8A8tuXq15BEA0svC2kb6+FIaGpho9rIbiGBuDY2wMjrExFvpmNTIyDbeFl1dql23PMS4ex9gYHGNjLOQ1tWktIuvWrcPw8HDx58HBQfT39xd/vuOOOzA0NIS3v/3t+JM/+RMMDg7i4osvbtZwFkRQahspOJwESURERERza1rAPuGEE/Dggw9idHQU2WwWd911F97whjcUT7/88stx55134tZbb8XXvvY19Pf349vf/nazhrMorgtv7Wxw7WwiIiIim
lVTK9hXXHEFtm7dire85S0499xzcdhhh+Gyyy7Dk08+2ayrbapcQTCeEeRsToIkIiIiouqa1oMNAFu2bMGWLVvKfnfDDTfMON+mTZtwzz33NHMoDeO6wHRWkA8pJGNAiG0jRERERBTAJZ8XyLIFE2lBusBqNhERERGVMGAvggiQzQsmMoDlAIzaRERERMSA3QC2I5jICCZyCjYPuU5ERES0qjW1B3u1sSzBhA1EQnrtbCIiIiJafRiwG0wEyFuCgg1Epx04AoQM/XsiIiIiWvnYItIkIkDeBsbTXD+biIiIaDVhBbvJRPT62QULiEUU4hGALdpEREREKxcr2EvEFSDjrThS4IojRERERCsWA/YSsx3BpL/iCDM2ERER0YrDgL1MLKt0oBp3uQdDRERERA3DHuxl5B+oJm8B8YhCLMz+bCIiIqJ2xwp2C3BdIJ0TjKUFWYvd2URERETtjAG7hfhBmxMhiYiIiNpXWwfsZwcEOWvlhdCyiZA89DoRERFRW2nrHuz//p2L0WkXe/cqvLpff3XGV04aDR56PRYBIiaPCElERETU6to6YJveIchfGhG8NCK492mgvxN4db+BA/oV1qYA1ebl3+Ch10MhhQSDNhEREVFLa+uA/X9OMPDcHuC5AcELQ4KcBQxOAoOTLh58DuiMo1jZ3tijYBrtG7ZFdEV70gaiYR202/jmEBEREa1YbR2ww6bCAesMHLAOcF3BrnHguQEXzw8KJrLAZBZ4dIfg0R2CaAjYr0+H7Vf1KURD7ZlOg4dej0e5tB8RERFRq2nrgB1kGAqbeoFNvSZOOUgwMg08Nyh4fsDFwCSQt4Hf7Rb8brfAUMDevQr7t3Hftit6xZFcQSEeBWIr5p4kIiIiam8rMpYppfuv16YUjnu1gamc4IVBwfODgpdHBI4AO0YEO7y+7bUp4NV9Cvv3G1jf1V59244rmM4COVMhHgGiK/IeJSIiImofqyKOpWIKh++jcPg+QMEW7BjWYdvv2x6eAoanBL96wUEiUmol2WeNQqRNWklsRzCVBTKGF7TZOkJERES0LFZFwA6KhBQOXK9w4HrAFcHuceD5QRcvDApG00CmADz1iuCpVwSm4bWS9Ol2knZoJXFcwXRO3454RCESAkIGVx0hIiIiWiqrLmAHGUphYw+wscfEGzYDY2nB9iFd3X5lTOC4wIvDgheHBfc8DazpgBe2DWzo1n/fqvyjQmZUaR3tcFsfVoiIiIioPazqgF2pJ6nQk1R43auAnKVbSV4Y0qE7ZwEj08DItODh7Q5iYeBVaxX261N41VqFeKQ1w7a/jnbeAkKmDtrRENtHiIiIiJqFAbuGWFhh8waFzRtKrSQvDLrYPiQYngZyVmlVEgVgQzewf5+B/fpa9wA3tqMnRGYMffti4eUeEREREdHKw4Bdh2ArycmbgclsqbL90ohuJdk1Duwad/GLZ4GOqJ4o+dr9LKyJSstNlHRdIJMX5ApAPOPCFR60hoiIiKhRGLAXoDOucMQ+CkfsA1iO4OVRHba3Dwomc8B0Hnhyp+DJnRkYCtjUq4rtJL3J1qluuwJkCoKJtCAa0RVtTogkIiIiWhwG7EUKm94qI32A/IFeieSFIcGLQ3qipCvASyO60v0/zwCpGIp9262yDKArQNaraEdCOmiHzeUeFREREVF7YsBuIKUU1nQAazoUjtkPiKcSeOK5aWz3Avd0HpjKAU+8LHjiZX1Eyb16FPZbqw/fvrZjeavbwQmRhqHDdiQEhEzAVKxsExEREdWDAbuJYmGFA9cZOHAdIKInR+qw7WLXuK4c7xwV7BwV3P97IBnVK5P41e3lXJnEdYFcQVe1DQWEQwpRr7JtMGwTERER1cSAvUSUUuhLAX0phdfvbyBv67aRF4f0OttTOSCdLx3kBgDWdwH7rlV41VoDG7oAY5lmIrrByrYCQl5lO+xVtomIiIiohAF7mURDCgeuU8Xq9mjaO6jNkGCnd5CbPRPAngnBr553EAkB+
6xReNUahX3XKnQlli9sFyxBwQKU0iusRMNAJASYBtfXJiIiImLAbgHB3u2jXqVXJnllVFe2d4wIRqaBgg08NyB4bkBXt7sTurq97xqFvdcoRJdhsqQI4IggkweyeV1hj4T1xE9T6UPNExEREa02DNgtKGzqSY+v6tM/T+X0USVfHNZtJTkLGM8A4y8JHn9JoBSwoUtXuPdda2B9F2AucTuJAHBcQTYPZCHF6nYkhOJESVa3iYiIaDVgwG4DqZjCazcpvHaTPqrk4CSwY1iwY7g0WVIf6Ebwy+cdRExg7zV6ouQ+a5Zn7W2/up0tANlC+aoknChJREREKxkDdpsxlML6LmB9l8KxrzZQsPUqJDu8tbZHpoGCAzw/KHh+UCfYZNSrbnvtJKnY0teSa61KwiUAiYiIaKVhwG5zkZDC/v0K+/frn6dygpdHSoE7nderkzy9S/D0Lp1ie5M6cO+9RuHQjqVPtpWrkoRDCtEQYHJVEiIiIloBGLBXmFRM4eCNCgdv1KuTjEyXjiS5c1RQcIDRNDCaFjz2kuAnj06iv9NrKelV2NijEF7CCZPBsO33bYe9nu2QAU6UJCIiorbDgL2CKaWwNgWsTSm87lV6EuLABLBjRPDyiIvd44AjwMAkMDApeGS7Prrkhm5gU68O3Bu6FUJLVFb2+7adgjd+eKE76iBT8Hq3DVa5iYiIqLUxYK8ipqGwV48+PPvxBxiwHMGkHcVTL2bx8qhgcEJXlF8ZA14ZE/zqeb3U3l7dCnv36paSpVyhRKBDt+0CmbxuZfGr3CFTV7dDpoJpCEM3ERERtQwG7FUsbCoc0BfGmqguGecs3Uby8qju4x6eBhwX+udRAZ7TrRt+4N7Uu7SBGwhUud3ib0pLAoaBiFflNhRXKiEiIqLlwYBNRbGwwgHrFA5Yp3/OFMoD92gasJ1STzeg+6T36tFhe1OPwvpuILTUa3D7SwLmgSx0lbvYz23qQ7uHDGHoJiIioiXBgE01JSIKr1mv8Jr1+ud0vhS4d456gdstD9ymAWzoVtjUo/u4N3QrhJe4f0NEf7kQ2A6AghRDt2kqhP3WEiUwjFKvN4M3ERERNQIDNtUtGVXYvEFh8wb983RO8MpYeeB2XGDnqGDnKIDnddV4fRew0aty79WtEA0vz2HdRQDXFVj6Nzp0Q08GNUwgpErVbwV96HelBAb8ijiDOBEREc2NAZsWrCNWHrgzecHOMfECtu7hDh5l8uHtAgWgr1MHbv8rGV2eGYoieiIlvJ5ua+Y5AOhQ7X83TaWXDzQVTOgdCGXo4B48WCZDOBER0erV1IB922234atf/Sosy8If//Ef413velfZ6T//+c/xla98BSKCTZs24bOf/Sy6urrqvny/qlgP8f7H3NM8iWh5S0m2INg1Xgrcg1M6eA5OAoOTgkd36HujO1EeuLsTS39o99n4YbmyAg6Uqt2IuJiY9h6T3trdpQq5N+nSUDAAQHnVcwn2i5dfFxEREbWvpgXsgYEBXHfddbjlllsQiURw0UUX4dhjj8UBBxwAAJiensYnPvEJ/OAHP8C6detw/fXX4ytf+Qquvvrquq+jM67geoFkrjjmeuFaXECg9L8BQASuq093Bd6/pRjGGXgWLh5ReHW/wqu9o0wWbMHucV3lfmVUsHtCt5SMZ4DxjOCpV/TGTkT0xMmN3Qp79Sikulr3TpDA48TxH4xOzXMX/6WK//ODuIJpllY/UcVALoBCsU3F3+8wlCpeHh+jREREraVpAXvbtm047rjj0N3dDQA444wzcMcdd+ADH/gAAMCyLHziE5/AunV6yYrNmzfjtttum9d1GKWMMafiPDsTNf/IDy+uqGIYdwGIKDiuFAO4wxC+IJGQwr5rFfZdq3+2Xb329s4xwa4xwSvj+oiOmQLw3IDguQG9YcMP66NNbuzRPdwbuhXikdapcC+EFP/nP36CSw+WnQtAoE3F+1+xMu4deMcwSoG8WFUv/oE+r25jYTAnIiJqt
qYF7MHBQfT19RV/7u/vxxNPPFH8uaenB3/4h38IAMjlcvja176GSy65pFnDqYsfOJT3BcPL4xD/H/p0pUO3wKuMi79UHIrVcEf8toDyFgMqCQUOfAOUDu3+yphuLdk1JpjIApZTOviNHw57k/rv9urWXz3J1morabTiY6j8fxXV8pmBvPgz4LWxeAHcXyscgQmcgYp58TkQmPSpApfLxzIREVFtTQvYUuUduFoAmpqawp//+Z/joIMOwlvf+tZ5XceaNR0LHt9SWZtKlYK3K3BRqoI7rq5aloWnZdDTm1yma56pdw1w4L6lnyczLl4advDSkI2XhhzsHnPgCjCaBkbTgv/dqbdaPKKw91rT+wph0xpzyVcraaXtWI0I0NmdmNff+FtQlH58+i0sptIHGAr2kM+1tSXwvdpOp1LAdM5FPJUsuzwjUJL322eC/e3LsWPV15da8utcCu3wmtoO255jbAyOsTE4xuXRtIC9bt06PPLII8WfBwcH0d/fX3aewcFBvO9978Nxxx2Hj370o/O+jpGRabhu65bS+vpSGBmZrnm68iqIfv93af1mpcO4G6yKl1pSgMZVEHt6kxgbTTfmwprktfsksbGjgOP3U7BsE3smgd1+lXtckLP0hMrf77Lx+102gDwUgDUpfdTJDV26raSZVe522I7tPMbg3Taj/QWloF8M/CifXKqUFCeSGmpx1fi+vhSGhqbm90dLbKFvVu3wmtoO255jXDyOsTE4xsZYyGtq0wL2CSecgK985SsYHR1FPB7HXXfdhU996lPF0x3HwZ/+6Z/irLPOwp//+Z83axgtLdiSYhY/kweCLSmlEFBqSZnR+x3MjKLgeBM3/Ql4/vvlSpjAGQ4p7N0L7N1baisZSwO7x0uBe2Ra377hKWB4SvDEy/qGRkPA+i6FDd3Aei94t3sv92oRfKxK7X/U+msA5X3sSqliD7tfFa9XJO0iUwisIOOtDqP86/KDvqDYblP8jvJwX+32ERFR+2tqBfuKK67A1q1bYVkWLrjgAhx22GG47LLLcPnll2PPnj347W9/C8dxcOeddwIAXvva1+Kaa65p1pDaUtlH6KgM4lX/AuEqv50R1L0JnB0xhUJMeRM32y+EK6XQ2wH0digcskn/Lm8J9kwIdo3r4L17Qk+ezNvAjhHBjhHAD11dCWBDl/KCt0JfSh/lkVae8j72uVZ8qS1mCTL54BOjSjtc8X+BnwP/qFV1D05g9dtf9M9S1gdfWk1m5u0jIqLl19R1sLds2YItW7aU/e6GG24AABx66KH43e9+18yrp4AZQd2bwNkRM5D1EnnVVVQEcKDgOgJHAMdpXrtKo0TD5auViAjGM/pgN3u8wD3krck9kQEmMoLf7dY3wlBAX0pXuNd7wbt3hU+gpMYLzkMt+2fdVffq55u5mkypEh/shZ/KukjFjQWNnYiIFo9HcqSiWquohCtaVsraVeBXwxWcYAh3pXSkxGWmlO6/7kkqHLJR/85yBIOTusK9Z0IH78mcvk0Dk8DApOBxb/QRE+jvUljfpVtM1nUqdMYZumnp1beaDBDNC1LxpRsXERGVY8CmeZnRNw7A9NcWD4RwVxQcAWwHsGwdaP1JnK0gbCps7NFra/vSecHAhK5w7xkH9kzq1pKCA+9olIAfaOJhYF2XwrouYF2nwuaYCxFh6CYiIiIGbGo80fO8EFJAKKTDaDBw+6HbFWmZwA0AyajC/v0K+3uL3Yjodbj3eFXugUnBwKQef9YCXhwWvDgMAAI8OoVExAvdnfp7f6dCR5SVbiIiotWGAZuarjJwIwQgCjii4Lr6QDKWDTjSWlVupRS6E0B3QuGgvfTvXFcwkgYGJwR7JnXFe2hKr2ueKQDbhwTbhwC/0p2IAP2dOnT3e+0lqRhDNxER0UrGgE3LxlS6vSRsAiqqq9x67W8F2xFYLuA4LZK2PYahVxrpSykc4v3OcQUFI47nXs5gz4RgcFIwPKXXMM8UKirdAGJhHbr7O
4H+lK50dye9A6oQERFR22PAppbgV7n16iaCiFnq5e5KGnByBixHdGuJK2ilY2GYhsJevSbiMHDo3vp3jqvX4x7wAvfAZKnSnbOAl0YELwWWCwybwNqUDt59Xuhe28ElA4mIiNoRAza1LD90R8MKEdML3VHAcfW63ZajvxyntQI3oEN3f6cOzD7XFYymUQzcg5OCoUk9idJygN3jelWT4IFRepO6Wt7XWaqcJ6MM3URERK2MAZvaioh3+Gu/tUTplhLHAQq2/mq1yZM+w1BYmwLWphQO9pYL9NfoHpryAveUDuDpvL6tI9PAyHRpnW5A93UHA3dfSi9DaBoM3kRERK2AAZvamog+Ap4fuBNeL7flrVZSsFtr4mSl4Brdr1lf+n06Lxia0hXuwSnB0KQ+JLxA93XvGBbsCPR1mwro7dDhvS+l20vWphSSXMWEiIhoyTFg04rir89tequVSFQvD2h51e1WbCepJhnVrSCvWlv6neXovu6hSR2+h6d0xTtv6wmVQ1O6Ev504PA+sTCKYXuf9QUklGBNCoiGGLqJiIiahQGbVrTi8oBh3VpRbCfxlwZ0W7OdpJqwWTqapE9EMJUDhqf0yiVDU4Lhad3rLaInVO4cA3aOCR57KVv8u84YsMardK/pUFjTodDboa+DiIiIFocBm1aNynYSeIG7YAN5q73Ctk8pfdj2znjpADkAYLuC0WkveE/r8D2aASYz+gZO5oDJXPma3QDQnfADt/6+tkO3sHA1EyIiovoxYNOqZip9pMl4WIdtywbyDuDY7dFKUkuoyiomPb1J7B6Y1hMnpwRD04KRKcHwtK50A8B4BhjPCJ4fBIqrmQDo8oJ3b9L73qGwJgmE2WpCREQ0AwM2kcdUgBkG4oFWEstbnaQdq9vVxMIKG3uAjT3lbSaZgl6tZHhKf9dfur9bEAje+i+Kf5uKoRi2ezsUepO61SQRYfAmIqLViwGbqEJlK0nSW3vbdv2lAAXiBmNme1NKrzaSjCrss6b0+2Dw9pcL9P/tV7yncsBUrnxFE0BPruwNhO6eJNCbVOiK6+UKiYiIVjIGbKI5+GtvR0z9JVF9oBvbmyxpt9jh3BulVvAGgExB93iPTgtG0t6/03rCJaAD+K5xYFfgwDmA3o7dCb0soR+6/X/Hw0t204iIiJqKAZtonhSAkKG/4hFARCGVMFDIqLadLDlfiYhCohfY1FtejS7Yer3u0bToLy94j2f0YeJdAUa907XShoqGgL6uaaSiLnoSOnR3e98j7PUmIqI2woBNtAh+kI5FFOL+UoCBQ7mvpP7tekRCCuu6gHVd5YHYFcFkFhhL6yUEx9KlIJ7O6/PkbWDniOP9RfkGS0Z15VsHbqWr4An9nRMtiYio1TBgEzVQ5aHck1HAdktrb7f6kSWbxVCqGJD36ys/za96j2UEWTeM3cMFjGUE42kdugEgnddfr4yVt5wA5eG7O1G6nq6EntRJRES01BiwiZpIpHRkyUjgyJK2dyh3y9bV3dUWuIOCVe+e3hjGRnUVW0SQLQBjGWA8LTp0e6uZjKX1JwTA7OE7FtZLDHbHdfDuSih0JRS640BHjIeRJyKi5mDAJlpCxSNLFg/lXlp/u+Ctvy2yclYoWQylFBJRIBEtX1YQKK1wMu6F7/GMYCwDTHgh3K985ywgNwEMTMzs+TYV0JkAuuK62t0V16ucdCX09yir30REtEAM2ETLqPJQ7q54K5S4rHDPprTCSfXwnbNKa3dP+N+z+t/TXs+3I9CtKVUmXAJANAwduOP6aJldcYXOBLC36QC2sPebiIhqYsAmahEi5SuUIATAq3D7kyatVTZpciGUUohH9AovG7pnhmDL0RMuJzKCiawO35OB737rSd4CBi1gcLIygE8D0JfvB+9UrBTEU973KAM4EdGqxYBN1OJMBZjeGtzKmzTpurqlhIF7/sKmwpoOfcj3Sn7f90QWmMzqAD7hBe+JrP7uets6W9Bf1dpPAL3sYGccSMUCw
TtW+p6MsgeciGilYsAmaiPFSZPeKiXBwB2scLsM3AsS7PuuVv0WEYTiCby0O4NJL3D73yey+kA7jqvPm7eBoSlgaKp6ADeUnmiZigGdXhU8FSt974jpSZoM4URE7YcBm6iNVQvctfq4afGUUuhMGNjYo2b0fgOlyZd+8J7KlQL4VE4wmdOtJ4CuhOvz+SugAJUhPGSiLHh3+OE7yhBORNTKGLCJVpDZ+rhTcYVcRKHAiZNNE5x8Wa0CDuh1v/3APZXzvmdR+negCm47lRMxgcoQbho6eHdEgQ4viJ9/fJNuIBER1YUBm2gVMBWQjBrIeOHPEbaVLJdISGFtClibqh7A/T7wYOCervyeL4VwxwUmMvrLD98M2EREy4sBm2gVqtZWUmwpcQDbWZ1HnGwFwT7wykPO+/ylCP3QPZ0DpvL6+3RuiQdMREQzMGATrXJ+W0nY0F/+etyu6x0ExxHdx+3yIDitIrgUYX8n+6+JiFoNAzYRlfEDt2kAJqS4PKAr+jDvjqtgB0M3WOkmIiIKYsAmojkVJ08qIGQKohWh2/VDtws4bC8hIqJVjgGbiBYkGLphepVu5bWXCOB6SwU6Xm83q91ERLRaMGATUcMU20sCkyiBiuAtXuXbER3AHT1pj7mbiIhWCgZsImq6suCtAECAKuG7O2HAyRmwHb1soO14B8nxJley+k1ERO2AAZuIllUwfMciChGv3QQohW8/XOuVTbzv3hcDOBERtRoGbCJqWX74VtD/M00gHDi9LIC7XvsJdPuJK7r9xJVSCwoDOBERLQUGbCJqW2UB3PC7TsrbT3SBW6/r7QIQUXBd8ZYcLO8BZwAnIqJGYMAmohXLD8yldb2BygAOlHrAxZ+A6R063nF1O0qwDYWIiGguDNhEtGqVBfBiKbwUwIGZbSj+REzXD+ECuBWtKMHLJiKi1YcBm4hoFpVtKP5EzMoQDnhB3Kt068CtikE8GMYFgYo4K+NERCsOA3abema3g/ufEYylBT1JhZM3K2zeYNY8/91PWXjgWaBgA5EQcOKBwJsOCdc8fzPGMN8x+Zc/kRtHV0zquvy5xrTY01vBariN7SZYCffDduVyhEB5ENffUayMC3RrihTXCtftKTqwM5AHqR3bYT72CNTkBKSzC84RR0P23W/FXediteOYiVYKJdK+H2SOjEzDdVt3+H19KQwNTTX8cp/Z7eD7D7so2PoN2DAAQwE9SSBvYUZouvspC/f8duabsqn07yIhYEMXsHuiFHYrf64Wfuczhkp3P2Xh7t/O/L0/JtOADhKqdPlmxeXv1yfYPqQwOKkPWCJez2wiAnTEAMsBMgUgFdN/Ew0DY+lScDEMb4IbyrdNyAQ6Y3oMtgucd6RRvB2V4dQfw0ROVd0JaHSYrbbdIyHggmP0GJ/Z7eDHj7oIGfogL5YDZAtAMgrYYiBsuJjK6W3kn155G2tdbzNDeWlnqvp2XMxlNnrMezJR/OShdNN3UPww7lfEJfhvvzoupcPS+6FcXKCzO4H+rvmPqdpraiuEWXevTTB27YSZnoJjhKByGUgkBoRCgG1DuQ7sk09r2rjUju0I3XMnYBVKT7xwBPZpZxSv0x+zmZ6Ck0wVxzzbdptr2y5m26sd2xG6/x6IYZa2Uz4HSSRgOnbdY2y2WrfRf/9sxOOvWY/hZr3HNxLH2Bh9fal5/w0DdhM160Fz/Z0WBiarn2aqmWF3PFO74hU29cE86tmKh+8NTOV0oE3nq/+NqYB1XTPDbWUQ+djNVs3rDHnB1hfR7w3wf+X3yjquvp3VHgKG0l/+5QTPV3laNaYBdCcAywYKDhAP64AeDKfTOWA6r29jV9JANu+WhdVaYfiAfuCZPaWdl83r9XatDO3Vfp7O6XEbhq6W+mGrLwX85RlhfP0+G1M5QSSk01nOEoyl9e3Z0GNg15gL19W3LR7R5ynYglRM4dI3Vv9A65ndDr77S307RG96RELARcfNHsorL6NW2C3bTqLvm+BOw0JU2
9GoZ0einsu9/QlAibugy232joofyteunf+bATDzNbVqSFuKMBu8zmwGKpOGxJMIdXbAGR4GHBfS2QlEovqPLAtIJmGf/46mjCn03f+AGh8tbWAAEIF098K+aGvZmEOxCOypaaj0NGCYAKRmIJ9t21Y9vZCDxBJQVmHOoBi69WYgnQbCXmEkn4OamgREoAwDAl29kEQSSCSW5L6tNNs2WHv0YRh+5IlFP/4W8hiuN5AvZTBc6E5Cu4TXdhjjfK24FpF63sAa3S6x1B/JD9UI14Du8XQc/e89EzqszBaeK6u3s3n8ZcA0BM4swdQRYNd46ee8DfSngKmc4MePCs4DsHmDOet1Vgbfgl3+swqE41r7V341L/hz8N+mMcsAoEPkeKZUGSzYgJvRp6Xz5efNWkC3UoiEFPJZwfd+5SIedjGd1wHMNPyJcvpvH3/Z2xFSQMHSP8fDgp4kMDwl2D4EGEpPlpvMCl4Y0tsdXg8vgBn3weAk8IXbLUzlgFgYmMiKdxAWAN5tUEoVWxnGMvo8pgF0RIGxdO175NbfuMgH7gOBvl9v/Y2LD51TX6j0w248MvOxcMcTLrIFvY38HaFMHvj2NhcdMXdBz5n7nxG4LjCR19vKNPRO0v3PCDZvqPtiql5uyFAwvaAVCQGwpa7LnWs7NEKjyyXmY4/oYOKHtHAYYunf2/MIYfOp1CKfB5SCch1/DUV9HisPpVJ6b1VBh24/YIdC+m+bRE2M6QdoMGD7v4e3nRwHKpuBO+VC+S/CrqMXbxcB8jmYD95f3G5zbdsZp4sLZHNQhQKkuxdIpxG6/x7YqB4U1eQEBAoqPeUtCB940VCq+EahMtOQkAlEYwu6bxdjtm2Aow9ryONvvpcRDOQSjc25nZdCK46J5tb2ATsYXv0WAP9Ib1N5wQ8eFrz9GJS1S9z7tA5QhtIVynufBgBrQSF7tjfNvr7mvKnO9h6qKk6fq8BvO/O77tnCda3L3zWuX89NA/jWAy5KteiFqQzg/lvefLLFXLdDqfJtYxr6vbKagg3sHNEn+gGnN6lDrP87v+LsX4RhlI85Z+sAnLGkOEEu5FVI/fGGDNS8kQL9+Jr0quoG9Pu6fzv9baRU6XeGoZ8n4xmgv7P2tpjIlF+Gf33+7+eiQymKVfXKUDoyXQrXynsACwDLLT1nvv+wIBVz62o/AoDBSUGmUPq0wnX1pw/OIj/xGksLUglV3IkFdIV8th2UerdDK1KTE/oNPWieYXaucFB5upqeqr6nYHt7eabpHcrTKTtNOrsWcAvLxzlrhdAPqCKloO09kdXoCJDLBR7EAY7j/U4VAzlQY9u6DtSeXQh/6+tAJg1JdsA/tJLKpPWT0HX15c0RFCUcKVXdq40peNunp4v9/f71N6qVYrbtOtfjqyGPv3leRqN2KhupFcdEc5ujjrc4t912G84++2ycfvrpuPHGG2ec/vTTT+Ptb387zjjjDHzsYx+DbdtVLqW2FwZd/PhRF1M5QTwCDE3pyprrlHp4MwXgzidLaeqBZ71w7VUV/eDzwLMLu43BN03lVTFDhv59PacvROVrZVCrNsyI6MBaWVleiMqbv5CLm2sMVkW4npOgWDEWQVnF1xWvlaBKQPeHIQLsmZDieaoNb7b7XZ+uitvGRXlGsV1gx5BTNga/nxeYvfJZ66TZNuEzux18/T4bX7jdwksjUvl+PjOUBj9hKCu0KTiurmjvmQAmssBLo4IfPOzimd219w5tt3zCoVL659nagubyzG4HWQvYOeJiaEqQLehBW44O/XMZSwvCFfsE9Ybz5SKdXaVg67NtSDiC0K03I/ytryN0681QO7bXvIyycOAHQ8PUVcpqp9d6MHq/l3iiFHJFAMuCch04Rxy94Nvph3yk02U7Af7tkkQSxWZ3fywi+veAF6JRPcwWz693CvxtJuFI+bYt5KGmpgCl9BiU0j8X8qXr8HrC1MiQDs+OUztsBqstc754ACo9ravdQNVtsBBzbtdqj69sB
sjnMXn99frTjGzFnvw8d6ZqPoZrXIaanNCtJEFN/oRkLq04Jppb0yrYAwMDuO6663DLLbcgEongoosuwrHHHosDDjigeJ4rr7wSn/70p3HEEUfgox/9KG666SZcfPHFdV/Hwy+UV4T86lTwNUUJMBxo7SnYusgQpNTMNoR6jaV1uA8KvmnOdTrNX7Ut18yt6bilynPN81RkxfHMzP7wucZY+RZYuRNg1fFpg6B0vcF/1zqvaQARQ1eRv3C7VbU67PcaV15M2Kze/gSg/FObPDCeBaCk2PedzuudkC/cbhWr6srPTN7lh7wdm8lc6XemobOKv+Ncq4ptKsDybnsxZyh/ZY/58z+Jiob0p16Oo8O+7QhME8XbPZuepPL640u/qzecLxfniKMRuvdOYGoiMNtY97KK69b1cfWCqpQ1iIjua47GIImEnrTX2QW7jkrrbJXUOSuE4cjM8K+U/j0Q+Ehq7lcif5upQq60IkwopCv3ygvzSkESSajpKajpKUhPBPpEpxTiXRdqehIwzKoVZ1UoQFIpqGxmRsW6KscBlAFJpeqqkNdjru3qHHG0/vTCwox+exWLQQq2/hkA4oE+8Tl2psru60gEKjNdapMxDCAahX3iKVX/Vjq7ynvXgYZ8QrIYrTgmmlvTKtjbtm3Dcccdh+7ubiQSCZxxxhm44447iqe/8soryOVyOOKIIwAAb3vb28pOr8dEdmZFCKgIAhXvXZHQzNdAEZS96VUKVuO+fp9dVjnrSSpM54ChKcGeCcHQlJ6I5r9p9iTVjGBUz5vqbNfZwvM6yVPsf67BX34tqLK6Ot92nD0TUuy5joSA9V1q1jGs71LoiAIZC4AKtjCVV4ffsLn63x+8F8o+QfL/9s4n3bJPbbri+vxTOR2QpnOCqRwQMfV1xv0WU5RX+QXlVf1gm4tC+Y5zpXVdCsmIF8ihvycj+vcL4X8S1RFTWNNpFNtqC079ExxP3qxgu3pSqYigYOvVb+oJ58vKsksPWNcFCgWIMmpWpCvNVUGsenpQoPoquZyezHjaGbAv3Arrkkthn/+OusL1bJXUuSqEyipAOjr1bTb0bZeOTiiroMe1Zi0kltCnzRay/XAcDkMieicBySRUPqcvJ5kC/J2NaAxihnSVeniw1KPmf/zqV9RFaleHDRPS3QtZ0wfp6q49Lj+Ax2KliaMV22Ah5tqusu9+sE8+rbgNlGVB4kn9s1JAMqnDtmXpbZRMzjnBccZ9ncvpyS7+px7BvfgqnCOO1v3/llXzExK1YztCt96Myeuvn/MTnEaoZ0zUepoWsAcHB9HX11f8ub+/HwMDAzVP7+vrKzu9Hl3x8vAaCtway9Ffjgus6Sj9/sQD9XMr+H4h3u+r8StXlSHCDyD79Qmm86VPCB1HV9zG0oKPfGscmYJgMgsMTAp2jwsGJgXZwuxvqnNdJzVfsyJPNKS/wqHG7ygpQE9oBBBSXrWv8vSAPeN6hRERoCuOmi1Mm3oNxCqmJ8TCumWjWvvT8BTKdnxjYYVERIfnPRM6aEdDQCqu/y4VV0jF9N/0dBjoSXh/KLPcD3PcQSdvVqVl66TUmrTQMBts74hHFPpSCuu79M5BvXMpNm8wcd6RBlIxhWwBSMXUolc1aTbzwfsBxy5Vrv2e48qP7WcJYnOFgxmnG972MAwdzgxDV1bXrEXnX/5lXYF6xu2Yo02lasjP6FaF8Le+rlsVXKcUVrt7AdMs7iQ4RxwNFTIhyRSMvr5SqDRDpeq2UuVhMxSCKhRgn/8OWJdcClm/l+7BHh/VLSDDQ8UADzPwGCm+cUlp0oJ/m2wHoZ/9VI85m9VVcn+7KgNIJCAdKW+pKROA1yfpf+VypZYUYNFV0tlaQPxWGQDFbYBoVK9oEmSa+jE4Cz/whr/1dYR+9lOI4xTva1XI64/yTFPfdz1rINFY7R3CitBfGeqDAV7FGtNKM5e5xkStqWktItVW/1PBSsQcp9fjtMPj+OZ9GThexS4aFtgVKzwoA
MdujqKvT/fKXfRGIJlM42ePF5C3BNGwwumHR7DlmGTV6/jmtklEw0A0rMcWDgMTaQc3PSRIRB1k8grxiK5E2d6ndwb0RLMNPcBUVqFgS6nnW+nXua6uBPr6InVf52TGLV7nbG2Kq1Gw1bBV+WPM20BnXGFDr4nhSRdj066+Lxd5n/qXHwn7a4kr5B0Ff0JptWeWV+zWE1BNAyGv1co0BRO50rJE39w2ie5k6fEIAHlLMDzlYq8eQ0/OzLuYzOpqs+sCWUuhM6GDRSbvImvpSZsbegzsHHFRcHT1NxHV5+nu0Es/XntJN7546ySgHGQK+vIq71//3+u6jbKlk57cUcCdj+UwPOkiHtHVYkMBUufzbjb9PZOYSLuIetsoFDIwmXGRs4F/uMPB2k4DZxwRw6H7zn7ZfX3ASYfN++qXzJpgNQLAxOR4abKKz/uoPRSoaEihALWmF93VlrLqOwxWVxz5bdvgjo/D6O5G9IQTED7wwKqnY+0ayOSktzan/khfJWKIn/FmffYFLJc1mZ6CisXK34PMCCQ9hZ6+FKxTTkb2v/4LyrWBcBju9DSQy0B1dEAlEnDFhZqeBnLZ0qzlaBSJU85EuC818zb29pYCWDgMd3gYcF0YqRSUt90qt1n2wFej8D//owenVHnF2p+R7FWszfXr4QwO6tNNE6GQATeXg+T1+IzeHsCy4CoFMxqG5PPedj8T4QMPhPXss8jccgtQKOign9TvfzIxAZWehhGPAZYFUYL4KSfr27gAldtVMhlINgMkEjCSCSCXgbntPsS74ggfeCCm1/RCpqagvNYb0y5A0lOAYVQ9PwBYzz6L7Lb7oAwDSCbgpqehbAtGNAIVjcLxJ4UGHrPB+76qvsOAo6s/Uad/+igkHIKK6DGG41FIoYDQU4+io8bfNMQsY5rzTxd4/y2ldhjjfDUtYK9btw6PPFLaQxwcHER/f3/Z6cPDw8Wfh4aGyk6vR1+8gMM3ucUl9wAgauo31ODSXP/7YgHHvar0eXtP1MGGLr3iSE8S6IlaNddgHByzEY8Atq1fmLMFKX7U3ZMQjFn6udsV15WtoSmBuKVl0dJ5V+88G0BfSl9GwRb85KE01ifydV1nzpLiig09CUFYAYVWT5RLqBmbolq/cT090PVc3mRWEJ209WPV78Gc541QKF9lRKBbPgD92MwWBB88M4Qv3O5izFsHvTJk+60WIsDQZOlxGgsBa1Oq+JwYHLOhFDA6XXpeRbz103eOuFXXFB9PC1zXQTIKjHtV8s4E4Diix23rJQmNaafsOv3ri0eARKT0+B+dLn3yZBh6TfI/PBjFMVauOT7qbdfeZGm97+mc4Gt3TSMerm8lkqDj9tMTqh0HiEcNTKRdTGb1NsxbgomMi3/9+TTevoi1uxtpoW9WletghwNtCEXeHr6VzZevK3zIkcjWWsu2ez1w9tuKP2YBPSu9xunV+qXz3evRByxovdxQMjWzh9WygKS3/m73eqgT3lhaKrBg6cmU0bieYGGYOpz7K4J422RiIgvxx+PdhuoHSOmGymX088Ryqm6z0LPPA7EElJUv75n2t3/gei3L8T6xEkgsDtf2dgC8GcLO4KCu2EaicEMR2G9/V9l27zvwQEg4ottegoWtZAoqMw0nnSnb7ljoGsWV2zWf16008SRcRwAjBGTTSH//B0A06vVLZyGOIBSLwJ2a0n3qyY7S+R0L0/99P+zu9Xq7/ff93l50CHAEyjQBx4EzNQUxwzp4e1V0e2BAb5dwFOjpWdBjKTwyqltPbBfhkAHLdgFlQo2MtuRazu2yxnQ7jHG+mhawTzjhBHzlK1/B6Ogo4vE47rrrLnzqU58qnr5x40ZEo1H8+te/xlFHHYUf/ehHeMMb3jCv63hh0MVvdqD48fKeCb20V09SfywN6LARnFA432XzepIKI9OCrFVaV1iJfl9RSiFk6hUSpvP68hwXgJQ+0XO8nedgP+1ckxx7kgrDU4KcXb6Wcdi7TlcxXS+1y
nBtGrP3SM91uj8xF0oWtIdQ/NQZM/882ON/3usMfO9XLnLWzPMFxyfiraHu6p3VY/YvnTsa1utsFw/Q45RPFq4M18W1rL1l8kT0gW3852Q05O0Qe5nBcYBpp3Sdlc8503uuuoKa4Ti4lnbwE56JrP7bnKV3jAEduue7XObmDSZeN6p35ke8Tx0E+tMC//rmmnjZjqS7B2pkBHCDH9ErSKpTf1ztBeB6Jhkup7LJdI4DlU0DjgMxDagd2yH77qc/hvduQ/hbXy/1QsNviQm0YxgGRBmzTgAMXh4AGA8/CPPx3+ijQYYjcA5/Xfla4JMTkEQConQ1WQ0Plu/c+FVspfTkzu4eqExGt3qI6BDpV9e986pMpnxZnuD4qk2cM03I+r0aesCe2bYr8jmonD7gQLGdROnbKrmc/n0qNWtfeOUkWYkn9ITR4vYwoe94FA/fq+w0nEMOLV3GPA7iwgmHVK+m9WCvW7cOV1xxBbZu3Yq3vOUtOPfcc3HYYYfhsssuw5NPPgkA+OIXv4jPfvazOOuss5DNZrF169Z5XUdwFREddvVz038jBWZOKJzvsnn79ek3Zr/HWrwe16i3a5KKlSZniUjxwC4p7/nur3oQXOptrkmOlX3d/pt5xHvfnm3t6p5E7dNaRbWJqY00z06jqmZbbcI0StXimuoMzfNpC1HwWh0Cf1tcjtfrua6cOLd5g4kLjzWwf5/CmpRRV2+5AvC/O2eOUVDqZwb0dfdUdFaZRulLBLjynDD2WaPKHv95W7/w+HONTFM/X7YP6dFVPuccB8hZep7EleeEcekbQzNCbOVa2v7t9HcipnKlqv9Clst8ZrdT3JnftMYo3r3BbaEw+8TLduTuf2CVjz4A96BDin2zC+mJns1cExIXotjDahh65Q0RvZPguFUve0bvsG0DUr6AvMpk9PrXdd4m85nfQhJJ3QecSMJ85rdl11t5nRKLBwYkxXDtHH2c3u4XboV92hllkyTL2nn8B2WNFUSWY+Jc5W0s9vLrilVx8ificXT+5V/qvnSj4g2jIszOuK+iMV0lD0f0dhGBRKM6EIuufkkiCWOXfpGb7+MtuN2EEw5pFk090MyWLVuwZcuWst/dcMMNxX8fdNBB+P73v7/gy69cRaQj6i+dpcOGfxjj4MSm+S6bt31IoSMqyNmlajSktM5xLKyQignyNpAt6OpYOl8KPPEwMOXoj8BrjanadaZigqxVfp2FOloUrjwnjI/ebM19xmW02Ml9lYdSr7S2Q9+nwSNKzldnXH8qYQeOxOgfUr6eAF/vTZytn96vTivoA8GceZh+4wy2QpgGEFa6QpwtVK/ubt5gYvMG/RHX//3qaPH6Kqvf/nPJFR1YfQUb6I4D04XAQWq803RVunQpxSUwAz0pJ29W+PGjAtj6+Wrr1cDQHS+1bwQ/aap8zpmmfv74AbymwHUagU8QRKQ4P6IjUAibz3KZlTvmxasM/nkDduxajbFrp7eKg7fH7328buzaucjDRdU219H9Fkr23Q947BGI21NWfay2FN2M5eOKVWTvSeKvAVvP8ndz3Cb/emdcZzRWqsI6drHq7R5zfNltKlaH//WreoJi5QuKUb2OJvvuBxunlVVum/1JxIzb6FWLiuuJA2UV6pnnn7lMX9XzhEzYp54O2Xe/YtVcKg5z71/HfA/iEtxukp4CkqmW/wSHlkdbH8mxK64wlS0tsRePKNiOoODUDhvzXYt2LC3oiAEpVerBrgzxhgFceGyp99JfG3giB6zpUDh6P8H2IVX3odLH0oJkVC8JBuiPt8fSpeucy1wBdLmt61x4+PUro7Uo6NUZ/ODkH9HQV08fdSKid1T8NZqnA4fbdlx/rpEUfzZQCnT+Jw3vPN7AjdvcmvdDwQubxfdtVTqsvXgtQV2J6o/hC47BjLWn621L8ANw8CAzM1Q0bPvPGX8OwdCUbovyq9IhPzT7f+5dtt+ytnmDifNQGnMk5C/RV7qS4HOw8jmnL1NmDcNrU
7qNpbgSF0oV62wBc17nXCp3zP3nmH9/+d/XrrB5OpVtCwDKwknTrnORR+9b7GVXhs/SEYtQ/gJUI7wu5HqrBt4TT6k7uMmatcDY2IydIfT01P6bijaWZpuxXcNhSDhc3gISXMKxjp2Auc4zV0vHQh5v/nbraYPeYVo+bR2wj9lf4cVhFCtjln5NwYVH155oVFlNm6uiXBnI6wnxwYrhQp58lddZWSWv1ncLlCqQpxwE3P3beV9tXcJmaWlDV8oDbz2V6bAx+20AdAjs7wQO2airlhM5ha6YYL++0o6K5fW9Vzpsb+DC4/SG+/JdVlnoEild9uVvDuOZ3c6MEBwygHe8Xr9pVgZLQE+U8++HvpSeKOt4n976lVZ/otvalFvsXfav3xVdHfd3AmJh71NcKYV4f8LfpW+s/vT0H18LsaZDz1fyj2BauQNQGY6Bmc+ZWEj3TMfDOvgmIyhO+HO8SYiJMHDGoaXwERyzPw+iUOM5uJADspxxqIEfPOwWq96GASSjpftiruucS+WYuhJ60qe/zQxDr+Dif8qwUixHv2kzr3M+lx0Mn6Fbb553eF3I9S4m8BYruZFUzWpvKwjeRr89Qyyr5pjr2SaznWeuKjh7qqlZ2jpg799v4LwjjXlV8yqraXP9TbVAPleIX6xq1xmskt/9lIV7flseUBVKBwR50yFhAFZxdRVT6WAXPNpgyNBhyxYDtu1iIjtzHH6/r2HoCuBxr0ZZJT4YeHuSeifgmT36OkOGv5JKaeWHSAi44JjatwHQq7G85ajybVtrR+V7v7Tw5E59PYYCDt0EXHhc6UWyWugKBr/NG0y864Taj4W57gcg+GmF3gkI/n2t6z//deV/70+6XUjwm68zDzPKWkz8CYnFanyNcBx8zqxNKRyzf+m+n++nNHM9B+e7E+xf5ttnqezP93lfKTgm09RzLZJR/VWw578qSbuo5yP6drrOhV72YsPrUmzH5Wj5WKylGPNc17Ecj3FaHZTU03PQoiqXlGqWaoeDrueNdDFLz8x1nXc/VQrQkZCeAKaD9fwuzx9j5eVtXg9M5epva2nmbWjmdmzU39caYz1/v9gx1ssfY+X1Ve4oLWdQrDXGVgivs+1MtZpGLdMHzG+FhUapdZ2NWM5roben3r+rNcbl2I61tMuyaEs1xoXeN9yOjdEuY5wvBuwmapcHDce4eBxjY3CMjdHIgN1K2mXbc4yLxzE2BsfYGAt5TV1ZDYNERERERMuMAZuIiIiIqIHaepKjYTRnIlgjcYyNwTE2BsfYGO0wxoVoh9vFMTYGx9gYHGNjtMMY56ute7CJiIiIiFoNW0SIiIiIiBqIAZuIiIiIqIEYsImIiIiIGogBm4iIiIiogRiwiYiIiIgaiAGbiIiIiKiBGLCJiIiIiBqIAZuIiIiIqIEYsImIiIiIGqgtA/Ztt92Gs88+G6effjpuvPHG5R5O0fT0NM4991zs3LkTALBt2zZs2bIFb37zm3Hdddct8+iAf/zHf8Q555yDc845B5///OcBtN4YAeD666/H2WefjXPOOQf/9m//BqA1x/m5z30OH/nIRwAATz/9NN7+9rfjjDPOwMc+9jHYtr3MowO2bt2Kc845B+effz7OP/98PP744y313Lnnnnvwtre9DWeeeSY+/elPA2i9+/nmm28ubr/zzz8fRx11FD75yU+23DhvvfXW4nP7c5/7HID5PSZb6XERxNfUxuBramO0+msq0Pqvq6vlNRUAIG1mz549cuqpp8rY2Jik02nZsmWLPPvss8s9LHnsscfk3HPPlUMOOURefvllyWazcsopp8hLL70klmXJe9/7XrnvvvuWbXwPPPCAXHjhhZLP56VQKMjWrVvltttua6kxioj86le/kosuukgsy5JsNiunnnqqPP300y03zm3btsmxxx4rH/7wh0VE5JxzzpFHH31URESuuuoqufHGG5dxdCKu68qJJ54olmUVf9dKz52XXnpJTjrpJNm9e7cUCgV55
zvfKffdd1/L3c9Bv//97+X000+XXbt2tdQ4M5mMHHPMMTIyMiKWZckFF1wgDzzwQN2PyVZ6XATxNbUx+JraGK3+mirSfq+rK/U11dd2Fext27bhuOOOQ3d3NxKJBM444wzccccdyz0s3HTTTfj4xz+O/v5+AMATTzyBfffdF3vvvTdCoRC2bNmyrOPs6+vDRz7yEUQiEYTDYbz61a/Giy++2FJjBIDXv/71+I//+A+EQiGMjIzAcRxMTk621DjHx8dx3XXX4U//9E8BAK+88gpyuRyOOOIIAMDb3va2Zd+OL7zwApRSuOyyy3DeeefhP//zP1vqufOzn/0MZ599NtavX49wOIzrrrsO8Xi8pe7nSp/4xCdwxRVX4OWXX26pcTqOA9d1kc1mYds2bNtGKBSq+zHZSo+LIL6mNgZfUxuj1V9TgfZ7XV2pr6m+tgvYg4OD6OvrK/7c39+PgYGBZRyRds011+Doo48u/txq4zzwwAOLD4wXX3wRP/3pT6GUaqkx+sLhML785S/jnHPOwfHHH99y2/Lv/u7vcMUVV6CzsxPAzPu6r69v2bfj5OQkjj/+ePzTP/0T/v3f/x3f/e53sWvXrpbZjjt27IDjOHjf+96H8847D9/+9rdb7n4O2rZtG3K5HM4666yWG2dHRwf+8i//EmeddRbe8IY3YOPGjQiHw3U/Jlvt9vj4mto4fE1dvFZ/TQXa63V1Jb+m+touYIvIjN8ppZZhJLNr1XE+++yzeO9734sPf/jD2GeffWac3gpjBIDLL78cDz74IHbv3o0XX3xxxunLNc6bb74ZGzZswPHHH1/8XSve10ceeSQ+//nPI5FIoLe3FxdccAG+/OUvzzjfco3TcRw8+OCD+MIXvoCbbroJTz75ZLHPNmi5t6Pvu9/9Lt7znvcAaL37+3e/+x1+8IMf4N5778UvfvELGIaBBx54YMb5ao2x1W5PLa06Tr6mLg5fUxunnV5XV/Jrqi/UrAE2y7p16/DII48Ufx4cHCx+hNhK1q1bh+Hh4eLPrTDOX//617j88svx0Y9+FOeccw4eeuihlhvj888/j0KhgD/4gz9APB7Hm9/8Ztxxxx0wTbN4nuUc509/+lMMDQ3h/PPPx8TEBDKZDJRSZdtxaGho2bfjI488Asuyim9aIoKNGze2zP29du1aHH/88ejt7QUAvOlNb2qp+zmoUCjg4YcfxrXXXgug9Z7bv/jFL3D88cdjzZo1APRHl9/4xjfqfkzyNXXh+Jq6eHxNbZx2eV1d6a+pvrarYJ9wwgl48MEHMTo6imw2i7vuugtveMMblntYMxx++OHYvn178SObn/zkJ8s6zt27d+P9738/vvjFL+Kcc85pyTECwM6dO3H11VejUCigUCjg7rvvxkUXXdQy4/y3f/s3/OQnP8Gtt96Kyy+/HKeddho++9nPIhqN4te//jUA4Ec/+tGyb8epqSl8/vOfRz6fx/T0NH74wx/iC1/4Qss8d0499VT84he/wOTkJBzHwf33348zzzyzZe7noGeeeQavetWrkEgkALTe8+aggw7Ctm3bkMlkICK455578PrXv77uxyRfUxeGr6mNwdfUxmmX19WV/prqa8sK9hVXXIGtW7fCsixccMEFOOyww5Z7WDNEo1Fce+21+Iu/+Avk83mccsopOPPMM5dtPN/4xjeQz+eLe4wAcNFFF7XUGAHglFNOweOPP463vOUtME0Tb37zm3HOOeegt7e3pcZZ6Ytf/CKuvvpqpNNpHHzwwdi6deuyjufUU08tbkfXdXHxxRfjqKOOapnnzuGHH45LL70UF198MSzLwoknnoh3vvOd2H///Vvufn755Zexfv364s+t9tw+6aST8Nvf/hZve9vbEA6Hceihh+JP/uRPcPrpp9f1mORr6sLwNbW5+Jo6f+3yurrSX1N9Sqo1vxARERER0YK0XYsIEREREVErY8AmIiIiImogBmwiIiIiogZiwCYiIiIiaiAGbCIiIiKiB
mLAJiIiIiJqIAZsIiIiIqIGarsDzRAtNdd18ZnPfAaPP/440uk0RASf/vSnsd9+++Gqq67CSy+9hO7ubvT19eHAAw/EX/zFX+D555/HNddcg/HxcTiOg0suuQQXXHDBct8UIqJlx9dUWg0YsInm8Pjjj2NwcBDf+973YBgGvva1r+GGG25AIpHAAQccgH/5l3/B4OAg3va2t+HAAw+Ebdu4/PLL8fnPfx6HHHIIpqamcOGFF+KAAw7AEUccsdw3h4hoWfE1lVYDBmyiORx55JHo6urCd7/7Xbz88sv41a9+hWQyiYcffhg//OEPAQD9/f3FQ7u++OKLeOmll/DRj360eBm5XA6//e1v+WZARKseX1NpNWDAJprDfffdh2uuuQbvec978KY3vQn7778/fvzjHyMUCkFEiuczDD2lwXEcdHZ24tZbby2eNjw8jFQqteRjJyJqNXxNpdWAkxyJ5vDAAw/g1FNPxcUXX4xDDz0UP//5z+E4Dk455RR8//vfBwCMjY3h5z//OZRS2G+//RCNRotvBrt378a5556L//3f/13Om0FE1BL4mkqrgZLg7iIRzfD888/jb/7mb2DbNkzTxNFHH4277roLt956K66++urihBwRwRvf+EZceuml+N3vfleckGPbNrZu3Yp3vvOdy31TiIiWHV9TaTVgwCZaoBtvvBEHH3wwjjzySBQKBVx88cX4i7/4C5xyyinLPTQiorbD11RaSdiDTbRABxxwAD71qU/BdV1YloUzzzyTbwRERAvE11RaSVjBJiIiIiJqIE5yJCIiIiJqIAZsIiIiIqIGYsAmIiIiImogBmwiIiIiogZiwCYiIiIiaiAGbCIiIiKiBvr/AQiF7qPoa1l5AAAAAElFTkSuQmCC\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "a = np.array(\n", - " [[1.0, 2.0, 3.0], \n", - " [4.0, 5.0, 6.0], \n", - " [7.0, 8.0, 9.0]]\n", - ")\n", - "\n", - "print(\"Numpy array\")\n", - "print(a)\n", - "print()\n", - "\n", - "df = pd.DataFrame(a, columns=['A', 'B', 'C'])\n", - "\n", - "print(\"Pandas DataFrame\")\n", - "print(df)\n", - "print()\n", - "\n", - "print(\"Pretty display of pandas DataFrame\")\n", - "display(HTML(df.to_html(index=False)))\n", - "print()\n", - "\n", - "tensor = torch.from_numpy(a)\n", - "\n", - "print(\"PyTorch tensor\")\n", - "print(tensor)\n", - "print()\n", - "\n", - "# Matplotlib\n", - "\n", - "print(\"Matplolib chart\")\n", - "\n", - "# Prepare the data\n", - "x = np.linspace(0, 10, 100)\n", - "\n", - "# Plot the data\n", - "plt.plot(x, x, label='linear')\n", - "\n", - "# Add a legend\n", - "plt.legend()\n", - "\n", - "# Show the plot\n", - "plt.show()\n", - "\n", - "# Seaborn\n", - "\n", - "print(\"Seaborn chart\")\n", - "\n", - "sns.set_theme(style=\"darkgrid\")\n", - "\n", - "# Load the example Titanic dataset (the dataset may load some time)\n", - "df = sns.load_dataset(\"titanic\")\n", - "\n", - "# Make a custom palette with gendered colors\n", - "pal = dict(male=\"#6495ED\", female=\"#F08080\")\n", - "\n", - "# Show the survival probability as a function of age and sex\n", - "g = sns.lmplot(x=\"age\", y=\"survived\", col=\"sex\", hue=\"sex\", data=df,\n", - " palette=pal, y_jitter=.02, logistic=True, truncate=False)\n", - "g.set(xlim=(0, 80), ylim=(-.05, 1.05))\n", - "\n", - "# Show the plot\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "apparent-first", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - 
"mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/project_1_recommender_and_evaluation.ipynb b/project_1_recommender_and_evaluation.ipynb index fdd484e..063e76f 100644 --- a/project_1_recommender_and_evaluation.ipynb +++ b/project_1_recommender_and_evaluation.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 3, + "execution_count": 111, "id": "alike-morgan", "metadata": {}, "outputs": [ @@ -10,21 +10,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "Collecting package metadata (current_repodata.json): done\n", - "Solving environment: done\n", + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n", + "Collecting package metadata (current_repodata.json): failed\n", "\n", + "CondaHTTPError: HTTP 000 CONNECTION FAILED for url \n", + "Elapsed: -\n", "\n", - "==> WARNING: A newer version of conda exists. <==\n", - " current version: 4.9.2\n", - " latest version: 4.10.1\n", + "An HTTP error occurred when trying to retrieve this URL.\n", + "HTTP errors are often intermittent, and a simple retry will get you on your way.\n", + "'https://conda.anaconda.org/conda-forge/linux-64'\n", "\n", - "Please update conda by running\n", - "\n", - " $ conda update -n base conda\n", - "\n", - "\n", - "\n", - "# All requested packages already installed.\n", "\n" ] } @@ -45,8 +41,9 @@ "import os\n", "os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'\n", "\n", - "# import sys\n", - "# !conda install --yes --prefix {sys.prefix} numpy hyperopt\n", + "######################\n", + "# I have added hyperopt package to environment.yml. \n", + "######################\n", "\n", "import warnings\n", "warnings.filterwarnings(\"ignore\")"