REK-proj-2/project_2_recommender_and_evaluation-Copy1.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "alike-morgan",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The autoreload extension is already loaded. To reload it, use:\n",
      "  %reload_ext autoreload\n"
     ]
    }
   ],
   "source": [
    "%matplotlib inline\n",
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from IPython.display import Markdown, display, HTML\n",
    "from collections import defaultdict\n",
    "\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.optim as optim\n",
    "from livelossplot import PlotLosses\n",
    "\n",
    "# Fix the dying kernel problem (only a problem in some installations - you can remove it, if it works without it)\n",
    "import os\n",
    "os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "blessed-knitting",
   "metadata": {},
   "source": [
    "# Load the dataset for recommenders"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "victorian-bottom",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user_id</th>\n",
       "      <th>item_id</th>\n",
       "      <th>term</th>\n",
       "      <th>length_of_stay_bucket</th>\n",
       "      <th>rate_plan</th>\n",
       "      <th>room_segment</th>\n",
       "      <th>n_people_bucket</th>\n",
       "      <th>weekend_stay</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>WinterVacation</td>\n",
       "      <td>[2-3]</td>\n",
       "      <td>Standard</td>\n",
       "      <td>[260-360]</td>\n",
       "      <td>[5-inf]</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>WinterVacation</td>\n",
       "      <td>[2-3]</td>\n",
       "      <td>Standard</td>\n",
       "      <td>[160-260]</td>\n",
       "      <td>[3-4]</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>WinterVacation</td>\n",
       "      <td>[2-3]</td>\n",
       "      <td>Standard</td>\n",
       "      <td>[160-260]</td>\n",
       "      <td>[2-2]</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>WinterVacation</td>\n",
       "      <td>[4-7]</td>\n",
       "      <td>Standard</td>\n",
       "      <td>[160-260]</td>\n",
       "      <td>[3-4]</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>WinterVacation</td>\n",
       "      <td>[4-7]</td>\n",
       "      <td>Standard</td>\n",
       "      <td>[0-160]</td>\n",
       "      <td>[2-2]</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "      <td>Easter</td>\n",
       "      <td>[4-7]</td>\n",
       "      <td>Standard</td>\n",
       "      <td>[260-360]</td>\n",
       "      <td>[5-inf]</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7</td>\n",
       "      <td>6</td>\n",
       "      <td>OffSeason</td>\n",
       "      <td>[2-3]</td>\n",
       "      <td>Standard</td>\n",
       "      <td>[260-360]</td>\n",
       "      <td>[5-inf]</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8</td>\n",
       "      <td>7</td>\n",
       "      <td>HighSeason</td>\n",
       "      <td>[2-3]</td>\n",
       "      <td>Standard</td>\n",
       "      <td>[160-260]</td>\n",
       "      <td>[1-1]</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>9</td>\n",
       "      <td>8</td>\n",
       "      <td>HighSeason</td>\n",
       "      <td>[2-3]</td>\n",
       "      <td>Standard</td>\n",
       "      <td>[0-160]</td>\n",
       "      <td>[1-1]</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>8</td>\n",
       "      <td>7</td>\n",
       "      <td>HighSeason</td>\n",
       "      <td>[2-3]</td>\n",
       "      <td>Standard</td>\n",
       "      <td>[160-260]</td>\n",
       "      <td>[1-1]</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>8</td>\n",
       "      <td>7</td>\n",
       "      <td>HighSeason</td>\n",
       "      <td>[2-3]</td>\n",
       "      <td>Standard</td>\n",
       "      <td>[160-260]</td>\n",
       "      <td>[1-1]</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>10</td>\n",
       "      <td>9</td>\n",
       "      <td>HighSeason</td>\n",
       "      <td>[2-3]</td>\n",
       "      <td>Standard</td>\n",
       "      <td>[160-260]</td>\n",
       "      <td>[3-4]</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>11</td>\n",
       "      <td>9</td>\n",
       "      <td>HighSeason</td>\n",
       "      <td>[2-3]</td>\n",
       "      <td>Standard</td>\n",
       "      <td>[160-260]</td>\n",
       "      <td>[3-4]</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>12</td>\n",
       "      <td>10</td>\n",
       "      <td>HighSeason</td>\n",
       "      <td>[8-inf]</td>\n",
       "      <td>Standard</td>\n",
       "      <td>[160-260]</td>\n",
       "      <td>[3-4]</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>14</td>\n",
       "      <td>11</td>\n",
       "      <td>HighSeason</td>\n",
       "      <td>[2-3]</td>\n",
       "      <td>Standard</td>\n",
       "      <td>[0-160]</td>\n",
       "      <td>[3-4]</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "data_path = os.path.join(\"data\", \"hotel_data\")\n",
    "\n",
    "interactions_df = pd.read_csv(os.path.join(data_path, \"hotel_data_interactions_df.csv\"), index_col=0)\n",
    "\n",
    "base_item_features = ['term', 'length_of_stay_bucket', 'rate_plan', 'room_segment', 'n_people_bucket', 'weekend_stay']\n",
    "\n",
    "column_values_dict = {\n",
    "    'term': ['WinterVacation', 'Easter', 'OffSeason', 'HighSeason', 'LowSeason', 'MayLongWeekend', 'NewYear', 'Christmas'],\n",
    "    'length_of_stay_bucket': ['[0-1]', '[2-3]', '[4-7]', '[8-inf]'],\n",
    "    'rate_plan': ['Standard', 'Nonref'],\n",
    "    'room_segment': ['[0-160]', '[160-260]', '[260-360]', '[360-500]', '[500-900]'],\n",
    "    'n_people_bucket': ['[1-1]', '[2-2]', '[3-4]', '[5-inf]'],\n",
    "    'weekend_stay': ['True', 'False']\n",
    "}\n",
    "\n",
    "interactions_df.loc[:, 'term'] = pd.Categorical(\n",
    "    interactions_df['term'], categories=column_values_dict['term'])\n",
    "interactions_df.loc[:, 'length_of_stay_bucket'] = pd.Categorical(\n",
    "    interactions_df['length_of_stay_bucket'], categories=column_values_dict['length_of_stay_bucket'])\n",
    "interactions_df.loc[:, 'rate_plan'] = pd.Categorical(\n",
    "    interactions_df['rate_plan'], categories=column_values_dict['rate_plan'])\n",
    "interactions_df.loc[:, 'room_segment'] = pd.Categorical(\n",
    "    interactions_df['room_segment'], categories=column_values_dict['room_segment'])\n",
    "interactions_df.loc[:, 'n_people_bucket'] = pd.Categorical(\n",
    "    interactions_df['n_people_bucket'], categories=column_values_dict['n_people_bucket'])\n",
    "interactions_df.loc[:, 'weekend_stay'] = interactions_df['weekend_stay'].astype('str')\n",
    "interactions_df.loc[:, 'weekend_stay'] = pd.Categorical(\n",
    "    interactions_df['weekend_stay'], categories=column_values_dict['weekend_stay'])\n",
    "\n",
    "display(HTML(interactions_df.head(15).to_html()))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "realistic-third",
   "metadata": {},
   "source": [
    "# (Optional) Prepare numerical user features\n",
    "\n",
    "The method below is left here for convenience if you want to experiment with content-based user features as an input for your neural network."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "variable-jaguar",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['user_term_WinterVacation', 'user_term_Easter', 'user_term_OffSeason', 'user_term_HighSeason', 'user_term_LowSeason', 'user_term_MayLongWeekend', 'user_term_NewYear', 'user_term_Christmas', 'user_length_of_stay_bucket_[0-1]', 'user_length_of_stay_bucket_[2-3]', 'user_length_of_stay_bucket_[4-7]', 'user_length_of_stay_bucket_[8-inf]', 'user_rate_plan_Standard', 'user_rate_plan_Nonref', 'user_room_segment_[0-160]', 'user_room_segment_[160-260]', 'user_room_segment_[260-360]', 'user_room_segment_[360-500]', 'user_room_segment_[500-900]', 'user_n_people_bucket_[1-1]', 'user_n_people_bucket_[2-2]', 'user_n_people_bucket_[3-4]', 'user_n_people_bucket_[5-inf]', 'user_weekend_stay_True', 'user_weekend_stay_False']\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user_id</th>\n",
       "      <th>user_term_WinterVacation</th>\n",
       "      <th>user_term_Easter</th>\n",
       "      <th>user_term_OffSeason</th>\n",
       "      <th>user_term_HighSeason</th>\n",
       "      <th>user_term_LowSeason</th>\n",
       "      <th>user_term_MayLongWeekend</th>\n",
       "      <th>user_term_NewYear</th>\n",
       "      <th>user_term_Christmas</th>\n",
       "      <th>user_length_of_stay_bucket_[0-1]</th>\n",
       "      <th>user_length_of_stay_bucket_[2-3]</th>\n",
       "      <th>user_length_of_stay_bucket_[4-7]</th>\n",
       "      <th>user_length_of_stay_bucket_[8-inf]</th>\n",
       "      <th>user_rate_plan_Standard</th>\n",
       "      <th>user_rate_plan_Nonref</th>\n",
       "      <th>user_room_segment_[0-160]</th>\n",
       "      <th>user_room_segment_[160-260]</th>\n",
       "      <th>user_room_segment_[260-360]</th>\n",
       "      <th>user_room_segment_[360-500]</th>\n",
       "      <th>user_room_segment_[500-900]</th>\n",
       "      <th>user_n_people_bucket_[1-1]</th>\n",
       "      <th>user_n_people_bucket_[2-2]</th>\n",
       "      <th>user_n_people_bucket_[3-4]</th>\n",
       "      <th>user_n_people_bucket_[5-inf]</th>\n",
       "      <th>user_weekend_stay_True</th>\n",
       "      <th>user_weekend_stay_False</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0.130435</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.652174</td>\n",
       "      <td>0.086957</td>\n",
       "      <td>0.130435</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.608696</td>\n",
       "      <td>0.391304</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.521739</td>\n",
       "      <td>0.478261</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.869565</td>\n",
       "      <td>0.130435</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.739130</td>\n",
       "      <td>0.173913</td>\n",
       "      <td>0.086957</td>\n",
       "      <td>0.782609</td>\n",
       "      <td>0.217391</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>50</td>\n",
       "      <td>0.043478</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.434783</td>\n",
       "      <td>0.304348</td>\n",
       "      <td>0.217391</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.913043</td>\n",
       "      <td>0.086957</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.260870</td>\n",
       "      <td>0.739130</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.565217</td>\n",
       "      <td>0.434783</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.173913</td>\n",
       "      <td>0.521739</td>\n",
       "      <td>0.304348</td>\n",
       "      <td>0.782609</td>\n",
       "      <td>0.217391</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>92</th>\n",
       "      <td>96</td>\n",
       "      <td>0.083333</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.708333</td>\n",
       "      <td>0.125000</td>\n",
       "      <td>0.041667</td>\n",
       "      <td>0.041667</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.250000</td>\n",
       "      <td>0.666667</td>\n",
       "      <td>0.041667</td>\n",
       "      <td>0.041667</td>\n",
       "      <td>0.291667</td>\n",
       "      <td>0.708333</td>\n",
       "      <td>0.125000</td>\n",
       "      <td>0.791667</td>\n",
       "      <td>0.083333</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.041667</td>\n",
       "      <td>0.333333</td>\n",
       "      <td>0.541667</td>\n",
       "      <td>0.083333</td>\n",
       "      <td>0.750000</td>\n",
       "      <td>0.250000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>111</th>\n",
       "      <td>115</td>\n",
       "      <td>0.727273</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.272727</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.500000</td>\n",
       "      <td>0.363636</td>\n",
       "      <td>0.136364</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.818182</td>\n",
       "      <td>0.181818</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.818182</td>\n",
       "      <td>0.090909</td>\n",
       "      <td>0.045455</td>\n",
       "      <td>0.045455</td>\n",
       "      <td>0.363636</td>\n",
       "      <td>0.636364</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>675</th>\n",
       "      <td>706</td>\n",
       "      <td>0.091988</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.451039</td>\n",
       "      <td>0.189911</td>\n",
       "      <td>0.207715</td>\n",
       "      <td>0.038576</td>\n",
       "      <td>0.011869</td>\n",
       "      <td>0.008902</td>\n",
       "      <td>0.169139</td>\n",
       "      <td>0.459941</td>\n",
       "      <td>0.272997</td>\n",
       "      <td>0.097923</td>\n",
       "      <td>0.994065</td>\n",
       "      <td>0.005935</td>\n",
       "      <td>0.020772</td>\n",
       "      <td>0.839763</td>\n",
       "      <td>0.130564</td>\n",
       "      <td>0.008902</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.041543</td>\n",
       "      <td>0.094955</td>\n",
       "      <td>0.738872</td>\n",
       "      <td>0.124629</td>\n",
       "      <td>0.676558</td>\n",
       "      <td>0.323442</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1699</th>\n",
       "      <td>1736</td>\n",
       "      <td>0.034483</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.482759</td>\n",
       "      <td>0.206897</td>\n",
       "      <td>0.275862</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.241379</td>\n",
       "      <td>0.551724</td>\n",
       "      <td>0.206897</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.172414</td>\n",
       "      <td>0.827586</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.931034</td>\n",
       "      <td>0.068966</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.379310</td>\n",
       "      <td>0.413793</td>\n",
       "      <td>0.206897</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.448276</td>\n",
       "      <td>0.551724</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7639</th>\n",
       "      <td>7779</td>\n",
       "      <td>0.037037</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.296296</td>\n",
       "      <td>0.259259</td>\n",
       "      <td>0.370370</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.037037</td>\n",
       "      <td>0.111111</td>\n",
       "      <td>0.296296</td>\n",
       "      <td>0.481481</td>\n",
       "      <td>0.111111</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.814815</td>\n",
       "      <td>0.185185</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.037037</td>\n",
       "      <td>0.740741</td>\n",
       "      <td>0.222222</td>\n",
       "      <td>0.814815</td>\n",
       "      <td>0.185185</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "def n_to_p(l):\n",
    "    n = sum(l)\n",
    "    return [x / n for x in l] if n > 0 else l\n",
    "\n",
    "def calculate_p(x, values):\n",
    "    counts = [0]*len(values)\n",
    "    for v in x:\n",
    "        counts[values.index(v)] += 1\n",
    "\n",
    "    return n_to_p(counts)\n",
    "\n",
    "def prepare_users_df(interactions_df):\n",
    "\n",
    "    users_df = interactions_df.loc[:, [\"user_id\"]]\n",
    "    users_df = users_df.groupby(\"user_id\").first().reset_index(drop=False)\n",
    "    \n",
    "    user_features = []\n",
    "\n",
    "    for column in base_item_features:\n",
    "\n",
    "        column_values = column_values_dict[column]\n",
    "        df = interactions_df.loc[:, ['user_id', column]]\n",
    "        df = df.groupby('user_id').aggregate(lambda x: list(x)).reset_index(drop=False)\n",
    "\n",
    "        def calc_p(x):\n",
    "            return calculate_p(x, column_values)\n",
    "\n",
    "        df.loc[:, column] = df[column].apply(lambda x: calc_p(x))\n",
    "\n",
    "        p_columns = []\n",
    "        for i in range(len(column_values)):\n",
    "            p_columns.append(\"user_\" + column + \"_\" + column_values[i])\n",
    "            df.loc[:, p_columns[i]] = df[column].apply(lambda x: x[i])\n",
    "            user_features.append(p_columns[i])\n",
    "\n",
    "        users_df = pd.merge(users_df, df.loc[:, ['user_id'] + p_columns], on=[\"user_id\"])\n",
    "    \n",
    "    return users_df, user_features\n",
    "    \n",
    "\n",
    "users_df, user_features = prepare_users_df(interactions_df)\n",
    "\n",
    "print(user_features)\n",
    "\n",
    "display(HTML(users_df.loc[users_df['user_id'].isin([706, 1736, 7779, 96, 1, 50, 115])].head(15).to_html()))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "amino-keyboard",
   "metadata": {},
   "source": [
    "# (Optional) Prepare numerical item features\n",
    "\n",
    "The method below is left here for convenience if you want to experiment with content-based item features as an input for your neural network."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "formal-munich",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['term_WinterVacation', 'term_Easter', 'term_OffSeason', 'term_HighSeason', 'term_LowSeason', 'term_MayLongWeekend', 'term_NewYear', 'term_Christmas', 'length_of_stay_bucket_[0-1]', 'length_of_stay_bucket_[2-3]', 'length_of_stay_bucket_[4-7]', 'length_of_stay_bucket_[8-inf]', 'rate_plan_Standard', 'rate_plan_Nonref', 'room_segment_[0-160]', 'room_segment_[160-260]', 'room_segment_[260-360]', 'room_segment_[360-500]', 'room_segment_[500-900]', 'n_people_bucket_[1-1]', 'n_people_bucket_[2-2]', 'n_people_bucket_[3-4]', 'n_people_bucket_[5-inf]', 'weekend_stay_True', 'weekend_stay_False']\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>item_id</th>\n",
       "      <th>term_WinterVacation</th>\n",
       "      <th>term_Easter</th>\n",
       "      <th>term_OffSeason</th>\n",
       "      <th>term_HighSeason</th>\n",
       "      <th>term_LowSeason</th>\n",
       "      <th>term_MayLongWeekend</th>\n",
       "      <th>term_NewYear</th>\n",
       "      <th>term_Christmas</th>\n",
       "      <th>length_of_stay_bucket_[0-1]</th>\n",
       "      <th>length_of_stay_bucket_[2-3]</th>\n",
       "      <th>length_of_stay_bucket_[4-7]</th>\n",
       "      <th>length_of_stay_bucket_[8-inf]</th>\n",
       "      <th>rate_plan_Standard</th>\n",
       "      <th>rate_plan_Nonref</th>\n",
       "      <th>room_segment_[0-160]</th>\n",
       "      <th>room_segment_[160-260]</th>\n",
       "      <th>room_segment_[260-360]</th>\n",
       "      <th>room_segment_[360-500]</th>\n",
       "      <th>room_segment_[500-900]</th>\n",
       "      <th>n_people_bucket_[1-1]</th>\n",
       "      <th>n_people_bucket_[2-2]</th>\n",
       "      <th>n_people_bucket_[3-4]</th>\n",
       "      <th>n_people_bucket_[5-inf]</th>\n",
       "      <th>weekend_stay_True</th>\n",
       "      <th>weekend_stay_False</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "def map_items_to_onehot(df):\n",
    "    one_hot = pd.get_dummies(df.loc[:, base_item_features])\n",
    "    df = df.drop(base_item_features, axis = 1)\n",
    "    df = df.join(one_hot)\n",
    "    \n",
    "    return df, list(one_hot.columns)\n",
    "\n",
    "def prepare_items_df(interactions_df):\n",
    "    items_df = interactions_df.loc[:, [\"item_id\"] + base_item_features].drop_duplicates()\n",
    "    \n",
    "    items_df, item_features = map_items_to_onehot(items_df)\n",
    "    \n",
    "    return items_df, item_features\n",
    "\n",
    "\n",
    "items_df, item_features = prepare_items_df(interactions_df)\n",
    "\n",
    "print(item_features)\n",
    "\n",
    "display(HTML(items_df.loc[items_df['item_id'].isin([0, 1, 2, 3, 4, 5, 6])].head(15).to_html()))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "figured-imaging",
   "metadata": {},
   "source": [
    "# Neural network recommender\n",
    "\n",
    "<span style=\"color:red\"><font size=\"4\">**Task:**</font></span><br> \n",
    "Code a recommender based on a neural network model. You are free to choose any network architecture you find appropriate. The network can use the interaction vectors for users and items, embeddings of users and items, as well as user and item features (you can use the features you developed in the first project).\n",
    "\n",
    "Remember to keep control over randomness - in the init method add the seed as a parameter and initialize the random seed generator with that seed (both for numpy and pytorch):\n",
    "\n",
    "```python\n",
    "self.seed = seed\n",
    "self.rng = np.random.RandomState(seed=seed)\n",
    "```\n",
    "in the network model:\n",
    "```python\n",
    "self.seed = torch.manual_seed(seed)\n",
    "```\n",
    "\n",
    "You are encouraged to experiment with:\n",
    "  - the number of layers in the network, the number of neurons and different activation functions,\n",
    "  - different optimizers and their parameters,\n",
    "  - batch size and the number of epochs,\n",
    "  - embedding layers,\n",
    "  - content-based features of both users and items."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "unlike-recipient",
   "metadata": {},
   "outputs": [],
   "source": [
    "from recommenders.recommender import Recommender\n",
    "\n",
    "\n",
    "class Net(nn.Module):\n",
    "    def __init__(self, features_len, output_len):\n",
    "        super(Net, self).__init__()\n",
    "        \n",
    "        print(\"IN:\", features_len, \"OUT:\", output_len)\n",
    "        \n",
    "        self.fc1 = nn.Linear(features_len, 150)\n",
    "        self.fc2 = nn.Linear(150, 50)\n",
    "        self.fc3 = nn.Linear(50, 25)\n",
    "        self.fc4 = nn.Linear(25, output_len+500)\n",
    "        \n",
    "    def forward(self, x):\n",
    "        x = F.relu(self.fc1(x))\n",
    "        x = F.relu(self.fc2(x))\n",
    "        x = F.relu(self.fc3(x))\n",
    "        return self.fc4(x)\n",
    "\n",
    "# class Net(nn.Module):\n",
    "#     def __init__(self, features_len):\n",
    "#         super(Net, self).__init__()\n",
    "#         self.hid1 = nn.Linear(features_len, 10)\n",
    "#         self.hid2 = nn.Linear(10, 10)\n",
    "#         self.oupt = nn.Linear(10, 1)\n",
    "\n",
    "#         nn.init.xavier_uniform_(self.hid1.weight)\n",
    "#         nn.init.zeros_(self.hid1.bias)\n",
    "#         nn.init.xavier_uniform_(self.hid2.weight)\n",
    "#         nn.init.zeros_(self.hid2.bias)\n",
    "#         nn.init.xavier_uniform_(self.oupt.weight)\n",
    "#         nn.init.zeros_(self.oupt.bias)\n",
    "\n",
    "#     def forward(self, x):\n",
    "#         z = torch.tanh(self.hid1(x))\n",
    "#         z = torch.tanh(self.hid2(z))\n",
    "#         z = torch.sigmoid(self.oupt(z))\n",
    "#         return z\n",
    "    \n",
    "    \n",
    "class NNRecommender(Recommender):\n",
    "    \"\"\"\n",
    "    Linear recommender class based on user and item features.\n",
    "    \"\"\"\n",
    " \n",
    "    def generate_negative_interaction(self):\n",
    "        user_ids = interactions_df['user_id']\n",
    "        item_ids = interactions_df['item_id']\n",
    " \n",
    "        user_id = user_ids.sample().item()\n",
    "        item_id = item_ids.sample().item()\n",
    "        positive_interactions = interactions_df.loc[\n",
    "            (interactions_df['item_id'] == item_id) & (interactions_df['user_id'] == user_id)]\n",
    " \n",
    "        while not positive_interactions.empty:\n",
    "            user_id = user_ids.sample().item()\n",
    "            item_id = item_ids.sample().item()\n",
    "            positive_interactions = interactions_df.loc[\n",
    "                (interactions_df['item_id'] == item_id) & (interactions_df['user_id'] == user_id)]\n",
    " \n",
    "        return (user_id, item_id, 0)\n",
    " \n",
    "    def generate_negative_interactions(self, n, interactions_df, cross_df):\n",
    "        combined_dfs = pd.concat([cross_df, interactions_df[['user_id', 'item_id']]])\n",
    "        return combined_dfs.drop_duplicates(keep=False).sample(n=n)\n",
    " \n",
    " \n",
    "    def __init__(self, seed=6789, n_neg_per_pos=5):\n",
    "        \"\"\"\n",
    "        Initialize base recommender params and variables.\n",
    "        \"\"\"\n",
    "        self.model = None\n",
    "        self.n_neg_per_pos = n_neg_per_pos\n",
    " \n",
    "        self.recommender_df = pd.DataFrame(columns=['user_id', 'item_id', 'score'])\n",
    "        self.users_df = None\n",
    "        self.user_features = None\n",
    " \n",
    "        self.seed = seed\n",
    "        self.rng = np.random.RandomState(seed=seed)\n",
    " \n",
    "    def calculate_accuracy(self, y_true, y_pred):\n",
    "        predicted = y_pred.ge(.5).view(-1)\n",
    "        return (y_true == predicted).sum().float() / len(y_true)\n",
    " \n",
    "    def round_tensor(self, t, decimal_places=3):\n",
    "        return round(t.item(), decimal_places)\n",
    " \n",
    "    def fit(self, interactions_df, users_df, items_df):\n",
    "        \"\"\"\n",
    "        Training of the recommender.\n",
    " \n",
    "        :param pd.DataFrame interactions_df: DataFrame with recorded interactions between users and items \n",
    "            defined by user_id, item_id and features of the interaction.\n",
    "        :param pd.DataFrame users_df: DataFrame with users and their features defined by user_id and the user feature columns.\n",
    "        :param pd.DataFrame items_df: DataFrame with items and their features defined by item_id and the item feature columns.\n",
    "        \"\"\"\n",
    " \n",
    "        interactions_df = interactions_df.copy()\n",
    "        # Prepare users_df and items_df \n",
    "        # (optional - use only if you want to train a hybrid model with content-based features)\n",
    " \n",
    "        users_df, user_features = prepare_users_df(interactions_df)\n",
    " \n",
    "        self.users_df = users_df\n",
    "        self.user_features = user_features\n",
    " \n",
    "        items_df, item_features = prepare_items_df(interactions_df)\n",
    "        items_df = items_df.loc[:, ['item_id'] + item_features]\n",
    " \n",
    "        n_epochs = 51\n",
    "\n",
    "        X = items_df[['term_WinterVacation', 'term_Easter', 'term_OffSeason', 'term_HighSeason', 'term_LowSeason', 'term_MayLongWeekend', 'term_NewYear', 'term_Christmas', 'rate_plan_Standard', 'rate_plan_Nonref', 'room_segment_[0-160]', 'room_segment_[160-260]', 'room_segment_[260-360]', 'room_segment_[360-500]', 'room_segment_[500-900]', 'n_people_bucket_[1-1]', 'n_people_bucket_[2-2]', 'n_people_bucket_[3-4]', 'n_people_bucket_[5-inf]', 'weekend_stay_True', 'weekend_stay_False']]\n",
    "        y = items_df[['item_id']]\n",
    "        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=self.seed)\n",
    " \n",
    "        X_train = torch.from_numpy(X_train.to_numpy()).float()\n",
    "        y_train = torch.squeeze(torch.from_numpy(y_train.to_numpy()).long())\n",
    "        X_test = torch.from_numpy(X_test.to_numpy()).float()\n",
    "        y_test = torch.squeeze(torch.from_numpy(y_test.to_numpy()).long())\n",
    " \n",
    "        self.net = Net(X_train.shape[1], items_df['item_id'].unique().size)\n",
    " \n",
    "        optimizer = optim.Adam(self.net.parameters(), lr=0.05)\n",
    "        criterion = nn.CrossEntropyLoss()\n",
    " \n",
    "        for epoch in range(n_epochs):\n",
    "            y_pred = self.net(X_train)\n",
    "            y_pred = torch.squeeze(y_pred)\n",
    "            train_loss = criterion(y_pred, y_train)\n",
    " \n",
    "#             if epoch % 5000 == 0:\n",
    "#                 train_acc = self.calculate_accuracy(y_train, y_pred)\n",
    "#                 y_test_pred = self.net(X_test)\n",
    "#                 y_test_pred = torch.squeeze(y_test_pred)\n",
    "#                 test_loss = criterion(y_test_pred, y_test)\n",
    "#                 test_acc = self.calculate_accuracy(y_test, y_test_pred)\n",
    "#                 print(\n",
    "#         f'''epoch {epoch}\n",
    "#         Train set - loss: {self.round_tensor(train_loss)}, accuracy: {self.round_tensor(train_acc)}\n",
    "#         Test  set - loss: {self.round_tensor(test_loss)}, accuracy: {self.round_tensor(test_acc)}\n",
    "#         ''')\n",
    " \n",
    "            optimizer.zero_grad()\n",
    "            train_loss.backward()\n",
    "            optimizer.step()\n",
    " \n",
    "    def recommend(self, users_df, items_df, n_recommendations=1):\n",
    "        \"\"\"\n",
    "        Serving of recommendations. Scores items in items_df for each user in users_df and returns \n",
    "        top n_recommendations for each user.\n",
    " \n",
    "        :param pd.DataFrame users_df: DataFrame with users and their features for which recommendations should be generated.\n",
    "        :param pd.DataFrame items_df: DataFrame with items and their features which should be scored.\n",
    "        :param int n_recommendations: Number of recommendations to be returned for each user.\n",
    "        :return: DataFrame with user_id, item_id and score as columns returning n_recommendations top recommendations \n",
    "            for each user.\n",
    "        :rtype: pd.DataFrame\n",
    "        \"\"\"\n",
    " \n",
    "        # Clean previous recommendations (iloc could be used alternatively)\n",
    "        self.recommender_df = self.recommender_df[:0]\n",
    " \n",
    "        # Prepare users_df and items_df\n",
    "        # (optional - use only if you want to train a hybrid model with content-based features)\n",
    " \n",
    "        users_df = users_df.loc[:, 'user_id']\n",
    "        users_df = pd.merge(users_df, self.users_df, on=['user_id'], how='left').fillna(0)\n",
    " \n",
    "        #         items_df, item_features = prepare_items_df(items_df)\n",
    "        #         items_df = items_df.loc[:, ['item_id'] + item_features]\n",
    " \n",
    "        # Score the items\n",
    " \n",
    "        recommendations = pd.DataFrame(columns=['user_id', 'item_id', 'score'])\n",
    " \n",
    "        for ix, user in users_df.iterrows():\n",
    "            prep_user = torch.from_numpy(user[['user_term_WinterVacation', 'user_term_Easter', 'user_term_OffSeason', 'user_term_HighSeason', 'user_term_LowSeason', 'user_term_MayLongWeekend', 'user_term_NewYear', 'user_term_Christmas', 'user_rate_plan_Standard', 'user_rate_plan_Nonref', 'user_room_segment_[0-160]', 'user_room_segment_[160-260]', 'user_room_segment_[260-360]', 'user_room_segment_[360-500]', 'user_room_segment_[500-900]', 'user_n_people_bucket_[1-1]', 'user_n_people_bucket_[2-2]', 'user_n_people_bucket_[3-4]', 'user_n_people_bucket_[5-inf]', 'user_weekend_stay_True', 'user_weekend_stay_False']].to_numpy()).float()\n",
    "            \n",
    "            scores = self.net(prep_user).detach().numpy()\n",
    " \n",
    "            chosen_ids = np.argsort(-scores)[:n_recommendations]\n",
    " \n",
    "            recommendations = []\n",
    "            for item_id in chosen_ids:\n",
    "                recommendations.append(\n",
    "                    {\n",
    "                        'user_id': user['user_id'],\n",
    "                        'item_id': item_id,\n",
    "                        'score': scores[item_id]\n",
    "                    }\n",
    "                )\n",
    " \n",
    "            user_recommendations = pd.DataFrame(recommendations)\n",
    " \n",
    "            self.recommender_df = pd.concat([self.recommender_df, user_recommendations])\n",
    " \n",
    "        return self.recommender_df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "copyrighted-relative",
   "metadata": {},
   "source": [
    "# Quick test of the recommender"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "greatest-canon",
   "metadata": {},
   "outputs": [],
   "source": [
    "items_df = interactions_df.loc[:, ['item_id'] + base_item_features].drop_duplicates()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "initial-capital",
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'train_test_split' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-23-851d3aa5378e>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;31m# Fit method\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0mnn_recommender\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mNNRecommender\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mnn_recommender\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minteractions_df\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1000\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      4\u001b[0m \u001b[0;31m# nn_recommender.fit(interactions_df, None, None)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m<ipython-input-21-ca3049874457>\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, interactions_df, users_df, items_df)\u001b[0m\n\u001b[1;32m    114\u001b[0m         \u001b[0mX\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mitems_df\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'term_WinterVacation'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'term_Easter'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'term_OffSeason'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'term_HighSeason'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'term_LowSeason'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'term_MayLongWeekend'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'term_NewYear'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'term_Christmas'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'rate_plan_Standard'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'rate_plan_Nonref'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'room_segment_[0-160]'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'room_segment_[160-260]'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'room_segment_[260-360]'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'room_segment_[360-500]'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'room_segment_[500-900]'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'n_people_bucket_[1-1]'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'n_people_bucket_[2-2]'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'n_people_bucket_[3-4]'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'n_people_bucket_[5-inf]'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'weekend_stay_True'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'weekend_stay_False'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    115\u001b[0m         \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mitems_df\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'item_id'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 116\u001b[0;31m         \u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_test\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtrain_test_split\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrandom_state\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mseed\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    117\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    118\u001b[0m         \u001b[0mX_train\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_numpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_numpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfloat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mNameError\u001b[0m: name 'train_test_split' is not defined"
     ]
    }
   ],
   "source": [
    "# Fit method\n",
    "nn_recommender = NNRecommender()\n",
    "nn_recommender.fit(interactions_df.head(1000), None, None)\n",
    "# nn_recommender.fit(interactions_df, None, None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 193,
   "id": "digital-consolidation",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user_id</th>\n",
       "      <th>item_id</th>\n",
       "      <th>score</th>\n",
       "      <th>term</th>\n",
       "      <th>length_of_stay_bucket</th>\n",
       "      <th>rate_plan</th>\n",
       "      <th>room_segment</th>\n",
       "      <th>n_people_bucket</th>\n",
       "      <th>weekend_stay</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>88</td>\n",
       "      <td>37.715969</td>\n",
       "      <td>WinterVacation</td>\n",
       "      <td>[0-1]</td>\n",
       "      <td>Standard</td>\n",
       "      <td>[160-260]</td>\n",
       "      <td>[2-2]</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.0</td>\n",
       "      <td>57</td>\n",
       "      <td>36.182877</td>\n",
       "      <td>WinterVacation</td>\n",
       "      <td>[2-3]</td>\n",
       "      <td>Standard</td>\n",
       "      <td>[160-260]</td>\n",
       "      <td>[2-2]</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.0</td>\n",
       "      <td>69</td>\n",
       "      <td>35.771114</td>\n",
       "      <td>WinterVacation</td>\n",
       "      <td>[4-7]</td>\n",
       "      <td>Standard</td>\n",
       "      <td>[160-260]</td>\n",
       "      <td>[2-2]</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Recommender method\n",
    "\n",
    "recommendations = nn_recommender.recommend(pd.DataFrame([[1]], columns=['user_id']), items_df, 3)\n",
    "\n",
    "recommendations = pd.merge(recommendations, items_df, on='item_id', how='left')\n",
    "display(HTML(recommendations.to_html()))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "advanced-eleven",
   "metadata": {},
   "source": [
    "# Tuning method"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 194,
   "id": "strange-alaska",
   "metadata": {},
   "outputs": [],
   "source": [
    "from evaluation_and_testing.testing import evaluate_train_test_split_implicit\n",
    "\n",
    "seed = 6789"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 195,
   "id": "stable-theta",
   "metadata": {},
   "outputs": [],
   "source": [
    "from hyperopt import hp, fmin, tpe, Trials\n",
    "import traceback\n",
    "\n",
    "def tune_recommender(recommender_class, interactions_df, items_df, \n",
    "                     param_space, max_evals=1, show_progressbar=True, seed=6789):\n",
    "    # Split into train_validation and test sets\n",
    "\n",
    "    shuffle = np.arange(len(interactions_df))\n",
    "    rng = np.random.RandomState(seed=seed)\n",
    "    rng.shuffle(shuffle)\n",
    "    shuffle = list(shuffle)\n",
    "\n",
    "    train_test_split = 0.8\n",
    "    split_index = int(len(interactions_df) * train_test_split)\n",
    "\n",
    "    train_validation = interactions_df.iloc[shuffle[:split_index]]\n",
    "    test = interactions_df.iloc[shuffle[split_index:]]\n",
    "\n",
    "    # Tune\n",
    "\n",
    "    def loss(tuned_params):\n",
    "        recommender = recommender_class(seed=seed, **tuned_params)\n",
    "        hr1, hr3, hr5, hr10, ndcg1, ndcg3, ndcg5, ndcg10 = evaluate_train_test_split_implicit(\n",
    "            recommender, train_validation, items_df, seed=seed)\n",
    "        return -hr10\n",
    "\n",
    "    n_tries = 1\n",
    "    succeded = False\n",
    "    try_id = 0\n",
    "    while not succeded and try_id < n_tries:\n",
    "        try:\n",
    "            trials = Trials()\n",
    "            best_param_set = fmin(loss, space=param_space, algo=tpe.suggest, \n",
    "                                  max_evals=max_evals, show_progressbar=show_progressbar, trials=trials, verbose=True)\n",
    "            succeded = True\n",
    "        except:\n",
    "            traceback.print_exc()\n",
    "            try_id += 1\n",
    "            \n",
    "    if not succeded:\n",
    "        return None\n",
    "        \n",
    "    # Validate\n",
    "    \n",
    "    recommender = recommender_class(seed=seed, **best_param_set)\n",
    "\n",
    "    results = [[recommender_class.__name__] + list(evaluate_train_test_split_implicit(\n",
    "        recommender, {'train': train_validation, 'test': test}, items_df, seed=seed))]\n",
    "\n",
    "    results = pd.DataFrame(results, \n",
    "                           columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n",
    "\n",
    "    display(HTML(results.to_html()))\n",
    "    \n",
    "    return best_param_set"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "reliable-switzerland",
   "metadata": {},
   "source": [
    "## Tuning of the recommender\n",
    "\n",
    "<span style=\"color:red\"><font size=\"4\">**Task:**</font></span><br> \n",
    "Tune your model using the code below. You only need to put the class name of your recommender and choose an appropriate parameter space."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 196,
   "id": "obvious-astrology",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "IN:                                                   \n",
      "21                                                    \n",
      "OUT:                                                  \n",
      "691                                                   \n",
      "IN:                                                                                \n",
      "21                                                                                 \n",
      "OUT:                                                                               \n",
      "691                                                                                \n",
      "IN:                                                                                \n",
      "21                                                                                 \n",
      "OUT:                                                                               \n",
      "691                                                                                \n",
      "IN:                                                                                \n",
      "21                                                                                 \n",
      "OUT:                                                                               \n",
      "691                                                                                \n",
      "IN:                                                                                \n",
      "21                                                                                 \n",
      "OUT:                                                                               \n",
      "691                                                                                \n",
      "IN:                                                                                \n",
      "21                                                                                 \n",
      "OUT:                                                                               \n",
      "691                                                                                \n",
      "IN:                                                                                \n",
      "21                                                                                 \n",
      "OUT:                                                                               \n",
      "691                                                                                \n",
      "IN:                                                                                \n",
      "21                                                                                 \n",
      "OUT:                                                                               \n",
      "691                                                                                \n",
      "IN:                                                                                \n",
      "21                                                                                 \n",
      "OUT:                                                                               \n",
      "691                                                                                \n",
      "IN:                                                                                \n",
      "21                                                                                 \n",
      "OUT:                                                                               \n",
      "691                                                                                \n",
      "100%|██████████| 10/10 [18:34<00:00, 111.50s/trial, best loss: -0.04424416222859484]\n",
      "IN: 21 OUT: 736\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Recommender</th>\n",
       "      <th>HR@1</th>\n",
       "      <th>HR@3</th>\n",
       "      <th>HR@5</th>\n",
       "      <th>HR@10</th>\n",
       "      <th>NDCG@1</th>\n",
       "      <th>NDCG@3</th>\n",
       "      <th>NDCG@5</th>\n",
       "      <th>NDCG@10</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NNRecommender</td>\n",
       "      <td>0.010201</td>\n",
       "      <td>0.020072</td>\n",
       "      <td>0.026324</td>\n",
       "      <td>0.035538</td>\n",
       "      <td>0.010201</td>\n",
       "      <td>0.01574</td>\n",
       "      <td>0.018216</td>\n",
       "      <td>0.021141</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Best parameters:\n",
      "{'n_neg_per_pos': 9.0}\n"
     ]
    }
   ],
   "source": [
    "param_space = {\n",
    "    'n_neg_per_pos': hp.quniform('n_neg_per_pos', 1, 10, 1)\n",
    "}\n",
    "items_df['item_id'].unique().size\n",
    "\n",
    "best_param_set = tune_recommender(NNRecommender, interactions_df, items_df,\n",
    "                                  param_space, max_evals=10, show_progressbar=True, seed=seed)\n",
    "\n",
    "print(\"Best parameters:\")\n",
    "print(best_param_set)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "accredited-strap",
   "metadata": {},
   "source": [
    "# Final evaluation\n",
    "\n",
    "<span style=\"color:red\"><font size=\"4\">**Task:**</font></span><br> \n",
    "Run the final evaluation of your recommender and present its results against the Amazon and Netflix recommenders' results. You just need to give the class name of your recommender and its tuned parameters below."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 198,
   "id": "given-homework",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "IN: 21 OUT: 736\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Recommender</th>\n",
       "      <th>HR@1</th>\n",
       "      <th>HR@3</th>\n",
       "      <th>HR@5</th>\n",
       "      <th>HR@10</th>\n",
       "      <th>NDCG@1</th>\n",
       "      <th>NDCG@3</th>\n",
       "      <th>NDCG@5</th>\n",
       "      <th>NDCG@10</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NNRecommender</td>\n",
       "      <td>0.003949</td>\n",
       "      <td>0.015137</td>\n",
       "      <td>0.019743</td>\n",
       "      <td>0.026654</td>\n",
       "      <td>0.003949</td>\n",
       "      <td>0.010361</td>\n",
       "      <td>0.01223</td>\n",
       "      <td>0.014409</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "nn_recommender = NNRecommender(n_neg_per_pos=9)  # Initialize your recommender here\n",
    "\n",
    "# Give the name of your recommender in the line below\n",
    "nn_tts_results = [['NNRecommender'] + list(evaluate_train_test_split_implicit(\n",
    "    nn_recommender, interactions_df, items_df))]\n",
    "\n",
    "nn_tts_results = pd.DataFrame(\n",
    "    nn_tts_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n",
    "\n",
    "display(HTML(nn_tts_results.to_html()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 199,
   "id": "suited-nomination",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Recommender</th>\n",
       "      <th>HR@1</th>\n",
       "      <th>HR@3</th>\n",
       "      <th>HR@5</th>\n",
       "      <th>HR@10</th>\n",
       "      <th>NDCG@1</th>\n",
       "      <th>NDCG@3</th>\n",
       "      <th>NDCG@5</th>\n",
       "      <th>NDCG@10</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>AmazonRecommender</td>\n",
       "      <td>0.042119</td>\n",
       "      <td>0.10464</td>\n",
       "      <td>0.140507</td>\n",
       "      <td>0.199408</td>\n",
       "      <td>0.042119</td>\n",
       "      <td>0.076826</td>\n",
       "      <td>0.091797</td>\n",
       "      <td>0.110705</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from recommenders.amazon_recommender import AmazonRecommender\n",
    "\n",
    "amazon_recommender = AmazonRecommender()\n",
    "\n",
    "amazon_tts_results = [['AmazonRecommender'] + list(evaluate_train_test_split_implicit(\n",
    "    amazon_recommender, interactions_df, items_df))]\n",
    "\n",
    "amazon_tts_results = pd.DataFrame(\n",
    "    amazon_tts_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n",
    "\n",
    "display(HTML(amazon_tts_results.to_html()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "conservative-remedy",
   "metadata": {},
   "outputs": [],
   "source": [
    "from recommenders.netflix_recommender import NetflixRecommender\n",
    "\n",
    "netflix_recommender = NetflixRecommender(n_epochs=30, print_type='live')\n",
    "\n",
    "netflix_tts_results = [['NetflixRecommender'] + list(evaluate_train_test_split_implicit(\n",
    "    netflix_recommender, interactions_df, items_df))]\n",
    "\n",
    "netflix_tts_results = pd.DataFrame(\n",
    "    netflix_tts_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n",
    "\n",
    "display(HTML(netflix_tts_results.to_html()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "moderate-printing",
   "metadata": {},
   "outputs": [],
   "source": [
    "tts_results = pd.concat([nn_tts_results, amazon_tts_results, netflix_tts_results]).reset_index(drop=True)\n",
    "display(HTML(tts_results.to_html()))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "uniform-vegetable",
   "metadata": {},
   "source": [
    "# Summary\n",
    "\n",
    "<span style=\"color:red\"><font size=\"4\">**Task:**</font></span><br> \n",
    "Write a summary of your experiments. What worked well and what did not? What are your thoughts how could you possibly further improve the model?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "declared-howard",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "rek_uno",
   "language": "python",
   "name": "rek_uno"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}