1688 lines
64 KiB
Plaintext
1688 lines
64 KiB
Plaintext
![]() |
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 17,
|
||
|
"id": "alike-morgan",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"The autoreload extension is already loaded. To reload it, use:\n",
|
||
|
" %reload_ext autoreload\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"%matplotlib inline\n",
|
||
|
"%load_ext autoreload\n",
|
||
|
"%autoreload 2\n",
|
||
|
"\n",
|
||
|
"import numpy as np\n",
|
||
|
"import pandas as pd\n",
|
||
|
"import matplotlib.pyplot as plt\n",
|
||
|
"import seaborn as sns\n",
|
||
|
"from IPython.display import Markdown, display, HTML\n",
|
||
|
"from collections import defaultdict\n",
|
||
|
"\n",
|
||
|
"import torch\n",
|
||
|
"import torch.nn as nn\n",
|
||
|
"import torch.optim as optim\n",
|
||
|
"from livelossplot import PlotLosses\n",
|
||
|
"\n",
|
||
|
"# Fix the dying kernel problem (only a problem in some installations - you can remove it, if it works without it)\n",
|
||
|
"import os\n",
|
||
|
"os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "blessed-knitting",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Load the dataset for recommenders"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 18,
|
||
|
"id": "victorian-bottom",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>user_id</th>\n",
|
||
|
" <th>item_id</th>\n",
|
||
|
" <th>term</th>\n",
|
||
|
" <th>length_of_stay_bucket</th>\n",
|
||
|
" <th>rate_plan</th>\n",
|
||
|
" <th>room_segment</th>\n",
|
||
|
" <th>n_people_bucket</th>\n",
|
||
|
" <th>weekend_stay</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>WinterVacation</td>\n",
|
||
|
" <td>[2-3]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[260-360]</td>\n",
|
||
|
" <td>[5-inf]</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>2</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>WinterVacation</td>\n",
|
||
|
" <td>[2-3]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[160-260]</td>\n",
|
||
|
" <td>[3-4]</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>3</td>\n",
|
||
|
" <td>2</td>\n",
|
||
|
" <td>WinterVacation</td>\n",
|
||
|
" <td>[2-3]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[160-260]</td>\n",
|
||
|
" <td>[2-2]</td>\n",
|
||
|
" <td>False</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>4</td>\n",
|
||
|
" <td>3</td>\n",
|
||
|
" <td>WinterVacation</td>\n",
|
||
|
" <td>[4-7]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[160-260]</td>\n",
|
||
|
" <td>[3-4]</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>5</td>\n",
|
||
|
" <td>4</td>\n",
|
||
|
" <td>WinterVacation</td>\n",
|
||
|
" <td>[4-7]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[0-160]</td>\n",
|
||
|
" <td>[2-2]</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5</th>\n",
|
||
|
" <td>6</td>\n",
|
||
|
" <td>5</td>\n",
|
||
|
" <td>Easter</td>\n",
|
||
|
" <td>[4-7]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[260-360]</td>\n",
|
||
|
" <td>[5-inf]</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>6</th>\n",
|
||
|
" <td>7</td>\n",
|
||
|
" <td>6</td>\n",
|
||
|
" <td>OffSeason</td>\n",
|
||
|
" <td>[2-3]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[260-360]</td>\n",
|
||
|
" <td>[5-inf]</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7</th>\n",
|
||
|
" <td>8</td>\n",
|
||
|
" <td>7</td>\n",
|
||
|
" <td>HighSeason</td>\n",
|
||
|
" <td>[2-3]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[160-260]</td>\n",
|
||
|
" <td>[1-1]</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>8</th>\n",
|
||
|
" <td>9</td>\n",
|
||
|
" <td>8</td>\n",
|
||
|
" <td>HighSeason</td>\n",
|
||
|
" <td>[2-3]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[0-160]</td>\n",
|
||
|
" <td>[1-1]</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>9</th>\n",
|
||
|
" <td>8</td>\n",
|
||
|
" <td>7</td>\n",
|
||
|
" <td>HighSeason</td>\n",
|
||
|
" <td>[2-3]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[160-260]</td>\n",
|
||
|
" <td>[1-1]</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>10</th>\n",
|
||
|
" <td>8</td>\n",
|
||
|
" <td>7</td>\n",
|
||
|
" <td>HighSeason</td>\n",
|
||
|
" <td>[2-3]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[160-260]</td>\n",
|
||
|
" <td>[1-1]</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>11</th>\n",
|
||
|
" <td>10</td>\n",
|
||
|
" <td>9</td>\n",
|
||
|
" <td>HighSeason</td>\n",
|
||
|
" <td>[2-3]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[160-260]</td>\n",
|
||
|
" <td>[3-4]</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>12</th>\n",
|
||
|
" <td>11</td>\n",
|
||
|
" <td>9</td>\n",
|
||
|
" <td>HighSeason</td>\n",
|
||
|
" <td>[2-3]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[160-260]</td>\n",
|
||
|
" <td>[3-4]</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>13</th>\n",
|
||
|
" <td>12</td>\n",
|
||
|
" <td>10</td>\n",
|
||
|
" <td>HighSeason</td>\n",
|
||
|
" <td>[8-inf]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[160-260]</td>\n",
|
||
|
" <td>[3-4]</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>14</th>\n",
|
||
|
" <td>14</td>\n",
|
||
|
" <td>11</td>\n",
|
||
|
" <td>HighSeason</td>\n",
|
||
|
" <td>[2-3]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[0-160]</td>\n",
|
||
|
" <td>[3-4]</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"data_path = os.path.join(\"data\", \"hotel_data\")\n",
|
||
|
"\n",
|
||
|
"interactions_df = pd.read_csv(os.path.join(data_path, \"hotel_data_interactions_df.csv\"), index_col=0)\n",
|
||
|
"\n",
|
||
|
"base_item_features = ['term', 'length_of_stay_bucket', 'rate_plan', 'room_segment', 'n_people_bucket', 'weekend_stay']\n",
|
||
|
"\n",
|
||
|
"column_values_dict = {\n",
|
||
|
" 'term': ['WinterVacation', 'Easter', 'OffSeason', 'HighSeason', 'LowSeason', 'MayLongWeekend', 'NewYear', 'Christmas'],\n",
|
||
|
" 'length_of_stay_bucket': ['[0-1]', '[2-3]', '[4-7]', '[8-inf]'],\n",
|
||
|
" 'rate_plan': ['Standard', 'Nonref'],\n",
|
||
|
" 'room_segment': ['[0-160]', '[160-260]', '[260-360]', '[360-500]', '[500-900]'],\n",
|
||
|
" 'n_people_bucket': ['[1-1]', '[2-2]', '[3-4]', '[5-inf]'],\n",
|
||
|
" 'weekend_stay': ['True', 'False']\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"interactions_df.loc[:, 'term'] = pd.Categorical(\n",
|
||
|
" interactions_df['term'], categories=column_values_dict['term'])\n",
|
||
|
"interactions_df.loc[:, 'length_of_stay_bucket'] = pd.Categorical(\n",
|
||
|
" interactions_df['length_of_stay_bucket'], categories=column_values_dict['length_of_stay_bucket'])\n",
|
||
|
"interactions_df.loc[:, 'rate_plan'] = pd.Categorical(\n",
|
||
|
" interactions_df['rate_plan'], categories=column_values_dict['rate_plan'])\n",
|
||
|
"interactions_df.loc[:, 'room_segment'] = pd.Categorical(\n",
|
||
|
" interactions_df['room_segment'], categories=column_values_dict['room_segment'])\n",
|
||
|
"interactions_df.loc[:, 'n_people_bucket'] = pd.Categorical(\n",
|
||
|
" interactions_df['n_people_bucket'], categories=column_values_dict['n_people_bucket'])\n",
|
||
|
"interactions_df.loc[:, 'weekend_stay'] = interactions_df['weekend_stay'].astype('str')\n",
|
||
|
"interactions_df.loc[:, 'weekend_stay'] = pd.Categorical(\n",
|
||
|
" interactions_df['weekend_stay'], categories=column_values_dict['weekend_stay'])\n",
|
||
|
"\n",
|
||
|
"display(HTML(interactions_df.head(15).to_html()))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "realistic-third",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# (Optional) Prepare numerical user features\n",
|
||
|
"\n",
|
||
|
"The method below is left here for convenience if you want to experiment with content-based user features as an input for your neural network."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 19,
|
||
|
"id": "variable-jaguar",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"['user_term_WinterVacation', 'user_term_Easter', 'user_term_OffSeason', 'user_term_HighSeason', 'user_term_LowSeason', 'user_term_MayLongWeekend', 'user_term_NewYear', 'user_term_Christmas', 'user_length_of_stay_bucket_[0-1]', 'user_length_of_stay_bucket_[2-3]', 'user_length_of_stay_bucket_[4-7]', 'user_length_of_stay_bucket_[8-inf]', 'user_rate_plan_Standard', 'user_rate_plan_Nonref', 'user_room_segment_[0-160]', 'user_room_segment_[160-260]', 'user_room_segment_[260-360]', 'user_room_segment_[360-500]', 'user_room_segment_[500-900]', 'user_n_people_bucket_[1-1]', 'user_n_people_bucket_[2-2]', 'user_n_people_bucket_[3-4]', 'user_n_people_bucket_[5-inf]', 'user_weekend_stay_True', 'user_weekend_stay_False']\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>user_id</th>\n",
|
||
|
" <th>user_term_WinterVacation</th>\n",
|
||
|
" <th>user_term_Easter</th>\n",
|
||
|
" <th>user_term_OffSeason</th>\n",
|
||
|
" <th>user_term_HighSeason</th>\n",
|
||
|
" <th>user_term_LowSeason</th>\n",
|
||
|
" <th>user_term_MayLongWeekend</th>\n",
|
||
|
" <th>user_term_NewYear</th>\n",
|
||
|
" <th>user_term_Christmas</th>\n",
|
||
|
" <th>user_length_of_stay_bucket_[0-1]</th>\n",
|
||
|
" <th>user_length_of_stay_bucket_[2-3]</th>\n",
|
||
|
" <th>user_length_of_stay_bucket_[4-7]</th>\n",
|
||
|
" <th>user_length_of_stay_bucket_[8-inf]</th>\n",
|
||
|
" <th>user_rate_plan_Standard</th>\n",
|
||
|
" <th>user_rate_plan_Nonref</th>\n",
|
||
|
" <th>user_room_segment_[0-160]</th>\n",
|
||
|
" <th>user_room_segment_[160-260]</th>\n",
|
||
|
" <th>user_room_segment_[260-360]</th>\n",
|
||
|
" <th>user_room_segment_[360-500]</th>\n",
|
||
|
" <th>user_room_segment_[500-900]</th>\n",
|
||
|
" <th>user_n_people_bucket_[1-1]</th>\n",
|
||
|
" <th>user_n_people_bucket_[2-2]</th>\n",
|
||
|
" <th>user_n_people_bucket_[3-4]</th>\n",
|
||
|
" <th>user_n_people_bucket_[5-inf]</th>\n",
|
||
|
" <th>user_weekend_stay_True</th>\n",
|
||
|
" <th>user_weekend_stay_False</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0.130435</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.652174</td>\n",
|
||
|
" <td>0.086957</td>\n",
|
||
|
" <td>0.130435</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.608696</td>\n",
|
||
|
" <td>0.391304</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.521739</td>\n",
|
||
|
" <td>0.478261</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.869565</td>\n",
|
||
|
" <td>0.130435</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.739130</td>\n",
|
||
|
" <td>0.173913</td>\n",
|
||
|
" <td>0.086957</td>\n",
|
||
|
" <td>0.782609</td>\n",
|
||
|
" <td>0.217391</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>47</th>\n",
|
||
|
" <td>50</td>\n",
|
||
|
" <td>0.043478</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.434783</td>\n",
|
||
|
" <td>0.304348</td>\n",
|
||
|
" <td>0.217391</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.913043</td>\n",
|
||
|
" <td>0.086957</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.260870</td>\n",
|
||
|
" <td>0.739130</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.565217</td>\n",
|
||
|
" <td>0.434783</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.173913</td>\n",
|
||
|
" <td>0.521739</td>\n",
|
||
|
" <td>0.304348</td>\n",
|
||
|
" <td>0.782609</td>\n",
|
||
|
" <td>0.217391</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>92</th>\n",
|
||
|
" <td>96</td>\n",
|
||
|
" <td>0.083333</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.708333</td>\n",
|
||
|
" <td>0.125000</td>\n",
|
||
|
" <td>0.041667</td>\n",
|
||
|
" <td>0.041667</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.250000</td>\n",
|
||
|
" <td>0.666667</td>\n",
|
||
|
" <td>0.041667</td>\n",
|
||
|
" <td>0.041667</td>\n",
|
||
|
" <td>0.291667</td>\n",
|
||
|
" <td>0.708333</td>\n",
|
||
|
" <td>0.125000</td>\n",
|
||
|
" <td>0.791667</td>\n",
|
||
|
" <td>0.083333</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.041667</td>\n",
|
||
|
" <td>0.333333</td>\n",
|
||
|
" <td>0.541667</td>\n",
|
||
|
" <td>0.083333</td>\n",
|
||
|
" <td>0.750000</td>\n",
|
||
|
" <td>0.250000</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>111</th>\n",
|
||
|
" <td>115</td>\n",
|
||
|
" <td>0.727273</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.272727</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.500000</td>\n",
|
||
|
" <td>0.363636</td>\n",
|
||
|
" <td>0.136364</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>1.000000</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.818182</td>\n",
|
||
|
" <td>0.181818</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.818182</td>\n",
|
||
|
" <td>0.090909</td>\n",
|
||
|
" <td>0.045455</td>\n",
|
||
|
" <td>0.045455</td>\n",
|
||
|
" <td>0.363636</td>\n",
|
||
|
" <td>0.636364</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>675</th>\n",
|
||
|
" <td>706</td>\n",
|
||
|
" <td>0.091988</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.451039</td>\n",
|
||
|
" <td>0.189911</td>\n",
|
||
|
" <td>0.207715</td>\n",
|
||
|
" <td>0.038576</td>\n",
|
||
|
" <td>0.011869</td>\n",
|
||
|
" <td>0.008902</td>\n",
|
||
|
" <td>0.169139</td>\n",
|
||
|
" <td>0.459941</td>\n",
|
||
|
" <td>0.272997</td>\n",
|
||
|
" <td>0.097923</td>\n",
|
||
|
" <td>0.994065</td>\n",
|
||
|
" <td>0.005935</td>\n",
|
||
|
" <td>0.020772</td>\n",
|
||
|
" <td>0.839763</td>\n",
|
||
|
" <td>0.130564</td>\n",
|
||
|
" <td>0.008902</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.041543</td>\n",
|
||
|
" <td>0.094955</td>\n",
|
||
|
" <td>0.738872</td>\n",
|
||
|
" <td>0.124629</td>\n",
|
||
|
" <td>0.676558</td>\n",
|
||
|
" <td>0.323442</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1699</th>\n",
|
||
|
" <td>1736</td>\n",
|
||
|
" <td>0.034483</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.482759</td>\n",
|
||
|
" <td>0.206897</td>\n",
|
||
|
" <td>0.275862</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.241379</td>\n",
|
||
|
" <td>0.551724</td>\n",
|
||
|
" <td>0.206897</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.172414</td>\n",
|
||
|
" <td>0.827586</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.931034</td>\n",
|
||
|
" <td>0.068966</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.379310</td>\n",
|
||
|
" <td>0.413793</td>\n",
|
||
|
" <td>0.206897</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.448276</td>\n",
|
||
|
" <td>0.551724</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7639</th>\n",
|
||
|
" <td>7779</td>\n",
|
||
|
" <td>0.037037</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.296296</td>\n",
|
||
|
" <td>0.259259</td>\n",
|
||
|
" <td>0.370370</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.037037</td>\n",
|
||
|
" <td>0.111111</td>\n",
|
||
|
" <td>0.296296</td>\n",
|
||
|
" <td>0.481481</td>\n",
|
||
|
" <td>0.111111</td>\n",
|
||
|
" <td>1.000000</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.814815</td>\n",
|
||
|
" <td>0.185185</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.037037</td>\n",
|
||
|
" <td>0.740741</td>\n",
|
||
|
" <td>0.222222</td>\n",
|
||
|
" <td>0.814815</td>\n",
|
||
|
" <td>0.185185</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"def n_to_p(l):\n",
|
||
|
" n = sum(l)\n",
|
||
|
" return [x / n for x in l] if n > 0 else l\n",
|
||
|
"\n",
|
||
|
"def calculate_p(x, values):\n",
|
||
|
" counts = [0]*len(values)\n",
|
||
|
" for v in x:\n",
|
||
|
" counts[values.index(v)] += 1\n",
|
||
|
"\n",
|
||
|
" return n_to_p(counts)\n",
|
||
|
"\n",
|
||
|
"def prepare_users_df(interactions_df):\n",
|
||
|
"\n",
|
||
|
" users_df = interactions_df.loc[:, [\"user_id\"]]\n",
|
||
|
" users_df = users_df.groupby(\"user_id\").first().reset_index(drop=False)\n",
|
||
|
" \n",
|
||
|
" user_features = []\n",
|
||
|
"\n",
|
||
|
" for column in base_item_features:\n",
|
||
|
"\n",
|
||
|
" column_values = column_values_dict[column]\n",
|
||
|
" df = interactions_df.loc[:, ['user_id', column]]\n",
|
||
|
" df = df.groupby('user_id').aggregate(lambda x: list(x)).reset_index(drop=False)\n",
|
||
|
"\n",
|
||
|
" def calc_p(x):\n",
|
||
|
" return calculate_p(x, column_values)\n",
|
||
|
"\n",
|
||
|
" df.loc[:, column] = df[column].apply(lambda x: calc_p(x))\n",
|
||
|
"\n",
|
||
|
" p_columns = []\n",
|
||
|
" for i in range(len(column_values)):\n",
|
||
|
" p_columns.append(\"user_\" + column + \"_\" + column_values[i])\n",
|
||
|
" df.loc[:, p_columns[i]] = df[column].apply(lambda x: x[i])\n",
|
||
|
" user_features.append(p_columns[i])\n",
|
||
|
"\n",
|
||
|
" users_df = pd.merge(users_df, df.loc[:, ['user_id'] + p_columns], on=[\"user_id\"])\n",
|
||
|
" \n",
|
||
|
" return users_df, user_features\n",
|
||
|
" \n",
|
||
|
"\n",
|
||
|
"users_df, user_features = prepare_users_df(interactions_df)\n",
|
||
|
"\n",
|
||
|
"print(user_features)\n",
|
||
|
"\n",
|
||
|
"display(HTML(users_df.loc[users_df['user_id'].isin([706, 1736, 7779, 96, 1, 50, 115])].head(15).to_html()))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "amino-keyboard",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# (Optional) Prepare numerical item features\n",
|
||
|
"\n",
|
||
|
"The method below is left here for convenience if you want to experiment with content-based item features as an input for your neural network."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 20,
|
||
|
"id": "formal-munich",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"['term_WinterVacation', 'term_Easter', 'term_OffSeason', 'term_HighSeason', 'term_LowSeason', 'term_MayLongWeekend', 'term_NewYear', 'term_Christmas', 'length_of_stay_bucket_[0-1]', 'length_of_stay_bucket_[2-3]', 'length_of_stay_bucket_[4-7]', 'length_of_stay_bucket_[8-inf]', 'rate_plan_Standard', 'rate_plan_Nonref', 'room_segment_[0-160]', 'room_segment_[160-260]', 'room_segment_[260-360]', 'room_segment_[360-500]', 'room_segment_[500-900]', 'n_people_bucket_[1-1]', 'n_people_bucket_[2-2]', 'n_people_bucket_[3-4]', 'n_people_bucket_[5-inf]', 'weekend_stay_True', 'weekend_stay_False']\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>item_id</th>\n",
|
||
|
" <th>term_WinterVacation</th>\n",
|
||
|
" <th>term_Easter</th>\n",
|
||
|
" <th>term_OffSeason</th>\n",
|
||
|
" <th>term_HighSeason</th>\n",
|
||
|
" <th>term_LowSeason</th>\n",
|
||
|
" <th>term_MayLongWeekend</th>\n",
|
||
|
" <th>term_NewYear</th>\n",
|
||
|
" <th>term_Christmas</th>\n",
|
||
|
" <th>length_of_stay_bucket_[0-1]</th>\n",
|
||
|
" <th>length_of_stay_bucket_[2-3]</th>\n",
|
||
|
" <th>length_of_stay_bucket_[4-7]</th>\n",
|
||
|
" <th>length_of_stay_bucket_[8-inf]</th>\n",
|
||
|
" <th>rate_plan_Standard</th>\n",
|
||
|
" <th>rate_plan_Nonref</th>\n",
|
||
|
" <th>room_segment_[0-160]</th>\n",
|
||
|
" <th>room_segment_[160-260]</th>\n",
|
||
|
" <th>room_segment_[260-360]</th>\n",
|
||
|
" <th>room_segment_[360-500]</th>\n",
|
||
|
" <th>room_segment_[500-900]</th>\n",
|
||
|
" <th>n_people_bucket_[1-1]</th>\n",
|
||
|
" <th>n_people_bucket_[2-2]</th>\n",
|
||
|
" <th>n_people_bucket_[3-4]</th>\n",
|
||
|
" <th>n_people_bucket_[5-inf]</th>\n",
|
||
|
" <th>weekend_stay_True</th>\n",
|
||
|
" <th>weekend_stay_False</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>2</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>3</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>4</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5</th>\n",
|
||
|
" <td>5</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>6</th>\n",
|
||
|
" <td>6</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"def map_items_to_onehot(df):\n",
|
||
|
" one_hot = pd.get_dummies(df.loc[:, base_item_features])\n",
|
||
|
" df = df.drop(base_item_features, axis = 1)\n",
|
||
|
" df = df.join(one_hot)\n",
|
||
|
" \n",
|
||
|
" return df, list(one_hot.columns)\n",
|
||
|
"\n",
|
||
|
"def prepare_items_df(interactions_df):\n",
|
||
|
" items_df = interactions_df.loc[:, [\"item_id\"] + base_item_features].drop_duplicates()\n",
|
||
|
" \n",
|
||
|
" items_df, item_features = map_items_to_onehot(items_df)\n",
|
||
|
" \n",
|
||
|
" return items_df, item_features\n",
|
||
|
"\n",
|
||
|
"\n",
|
||
|
"items_df, item_features = prepare_items_df(interactions_df)\n",
|
||
|
"\n",
|
||
|
"print(item_features)\n",
|
||
|
"\n",
|
||
|
"display(HTML(items_df.loc[items_df['item_id'].isin([0, 1, 2, 3, 4, 5, 6])].head(15).to_html()))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "figured-imaging",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Neural network recommender\n",
|
||
|
"\n",
|
||
|
"<span style=\"color:red\"><font size=\"4\">**Task:**</font></span><br> \n",
|
||
|
"Code a recommender based on a neural network model. You are free to choose any network architecture you find appropriate. The network can use the interaction vectors for users and items, embeddings of users and items, as well as user and item features (you can use the features you developed in the first project).\n",
|
||
|
"\n",
|
||
|
"Remember to keep control over randomness - in the init method add the seed as a parameter and initialize the random seed generator with that seed (both for numpy and pytorch):\n",
|
||
|
"\n",
|
||
|
"```python\n",
|
||
|
"self.seed = seed\n",
|
||
|
"self.rng = np.random.RandomState(seed=seed)\n",
|
||
|
"```\n",
|
||
|
"in the network model:\n",
|
||
|
"```python\n",
|
||
|
"self.seed = torch.manual_seed(seed)\n",
|
||
|
"```\n",
|
||
|
"\n",
|
||
|
"You are encouraged to experiment with:\n",
|
||
|
" - the number of layers in the network, the number of neurons and different activation functions,\n",
|
||
|
" - different optimizers and their parameters,\n",
|
||
|
" - batch size and the number of epochs,\n",
|
||
|
" - embedding layers,\n",
|
||
|
" - content-based features of both users and items."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 21,
|
||
|
"id": "unlike-recipient",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"from recommenders.recommender import Recommender\n",
|
||
|
"\n",
|
||
|
"\n",
|
||
|
"class Net(nn.Module):\n",
|
||
|
" def __init__(self, features_len, output_len):\n",
|
||
|
" super(Net, self).__init__()\n",
|
||
|
" \n",
|
||
|
" print(\"IN:\", features_len, \"OUT:\", output_len)\n",
|
||
|
" \n",
|
||
|
" self.fc1 = nn.Linear(features_len, 150)\n",
|
||
|
" self.fc2 = nn.Linear(150, 50)\n",
|
||
|
" self.fc3 = nn.Linear(50, 25)\n",
|
||
|
" self.fc4 = nn.Linear(25, output_len+500)\n",
|
||
|
" \n",
|
||
|
" def forward(self, x):\n",
|
||
|
" x = F.relu(self.fc1(x))\n",
|
||
|
" x = F.relu(self.fc2(x))\n",
|
||
|
" x = F.relu(self.fc3(x))\n",
|
||
|
" return self.fc4(x)\n",
|
||
|
"\n",
|
||
|
"# class Net(nn.Module):\n",
|
||
|
"# def __init__(self, features_len):\n",
|
||
|
"# super(Net, self).__init__()\n",
|
||
|
"# self.hid1 = nn.Linear(features_len, 10)\n",
|
||
|
"# self.hid2 = nn.Linear(10, 10)\n",
|
||
|
"# self.oupt = nn.Linear(10, 1)\n",
|
||
|
"\n",
|
||
|
"# nn.init.xavier_uniform_(self.hid1.weight)\n",
|
||
|
"# nn.init.zeros_(self.hid1.bias)\n",
|
||
|
"# nn.init.xavier_uniform_(self.hid2.weight)\n",
|
||
|
"# nn.init.zeros_(self.hid2.bias)\n",
|
||
|
"# nn.init.xavier_uniform_(self.oupt.weight)\n",
|
||
|
"# nn.init.zeros_(self.oupt.bias)\n",
|
||
|
"\n",
|
||
|
"# def forward(self, x):\n",
|
||
|
"# z = torch.tanh(self.hid1(x))\n",
|
||
|
"# z = torch.tanh(self.hid2(z))\n",
|
||
|
"# z = torch.sigmoid(self.oupt(z))\n",
|
||
|
"# return z\n",
|
||
|
" \n",
|
||
|
" \n",
|
||
|
"class NNRecommender(Recommender):\n",
|
||
|
" \"\"\"\n",
|
||
|
" Linear recommender class based on user and item features.\n",
|
||
|
" \"\"\"\n",
|
||
|
" \n",
|
||
|
" def generate_negative_interaction(self):\n",
|
||
|
" user_ids = interactions_df['user_id']\n",
|
||
|
" item_ids = interactions_df['item_id']\n",
|
||
|
" \n",
|
||
|
" user_id = user_ids.sample().item()\n",
|
||
|
" item_id = item_ids.sample().item()\n",
|
||
|
" positive_interactions = interactions_df.loc[\n",
|
||
|
" (interactions_df['item_id'] == item_id) & (interactions_df['user_id'] == user_id)]\n",
|
||
|
" \n",
|
||
|
" while not positive_interactions.empty:\n",
|
||
|
" user_id = user_ids.sample().item()\n",
|
||
|
" item_id = item_ids.sample().item()\n",
|
||
|
" positive_interactions = interactions_df.loc[\n",
|
||
|
" (interactions_df['item_id'] == item_id) & (interactions_df['user_id'] == user_id)]\n",
|
||
|
" \n",
|
||
|
" return (user_id, item_id, 0)\n",
|
||
|
" \n",
|
||
|
" def generate_negative_interactions(self, n, interactions_df, cross_df):\n",
|
||
|
" combined_dfs = pd.concat([cross_df, interactions_df[['user_id', 'item_id']]])\n",
|
||
|
" return combined_dfs.drop_duplicates(keep=False).sample(n=n)\n",
|
||
|
" \n",
|
||
|
" \n",
|
||
|
" def __init__(self, seed=6789, n_neg_per_pos=5):\n",
|
||
|
" \"\"\"\n",
|
||
|
" Initialize base recommender params and variables.\n",
|
||
|
" \"\"\"\n",
|
||
|
" self.model = None\n",
|
||
|
" self.n_neg_per_pos = n_neg_per_pos\n",
|
||
|
" \n",
|
||
|
" self.recommender_df = pd.DataFrame(columns=['user_id', 'item_id', 'score'])\n",
|
||
|
" self.users_df = None\n",
|
||
|
" self.user_features = None\n",
|
||
|
" \n",
|
||
|
" self.seed = seed\n",
|
||
|
" self.rng = np.random.RandomState(seed=seed)\n",
|
||
|
" \n",
|
||
|
" def calculate_accuracy(self, y_true, y_pred):\n",
|
||
|
" predicted = y_pred.ge(.5).view(-1)\n",
|
||
|
" return (y_true == predicted).sum().float() / len(y_true)\n",
|
||
|
" \n",
|
||
|
" def round_tensor(self, t, decimal_places=3):\n",
|
||
|
" return round(t.item(), decimal_places)\n",
|
||
|
" \n",
|
||
|
" def fit(self, interactions_df, users_df, items_df):\n",
|
||
|
" \"\"\"\n",
|
||
|
" Training of the recommender.\n",
|
||
|
" \n",
|
||
|
" :param pd.DataFrame interactions_df: DataFrame with recorded interactions between users and items \n",
|
||
|
" defined by user_id, item_id and features of the interaction.\n",
|
||
|
" :param pd.DataFrame users_df: DataFrame with users and their features defined by user_id and the user feature columns.\n",
|
||
|
" :param pd.DataFrame items_df: DataFrame with items and their features defined by item_id and the item feature columns.\n",
|
||
|
" \"\"\"\n",
|
||
|
" \n",
|
||
|
" interactions_df = interactions_df.copy()\n",
|
||
|
" # Prepare users_df and items_df \n",
|
||
|
" # (optional - use only if you want to train a hybrid model with content-based features)\n",
|
||
|
" \n",
|
||
|
" users_df, user_features = prepare_users_df(interactions_df)\n",
|
||
|
" \n",
|
||
|
" self.users_df = users_df\n",
|
||
|
" self.user_features = user_features\n",
|
||
|
" \n",
|
||
|
" items_df, item_features = prepare_items_df(interactions_df)\n",
|
||
|
" items_df = items_df.loc[:, ['item_id'] + item_features]\n",
|
||
|
" \n",
|
||
|
" n_epochs = 51\n",
|
||
|
"\n",
|
||
|
" X = items_df[['term_WinterVacation', 'term_Easter', 'term_OffSeason', 'term_HighSeason', 'term_LowSeason', 'term_MayLongWeekend', 'term_NewYear', 'term_Christmas', 'rate_plan_Standard', 'rate_plan_Nonref', 'room_segment_[0-160]', 'room_segment_[160-260]', 'room_segment_[260-360]', 'room_segment_[360-500]', 'room_segment_[500-900]', 'n_people_bucket_[1-1]', 'n_people_bucket_[2-2]', 'n_people_bucket_[3-4]', 'n_people_bucket_[5-inf]', 'weekend_stay_True', 'weekend_stay_False']]\n",
|
||
|
" y = items_df[['item_id']]\n",
|
||
|
" X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=self.seed)\n",
|
||
|
" \n",
|
||
|
" X_train = torch.from_numpy(X_train.to_numpy()).float()\n",
|
||
|
" y_train = torch.squeeze(torch.from_numpy(y_train.to_numpy()).long())\n",
|
||
|
" X_test = torch.from_numpy(X_test.to_numpy()).float()\n",
|
||
|
" y_test = torch.squeeze(torch.from_numpy(y_test.to_numpy()).long())\n",
|
||
|
" \n",
|
||
|
" self.net = Net(X_train.shape[1], items_df['item_id'].unique().size)\n",
|
||
|
" \n",
|
||
|
" optimizer = optim.Adam(self.net.parameters(), lr=0.05)\n",
|
||
|
" criterion = nn.CrossEntropyLoss()\n",
|
||
|
" \n",
|
||
|
" for epoch in range(n_epochs):\n",
|
||
|
" y_pred = self.net(X_train)\n",
|
||
|
" y_pred = torch.squeeze(y_pred)\n",
|
||
|
" train_loss = criterion(y_pred, y_train)\n",
|
||
|
" \n",
|
||
|
"# if epoch % 5000 == 0:\n",
|
||
|
"# train_acc = self.calculate_accuracy(y_train, y_pred)\n",
|
||
|
"# y_test_pred = self.net(X_test)\n",
|
||
|
"# y_test_pred = torch.squeeze(y_test_pred)\n",
|
||
|
"# test_loss = criterion(y_test_pred, y_test)\n",
|
||
|
"# test_acc = self.calculate_accuracy(y_test, y_test_pred)\n",
|
||
|
"# print(\n",
|
||
|
"# f'''epoch {epoch}\n",
|
||
|
"# Train set - loss: {self.round_tensor(train_loss)}, accuracy: {self.round_tensor(train_acc)}\n",
|
||
|
"# Test set - loss: {self.round_tensor(test_loss)}, accuracy: {self.round_tensor(test_acc)}\n",
|
||
|
"# ''')\n",
|
||
|
" \n",
|
||
|
" optimizer.zero_grad()\n",
|
||
|
" train_loss.backward()\n",
|
||
|
" optimizer.step()\n",
|
||
|
" \n",
|
||
|
" def recommend(self, users_df, items_df, n_recommendations=1):\n",
|
||
|
" \"\"\"\n",
|
||
|
" Serving of recommendations. Scores items in items_df for each user in users_df and returns \n",
|
||
|
" top n_recommendations for each user.\n",
|
||
|
" \n",
|
||
|
" :param pd.DataFrame users_df: DataFrame with users and their features for which recommendations should be generated.\n",
|
||
|
" :param pd.DataFrame items_df: DataFrame with items and their features which should be scored.\n",
|
||
|
" :param int n_recommendations: Number of recommendations to be returned for each user.\n",
|
||
|
" :return: DataFrame with user_id, item_id and score as columns returning n_recommendations top recommendations \n",
|
||
|
" for each user.\n",
|
||
|
" :rtype: pd.DataFrame\n",
|
||
|
" \"\"\"\n",
|
||
|
" \n",
|
||
|
" # Clean previous recommendations (iloc could be used alternatively)\n",
|
||
|
" self.recommender_df = self.recommender_df[:0]\n",
|
||
|
" \n",
|
||
|
" # Prepare users_df and items_df\n",
|
||
|
" # (optional - use only if you want to train a hybrid model with content-based features)\n",
|
||
|
" \n",
|
||
|
" users_df = users_df.loc[:, 'user_id']\n",
|
||
|
" users_df = pd.merge(users_df, self.users_df, on=['user_id'], how='left').fillna(0)\n",
|
||
|
" \n",
|
||
|
" # items_df, item_features = prepare_items_df(items_df)\n",
|
||
|
" # items_df = items_df.loc[:, ['item_id'] + item_features]\n",
|
||
|
" \n",
|
||
|
" # Score the items\n",
|
||
|
" \n",
|
||
|
" recommendations = pd.DataFrame(columns=['user_id', 'item_id', 'score'])\n",
|
||
|
" \n",
|
||
|
" for ix, user in users_df.iterrows():\n",
|
||
|
" prep_user = torch.from_numpy(user[['user_term_WinterVacation', 'user_term_Easter', 'user_term_OffSeason', 'user_term_HighSeason', 'user_term_LowSeason', 'user_term_MayLongWeekend', 'user_term_NewYear', 'user_term_Christmas', 'user_rate_plan_Standard', 'user_rate_plan_Nonref', 'user_room_segment_[0-160]', 'user_room_segment_[160-260]', 'user_room_segment_[260-360]', 'user_room_segment_[360-500]', 'user_room_segment_[500-900]', 'user_n_people_bucket_[1-1]', 'user_n_people_bucket_[2-2]', 'user_n_people_bucket_[3-4]', 'user_n_people_bucket_[5-inf]', 'user_weekend_stay_True', 'user_weekend_stay_False']].to_numpy()).float()\n",
|
||
|
" \n",
|
||
|
" scores = self.net(prep_user).detach().numpy()\n",
|
||
|
" \n",
|
||
|
" chosen_ids = np.argsort(-scores)[:n_recommendations]\n",
|
||
|
" \n",
|
||
|
" recommendations = []\n",
|
||
|
" for item_id in chosen_ids:\n",
|
||
|
" recommendations.append(\n",
|
||
|
" {\n",
|
||
|
" 'user_id': user['user_id'],\n",
|
||
|
" 'item_id': item_id,\n",
|
||
|
" 'score': scores[item_id]\n",
|
||
|
" }\n",
|
||
|
" )\n",
|
||
|
" \n",
|
||
|
" user_recommendations = pd.DataFrame(recommendations)\n",
|
||
|
" \n",
|
||
|
" self.recommender_df = pd.concat([self.recommender_df, user_recommendations])\n",
|
||
|
" \n",
|
||
|
" return self.recommender_df"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "copyrighted-relative",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Quick test of the recommender"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 22,
|
||
|
"id": "greatest-canon",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"items_df = interactions_df.loc[:, ['item_id'] + base_item_features].drop_duplicates()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 23,
|
||
|
"id": "initial-capital",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"ename": "NameError",
|
||
|
"evalue": "name 'train_test_split' is not defined",
|
||
|
"output_type": "error",
|
||
|
"traceback": [
|
||
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||
|
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
|
||
|
"\u001b[0;32m<ipython-input-23-851d3aa5378e>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# Fit method\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mnn_recommender\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mNNRecommender\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mnn_recommender\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minteractions_df\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1000\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0;31m# nn_recommender.fit(interactions_df, None, None)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||
|
"\u001b[0;32m<ipython-input-21-ca3049874457>\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, interactions_df, users_df, items_df)\u001b[0m\n\u001b[1;32m 114\u001b[0m \u001b[0mX\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mitems_df\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'term_WinterVacation'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'term_Easter'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'term_OffSeason'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'term_HighSeason'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'term_LowSeason'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'term_MayLongWeekend'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'term_NewYear'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'term_Christmas'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'rate_plan_Standard'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'rate_plan_Nonref'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'room_segment_[0-160]'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'room_segment_[160-260]'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'room_segment_[260-360]'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'room_segment_[360-500]'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'room_segment_[500-900]'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'n_people_bucket_[1-1]'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'n_people_bucket_[2-2]'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'n_people_bucket_[3-4]'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'n_people_bucket_[5-inf]'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'weekend_stay_True'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'weekend_stay_False'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 115\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mitems_df\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'item_id'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 116\u001b[0;31m \u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_test\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtrain_test_split\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrandom_state\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mseed\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 117\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 118\u001b[0m \u001b[0mX_train\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_numpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_numpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfloat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||
|
"\u001b[0;31mNameError\u001b[0m: name 'train_test_split' is not defined"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Fit method\n",
|
||
|
"nn_recommender = NNRecommender()\n",
|
||
|
"nn_recommender.fit(interactions_df.head(1000), None, None)\n",
|
||
|
"# nn_recommender.fit(interactions_df, None, None)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 193,
|
||
|
"id": "digital-consolidation",
|
||
|
"metadata": {
|
||
|
"scrolled": true
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>user_id</th>\n",
|
||
|
" <th>item_id</th>\n",
|
||
|
" <th>score</th>\n",
|
||
|
" <th>term</th>\n",
|
||
|
" <th>length_of_stay_bucket</th>\n",
|
||
|
" <th>rate_plan</th>\n",
|
||
|
" <th>room_segment</th>\n",
|
||
|
" <th>n_people_bucket</th>\n",
|
||
|
" <th>weekend_stay</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>88</td>\n",
|
||
|
" <td>37.715969</td>\n",
|
||
|
" <td>WinterVacation</td>\n",
|
||
|
" <td>[0-1]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[160-260]</td>\n",
|
||
|
" <td>[2-2]</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>57</td>\n",
|
||
|
" <td>36.182877</td>\n",
|
||
|
" <td>WinterVacation</td>\n",
|
||
|
" <td>[2-3]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[160-260]</td>\n",
|
||
|
" <td>[2-2]</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>69</td>\n",
|
||
|
" <td>35.771114</td>\n",
|
||
|
" <td>WinterVacation</td>\n",
|
||
|
" <td>[4-7]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[160-260]</td>\n",
|
||
|
" <td>[2-2]</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Recommender method\n",
|
||
|
"\n",
|
||
|
"recommendations = nn_recommender.recommend(pd.DataFrame([[1]], columns=['user_id']), items_df, 3)\n",
|
||
|
"\n",
|
||
|
"recommendations = pd.merge(recommendations, items_df, on='item_id', how='left')\n",
|
||
|
"display(HTML(recommendations.to_html()))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "advanced-eleven",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Tuning method"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 194,
|
||
|
"id": "strange-alaska",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"from evaluation_and_testing.testing import evaluate_train_test_split_implicit\n",
|
||
|
"\n",
|
||
|
"seed = 6789"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 195,
|
||
|
"id": "stable-theta",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"from hyperopt import hp, fmin, tpe, Trials\n",
|
||
|
"import traceback\n",
|
||
|
"\n",
|
||
|
"def tune_recommender(recommender_class, interactions_df, items_df, \n",
|
||
|
" param_space, max_evals=1, show_progressbar=True, seed=6789):\n",
|
||
|
" # Split into train_validation and test sets\n",
|
||
|
"\n",
|
||
|
" shuffle = np.arange(len(interactions_df))\n",
|
||
|
" rng = np.random.RandomState(seed=seed)\n",
|
||
|
" rng.shuffle(shuffle)\n",
|
||
|
" shuffle = list(shuffle)\n",
|
||
|
"\n",
|
||
|
" train_test_split = 0.8\n",
|
||
|
" split_index = int(len(interactions_df) * train_test_split)\n",
|
||
|
"\n",
|
||
|
" train_validation = interactions_df.iloc[shuffle[:split_index]]\n",
|
||
|
" test = interactions_df.iloc[shuffle[split_index:]]\n",
|
||
|
"\n",
|
||
|
" # Tune\n",
|
||
|
"\n",
|
||
|
" def loss(tuned_params):\n",
|
||
|
" recommender = recommender_class(seed=seed, **tuned_params)\n",
|
||
|
" hr1, hr3, hr5, hr10, ndcg1, ndcg3, ndcg5, ndcg10 = evaluate_train_test_split_implicit(\n",
|
||
|
" recommender, train_validation, items_df, seed=seed)\n",
|
||
|
" return -hr10\n",
|
||
|
"\n",
|
||
|
" n_tries = 1\n",
|
||
|
" succeded = False\n",
|
||
|
" try_id = 0\n",
|
||
|
" while not succeded and try_id < n_tries:\n",
|
||
|
" try:\n",
|
||
|
" trials = Trials()\n",
|
||
|
" best_param_set = fmin(loss, space=param_space, algo=tpe.suggest, \n",
|
||
|
" max_evals=max_evals, show_progressbar=show_progressbar, trials=trials, verbose=True)\n",
|
||
|
" succeded = True\n",
|
||
|
" except:\n",
|
||
|
" traceback.print_exc()\n",
|
||
|
" try_id += 1\n",
|
||
|
" \n",
|
||
|
" if not succeded:\n",
|
||
|
" return None\n",
|
||
|
" \n",
|
||
|
" # Validate\n",
|
||
|
" \n",
|
||
|
" recommender = recommender_class(seed=seed, **best_param_set)\n",
|
||
|
"\n",
|
||
|
" results = [[recommender_class.__name__] + list(evaluate_train_test_split_implicit(\n",
|
||
|
" recommender, {'train': train_validation, 'test': test}, items_df, seed=seed))]\n",
|
||
|
"\n",
|
||
|
" results = pd.DataFrame(results, \n",
|
||
|
" columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n",
|
||
|
"\n",
|
||
|
" display(HTML(results.to_html()))\n",
|
||
|
" \n",
|
||
|
" return best_param_set"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "reliable-switzerland",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"## Tuning of the recommender\n",
|
||
|
"\n",
|
||
|
"<span style=\"color:red\"><font size=\"4\">**Task:**</font></span><br> \n",
|
||
|
"Tune your model using the code below. You only need to put the class name of your recommender and choose an appropriate parameter space."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 196,
|
||
|
"id": "obvious-astrology",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"IN: \n",
|
||
|
"21 \n",
|
||
|
"OUT: \n",
|
||
|
"691 \n",
|
||
|
"IN: \n",
|
||
|
"21 \n",
|
||
|
"OUT: \n",
|
||
|
"691 \n",
|
||
|
"IN: \n",
|
||
|
"21 \n",
|
||
|
"OUT: \n",
|
||
|
"691 \n",
|
||
|
"IN: \n",
|
||
|
"21 \n",
|
||
|
"OUT: \n",
|
||
|
"691 \n",
|
||
|
"IN: \n",
|
||
|
"21 \n",
|
||
|
"OUT: \n",
|
||
|
"691 \n",
|
||
|
"IN: \n",
|
||
|
"21 \n",
|
||
|
"OUT: \n",
|
||
|
"691 \n",
|
||
|
"IN: \n",
|
||
|
"21 \n",
|
||
|
"OUT: \n",
|
||
|
"691 \n",
|
||
|
"IN: \n",
|
||
|
"21 \n",
|
||
|
"OUT: \n",
|
||
|
"691 \n",
|
||
|
"IN: \n",
|
||
|
"21 \n",
|
||
|
"OUT: \n",
|
||
|
"691 \n",
|
||
|
"IN: \n",
|
||
|
"21 \n",
|
||
|
"OUT: \n",
|
||
|
"691 \n",
|
||
|
"100%|██████████| 10/10 [18:34<00:00, 111.50s/trial, best loss: -0.04424416222859484]\n",
|
||
|
"IN: 21 OUT: 736\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>Recommender</th>\n",
|
||
|
" <th>HR@1</th>\n",
|
||
|
" <th>HR@3</th>\n",
|
||
|
" <th>HR@5</th>\n",
|
||
|
" <th>HR@10</th>\n",
|
||
|
" <th>NDCG@1</th>\n",
|
||
|
" <th>NDCG@3</th>\n",
|
||
|
" <th>NDCG@5</th>\n",
|
||
|
" <th>NDCG@10</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>NNRecommender</td>\n",
|
||
|
" <td>0.010201</td>\n",
|
||
|
" <td>0.020072</td>\n",
|
||
|
" <td>0.026324</td>\n",
|
||
|
" <td>0.035538</td>\n",
|
||
|
" <td>0.010201</td>\n",
|
||
|
" <td>0.01574</td>\n",
|
||
|
" <td>0.018216</td>\n",
|
||
|
" <td>0.021141</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
},
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Best parameters:\n",
|
||
|
"{'n_neg_per_pos': 9.0}\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"param_space = {\n",
|
||
|
" 'n_neg_per_pos': hp.quniform('n_neg_per_pos', 1, 10, 1)\n",
|
||
|
"}\n",
|
||
|
"items_df['item_id'].unique().size\n",
|
||
|
"\n",
|
||
|
"best_param_set = tune_recommender(NNRecommender, interactions_df, items_df,\n",
|
||
|
" param_space, max_evals=10, show_progressbar=True, seed=seed)\n",
|
||
|
"\n",
|
||
|
"print(\"Best parameters:\")\n",
|
||
|
"print(best_param_set)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "accredited-strap",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Final evaluation\n",
|
||
|
"\n",
|
||
|
"<span style=\"color:red\"><font size=\"4\">**Task:**</font></span><br> \n",
|
||
|
"Run the final evaluation of your recommender and present its results against the Amazon and Netflix recommenders' results. You just need to give the class name of your recommender and its tuned parameters below."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 198,
|
||
|
"id": "given-homework",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"IN: 21 OUT: 736\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>Recommender</th>\n",
|
||
|
" <th>HR@1</th>\n",
|
||
|
" <th>HR@3</th>\n",
|
||
|
" <th>HR@5</th>\n",
|
||
|
" <th>HR@10</th>\n",
|
||
|
" <th>NDCG@1</th>\n",
|
||
|
" <th>NDCG@3</th>\n",
|
||
|
" <th>NDCG@5</th>\n",
|
||
|
" <th>NDCG@10</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>NNRecommender</td>\n",
|
||
|
" <td>0.003949</td>\n",
|
||
|
" <td>0.015137</td>\n",
|
||
|
" <td>0.019743</td>\n",
|
||
|
" <td>0.026654</td>\n",
|
||
|
" <td>0.003949</td>\n",
|
||
|
" <td>0.010361</td>\n",
|
||
|
" <td>0.01223</td>\n",
|
||
|
" <td>0.014409</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"nn_recommender = NNRecommender(n_neg_per_pos=9) # Initialize your recommender here\n",
|
||
|
"\n",
|
||
|
"# Give the name of your recommender in the line below\n",
|
||
|
"nn_tts_results = [['NNRecommender'] + list(evaluate_train_test_split_implicit(\n",
|
||
|
" nn_recommender, interactions_df, items_df))]\n",
|
||
|
"\n",
|
||
|
"nn_tts_results = pd.DataFrame(\n",
|
||
|
" nn_tts_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n",
|
||
|
"\n",
|
||
|
"display(HTML(nn_tts_results.to_html()))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 199,
|
||
|
"id": "suited-nomination",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>Recommender</th>\n",
|
||
|
" <th>HR@1</th>\n",
|
||
|
" <th>HR@3</th>\n",
|
||
|
" <th>HR@5</th>\n",
|
||
|
" <th>HR@10</th>\n",
|
||
|
" <th>NDCG@1</th>\n",
|
||
|
" <th>NDCG@3</th>\n",
|
||
|
" <th>NDCG@5</th>\n",
|
||
|
" <th>NDCG@10</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>AmazonRecommender</td>\n",
|
||
|
" <td>0.042119</td>\n",
|
||
|
" <td>0.10464</td>\n",
|
||
|
" <td>0.140507</td>\n",
|
||
|
" <td>0.199408</td>\n",
|
||
|
" <td>0.042119</td>\n",
|
||
|
" <td>0.076826</td>\n",
|
||
|
" <td>0.091797</td>\n",
|
||
|
" <td>0.110705</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"from recommenders.amazon_recommender import AmazonRecommender\n",
|
||
|
"\n",
|
||
|
"amazon_recommender = AmazonRecommender()\n",
|
||
|
"\n",
|
||
|
"amazon_tts_results = [['AmazonRecommender'] + list(evaluate_train_test_split_implicit(\n",
|
||
|
" amazon_recommender, interactions_df, items_df))]\n",
|
||
|
"\n",
|
||
|
"amazon_tts_results = pd.DataFrame(\n",
|
||
|
" amazon_tts_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n",
|
||
|
"\n",
|
||
|
"display(HTML(amazon_tts_results.to_html()))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "conservative-remedy",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"from recommenders.netflix_recommender import NetflixRecommender\n",
|
||
|
"\n",
|
||
|
"netflix_recommender = NetflixRecommender(n_epochs=30, print_type='live')\n",
|
||
|
"\n",
|
||
|
"netflix_tts_results = [['NetflixRecommender'] + list(evaluate_train_test_split_implicit(\n",
|
||
|
" netflix_recommender, interactions_df, items_df))]\n",
|
||
|
"\n",
|
||
|
"netflix_tts_results = pd.DataFrame(\n",
|
||
|
" netflix_tts_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n",
|
||
|
"\n",
|
||
|
"display(HTML(netflix_tts_results.to_html()))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "moderate-printing",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"tts_results = pd.concat([nn_tts_results, amazon_tts_results, netflix_tts_results]).reset_index(drop=True)\n",
|
||
|
"display(HTML(tts_results.to_html()))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "uniform-vegetable",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Summary\n",
|
||
|
"\n",
|
||
|
"<span style=\"color:red\"><font size=\"4\">**Task:**</font></span><br> \n",
|
||
|
"Write a summary of your experiments. What worked well and what did not? What are your thoughts how could you possibly further improve the model?"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "declared-howard",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": []
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"kernelspec": {
|
||
|
"display_name": "rek_uno",
|
||
|
"language": "python",
|
||
|
"name": "rek_uno"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
|
"version": "3.8.8"
|
||
|
}
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 5
|
||
|
}
|