1891 lines
89 KiB
Plaintext
1891 lines
89 KiB
Plaintext
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 302,
|
||
|
"id": "alike-morgan",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"The autoreload extension is already loaded. To reload it, use:\n",
|
||
|
" %reload_ext autoreload\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"%matplotlib inline\n",
|
||
|
"%load_ext autoreload\n",
|
||
|
"%autoreload 2\n",
|
||
|
"\n",
|
||
|
"import numpy as np\n",
|
||
|
"import pandas as pd\n",
|
||
|
"import matplotlib.pyplot as plt\n",
|
||
|
"import seaborn as sns\n",
|
||
|
"from IPython.display import Markdown, display, HTML\n",
|
||
|
"from collections import defaultdict\n",
|
||
|
"\n",
|
||
|
"import torch\n",
|
||
|
"import torch.nn as nn\n",
|
||
|
"import torch.optim as optim\n",
|
||
|
"from livelossplot import PlotLosses\n",
|
||
|
"\n",
|
||
|
"# Fix the dying kernel problem (only a problem in some installations - you can remove it, if it works without it)\n",
|
||
|
"import os\n",
|
||
|
"os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "blessed-knitting",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Load the dataset for recommenders"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 303,
|
||
|
"id": "victorian-bottom",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>user_id</th>\n",
|
||
|
" <th>item_id</th>\n",
|
||
|
" <th>term</th>\n",
|
||
|
" <th>length_of_stay_bucket</th>\n",
|
||
|
" <th>rate_plan</th>\n",
|
||
|
" <th>room_segment</th>\n",
|
||
|
" <th>n_people_bucket</th>\n",
|
||
|
" <th>weekend_stay</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>WinterVacation</td>\n",
|
||
|
" <td>[2-3]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[260-360]</td>\n",
|
||
|
" <td>[5-inf]</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>2</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>WinterVacation</td>\n",
|
||
|
" <td>[2-3]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[160-260]</td>\n",
|
||
|
" <td>[3-4]</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>3</td>\n",
|
||
|
" <td>2</td>\n",
|
||
|
" <td>WinterVacation</td>\n",
|
||
|
" <td>[2-3]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[160-260]</td>\n",
|
||
|
" <td>[2-2]</td>\n",
|
||
|
" <td>False</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>4</td>\n",
|
||
|
" <td>3</td>\n",
|
||
|
" <td>WinterVacation</td>\n",
|
||
|
" <td>[4-7]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[160-260]</td>\n",
|
||
|
" <td>[3-4]</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>5</td>\n",
|
||
|
" <td>4</td>\n",
|
||
|
" <td>WinterVacation</td>\n",
|
||
|
" <td>[4-7]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[0-160]</td>\n",
|
||
|
" <td>[2-2]</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5</th>\n",
|
||
|
" <td>6</td>\n",
|
||
|
" <td>5</td>\n",
|
||
|
" <td>Easter</td>\n",
|
||
|
" <td>[4-7]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[260-360]</td>\n",
|
||
|
" <td>[5-inf]</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>6</th>\n",
|
||
|
" <td>7</td>\n",
|
||
|
" <td>6</td>\n",
|
||
|
" <td>OffSeason</td>\n",
|
||
|
" <td>[2-3]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[260-360]</td>\n",
|
||
|
" <td>[5-inf]</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7</th>\n",
|
||
|
" <td>8</td>\n",
|
||
|
" <td>7</td>\n",
|
||
|
" <td>HighSeason</td>\n",
|
||
|
" <td>[2-3]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[160-260]</td>\n",
|
||
|
" <td>[1-1]</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>8</th>\n",
|
||
|
" <td>9</td>\n",
|
||
|
" <td>8</td>\n",
|
||
|
" <td>HighSeason</td>\n",
|
||
|
" <td>[2-3]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[0-160]</td>\n",
|
||
|
" <td>[1-1]</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>9</th>\n",
|
||
|
" <td>8</td>\n",
|
||
|
" <td>7</td>\n",
|
||
|
" <td>HighSeason</td>\n",
|
||
|
" <td>[2-3]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[160-260]</td>\n",
|
||
|
" <td>[1-1]</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>10</th>\n",
|
||
|
" <td>8</td>\n",
|
||
|
" <td>7</td>\n",
|
||
|
" <td>HighSeason</td>\n",
|
||
|
" <td>[2-3]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[160-260]</td>\n",
|
||
|
" <td>[1-1]</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>11</th>\n",
|
||
|
" <td>10</td>\n",
|
||
|
" <td>9</td>\n",
|
||
|
" <td>HighSeason</td>\n",
|
||
|
" <td>[2-3]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[160-260]</td>\n",
|
||
|
" <td>[3-4]</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>12</th>\n",
|
||
|
" <td>11</td>\n",
|
||
|
" <td>9</td>\n",
|
||
|
" <td>HighSeason</td>\n",
|
||
|
" <td>[2-3]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[160-260]</td>\n",
|
||
|
" <td>[3-4]</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>13</th>\n",
|
||
|
" <td>12</td>\n",
|
||
|
" <td>10</td>\n",
|
||
|
" <td>HighSeason</td>\n",
|
||
|
" <td>[8-inf]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[160-260]</td>\n",
|
||
|
" <td>[3-4]</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>14</th>\n",
|
||
|
" <td>14</td>\n",
|
||
|
" <td>11</td>\n",
|
||
|
" <td>HighSeason</td>\n",
|
||
|
" <td>[2-3]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[0-160]</td>\n",
|
||
|
" <td>[3-4]</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
# Load the hotel interactions dataset and cast every feature column to a
# pandas Categorical with an explicit, fixed set of admissible values.
data_path = os.path.join("data", "hotel_data")

interactions_df = pd.read_csv(
    os.path.join(data_path, "hotel_data_interactions_df.csv"), index_col=0)

# Item feature columns used throughout the notebook.
base_item_features = ['term', 'length_of_stay_bucket', 'rate_plan',
                      'room_segment', 'n_people_bucket', 'weekend_stay']

# Admissible values for every feature column; the order fixes the
# category order used by all downstream encodings.
column_values_dict = {
    'term': ['WinterVacation', 'Easter', 'OffSeason', 'HighSeason',
             'LowSeason', 'MayLongWeekend', 'NewYear', 'Christmas'],
    'length_of_stay_bucket': ['[0-1]', '[2-3]', '[4-7]', '[8-inf]'],
    'rate_plan': ['Standard', 'Nonref'],
    'room_segment': ['[0-160]', '[160-260]', '[260-360]', '[360-500]', '[500-900]'],
    'n_people_bucket': ['[1-1]', '[2-2]', '[3-4]', '[5-inf]'],
    'weekend_stay': ['True', 'False']
}

# weekend_stay is read as bool; stringify it first so its values match the
# 'True'/'False' category labels declared above.
interactions_df.loc[:, 'weekend_stay'] = interactions_df['weekend_stay'].astype('str')

# One loop instead of six copy-pasted pd.Categorical assignments.
for column, values in column_values_dict.items():
    interactions_df.loc[:, column] = pd.Categorical(
        interactions_df[column], categories=values)

display(HTML(interactions_df.head(15).to_html()))
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "realistic-third",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# (Optional) Prepare numerical user features\n",
|
||
|
"\n",
|
||
|
"The method below is left here for convenience if you want to experiment with content-based user features as an input for your neural network."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 304,
|
||
|
"id": "variable-jaguar",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"['user_term_WinterVacation', 'user_term_Easter', 'user_term_OffSeason', 'user_term_HighSeason', 'user_term_LowSeason', 'user_term_MayLongWeekend', 'user_term_NewYear', 'user_term_Christmas', 'user_length_of_stay_bucket_[0-1]', 'user_length_of_stay_bucket_[2-3]', 'user_length_of_stay_bucket_[4-7]', 'user_length_of_stay_bucket_[8-inf]', 'user_rate_plan_Standard', 'user_rate_plan_Nonref', 'user_room_segment_[0-160]', 'user_room_segment_[160-260]', 'user_room_segment_[260-360]', 'user_room_segment_[360-500]', 'user_room_segment_[500-900]', 'user_n_people_bucket_[1-1]', 'user_n_people_bucket_[2-2]', 'user_n_people_bucket_[3-4]', 'user_n_people_bucket_[5-inf]', 'user_weekend_stay_True', 'user_weekend_stay_False']\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>user_id</th>\n",
|
||
|
" <th>user_term_WinterVacation</th>\n",
|
||
|
" <th>user_term_Easter</th>\n",
|
||
|
" <th>user_term_OffSeason</th>\n",
|
||
|
" <th>user_term_HighSeason</th>\n",
|
||
|
" <th>user_term_LowSeason</th>\n",
|
||
|
" <th>user_term_MayLongWeekend</th>\n",
|
||
|
" <th>user_term_NewYear</th>\n",
|
||
|
" <th>user_term_Christmas</th>\n",
|
||
|
" <th>user_length_of_stay_bucket_[0-1]</th>\n",
|
||
|
" <th>user_length_of_stay_bucket_[2-3]</th>\n",
|
||
|
" <th>user_length_of_stay_bucket_[4-7]</th>\n",
|
||
|
" <th>user_length_of_stay_bucket_[8-inf]</th>\n",
|
||
|
" <th>user_rate_plan_Standard</th>\n",
|
||
|
" <th>user_rate_plan_Nonref</th>\n",
|
||
|
" <th>user_room_segment_[0-160]</th>\n",
|
||
|
" <th>user_room_segment_[160-260]</th>\n",
|
||
|
" <th>user_room_segment_[260-360]</th>\n",
|
||
|
" <th>user_room_segment_[360-500]</th>\n",
|
||
|
" <th>user_room_segment_[500-900]</th>\n",
|
||
|
" <th>user_n_people_bucket_[1-1]</th>\n",
|
||
|
" <th>user_n_people_bucket_[2-2]</th>\n",
|
||
|
" <th>user_n_people_bucket_[3-4]</th>\n",
|
||
|
" <th>user_n_people_bucket_[5-inf]</th>\n",
|
||
|
" <th>user_weekend_stay_True</th>\n",
|
||
|
" <th>user_weekend_stay_False</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0.130435</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.652174</td>\n",
|
||
|
" <td>0.086957</td>\n",
|
||
|
" <td>0.130435</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.608696</td>\n",
|
||
|
" <td>0.391304</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.521739</td>\n",
|
||
|
" <td>0.478261</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.869565</td>\n",
|
||
|
" <td>0.130435</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.739130</td>\n",
|
||
|
" <td>0.173913</td>\n",
|
||
|
" <td>0.086957</td>\n",
|
||
|
" <td>0.782609</td>\n",
|
||
|
" <td>0.217391</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>47</th>\n",
|
||
|
" <td>50</td>\n",
|
||
|
" <td>0.043478</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.434783</td>\n",
|
||
|
" <td>0.304348</td>\n",
|
||
|
" <td>0.217391</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.913043</td>\n",
|
||
|
" <td>0.086957</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.260870</td>\n",
|
||
|
" <td>0.739130</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.565217</td>\n",
|
||
|
" <td>0.434783</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.173913</td>\n",
|
||
|
" <td>0.521739</td>\n",
|
||
|
" <td>0.304348</td>\n",
|
||
|
" <td>0.782609</td>\n",
|
||
|
" <td>0.217391</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>92</th>\n",
|
||
|
" <td>96</td>\n",
|
||
|
" <td>0.083333</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.708333</td>\n",
|
||
|
" <td>0.125000</td>\n",
|
||
|
" <td>0.041667</td>\n",
|
||
|
" <td>0.041667</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.250000</td>\n",
|
||
|
" <td>0.666667</td>\n",
|
||
|
" <td>0.041667</td>\n",
|
||
|
" <td>0.041667</td>\n",
|
||
|
" <td>0.291667</td>\n",
|
||
|
" <td>0.708333</td>\n",
|
||
|
" <td>0.125000</td>\n",
|
||
|
" <td>0.791667</td>\n",
|
||
|
" <td>0.083333</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.041667</td>\n",
|
||
|
" <td>0.333333</td>\n",
|
||
|
" <td>0.541667</td>\n",
|
||
|
" <td>0.083333</td>\n",
|
||
|
" <td>0.750000</td>\n",
|
||
|
" <td>0.250000</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>111</th>\n",
|
||
|
" <td>115</td>\n",
|
||
|
" <td>0.727273</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.272727</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.500000</td>\n",
|
||
|
" <td>0.363636</td>\n",
|
||
|
" <td>0.136364</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>1.000000</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.818182</td>\n",
|
||
|
" <td>0.181818</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.818182</td>\n",
|
||
|
" <td>0.090909</td>\n",
|
||
|
" <td>0.045455</td>\n",
|
||
|
" <td>0.045455</td>\n",
|
||
|
" <td>0.363636</td>\n",
|
||
|
" <td>0.636364</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>675</th>\n",
|
||
|
" <td>706</td>\n",
|
||
|
" <td>0.091988</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.451039</td>\n",
|
||
|
" <td>0.189911</td>\n",
|
||
|
" <td>0.207715</td>\n",
|
||
|
" <td>0.038576</td>\n",
|
||
|
" <td>0.011869</td>\n",
|
||
|
" <td>0.008902</td>\n",
|
||
|
" <td>0.169139</td>\n",
|
||
|
" <td>0.459941</td>\n",
|
||
|
" <td>0.272997</td>\n",
|
||
|
" <td>0.097923</td>\n",
|
||
|
" <td>0.994065</td>\n",
|
||
|
" <td>0.005935</td>\n",
|
||
|
" <td>0.020772</td>\n",
|
||
|
" <td>0.839763</td>\n",
|
||
|
" <td>0.130564</td>\n",
|
||
|
" <td>0.008902</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.041543</td>\n",
|
||
|
" <td>0.094955</td>\n",
|
||
|
" <td>0.738872</td>\n",
|
||
|
" <td>0.124629</td>\n",
|
||
|
" <td>0.676558</td>\n",
|
||
|
" <td>0.323442</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1699</th>\n",
|
||
|
" <td>1736</td>\n",
|
||
|
" <td>0.034483</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.482759</td>\n",
|
||
|
" <td>0.206897</td>\n",
|
||
|
" <td>0.275862</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.241379</td>\n",
|
||
|
" <td>0.551724</td>\n",
|
||
|
" <td>0.206897</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.172414</td>\n",
|
||
|
" <td>0.827586</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.931034</td>\n",
|
||
|
" <td>0.068966</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.379310</td>\n",
|
||
|
" <td>0.413793</td>\n",
|
||
|
" <td>0.206897</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.448276</td>\n",
|
||
|
" <td>0.551724</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7639</th>\n",
|
||
|
" <td>7779</td>\n",
|
||
|
" <td>0.037037</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.296296</td>\n",
|
||
|
" <td>0.259259</td>\n",
|
||
|
" <td>0.370370</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.037037</td>\n",
|
||
|
" <td>0.111111</td>\n",
|
||
|
" <td>0.296296</td>\n",
|
||
|
" <td>0.481481</td>\n",
|
||
|
" <td>0.111111</td>\n",
|
||
|
" <td>1.000000</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.814815</td>\n",
|
||
|
" <td>0.185185</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.037037</td>\n",
|
||
|
" <td>0.740741</td>\n",
|
||
|
" <td>0.222222</td>\n",
|
||
|
" <td>0.814815</td>\n",
|
||
|
" <td>0.185185</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
def n_to_p(l):
    """Normalize a list of non-negative counts into proportions.

    Returns the input unchanged when the total is not positive, so an
    all-zero count vector passes through untouched.
    """
    total = sum(l)
    if total <= 0:
        return l
    return [count / total for count in l]
|
||
|
"\n",
|
||
|
def calculate_p(x, values):
    """Return the empirical distribution of `x` over the admissible `values`.

    Counts how often each element of `values` occurs in `x` and normalizes
    the counts with n_to_p (an all-zero count vector stays all-zero).
    An element of `x` not present in `values` raises a KeyError.
    """
    # Precompute value -> position once instead of calling values.index(v)
    # (a linear scan) for every element of x; avoids accidental
    # O(len(x) * len(values)) behavior.
    position = {v: i for i, v in enumerate(values)}
    counts = [0] * len(values)
    for v in x:
        counts[position[v]] += 1

    return n_to_p(counts)
|
||
|
"\n",
|
||
|
def prepare_users_df(interactions_df):
    """Build per-user numerical feature vectors from the interaction log.

    For every column in base_item_features, a user's interactions are
    aggregated into the empirical distribution over that column's
    admissible values (via calculate_p), yielding one
    "user_<column>_<value>" proportion column per admissible value.

    Returns (users_df, user_features): users_df has one row per user_id,
    user_features lists all generated proportion column names.
    """

    # One row per distinct user; .first() acts only as deduplication here
    # because the frame contains nothing but user_id.
    users_df = interactions_df.loc[:, ["user_id"]]
    users_df = users_df.groupby("user_id").first().reset_index(drop=False)

    user_features = []

    for column in base_item_features:

        # Admissible values for this column, taken from the notebook-level dict.
        column_values = column_values_dict[column]
        # Collect each user's observed values for this column into a list.
        df = interactions_df.loc[:, ['user_id', column]]
        df = df.groupby('user_id').aggregate(lambda x: list(x)).reset_index(drop=False)

        def calc_p(x):
            # Distribution of one user's observed values over column_values.
            return calculate_p(x, column_values)

        df.loc[:, column] = df[column].apply(lambda x: calc_p(x))

        # Expand each user's probability list into one column per value.
        p_columns = []
        for i in range(len(column_values)):
            p_columns.append("user_" + column + "_" + column_values[i])
            df.loc[:, p_columns[i]] = df[column].apply(lambda x: x[i])
            user_features.append(p_columns[i])

        # Attach this column's proportion features to the user frame.
        users_df = pd.merge(users_df, df.loc[:, ['user_id'] + p_columns], on=["user_id"])

    return users_df, user_features
|
||
|
" \n",
|
||
|
"\n",
|
||
|
"users_df, user_features = prepare_users_df(interactions_df)\n",
|
||
|
"\n",
|
||
|
"print(user_features)\n",
|
||
|
"\n",
|
||
|
"display(HTML(users_df.loc[users_df['user_id'].isin([706, 1736, 7779, 96, 1, 50, 115])].head(15).to_html()))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "amino-keyboard",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# (Optional) Prepare numerical item features\n",
|
||
|
"\n",
|
||
|
"The method below is left here for convenience if you want to experiment with content-based item features as an input for your neural network."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 305,
|
||
|
"id": "formal-munich",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"['term_WinterVacation', 'term_Easter', 'term_OffSeason', 'term_HighSeason', 'term_LowSeason', 'term_MayLongWeekend', 'term_NewYear', 'term_Christmas', 'length_of_stay_bucket_[0-1]', 'length_of_stay_bucket_[2-3]', 'length_of_stay_bucket_[4-7]', 'length_of_stay_bucket_[8-inf]', 'rate_plan_Standard', 'rate_plan_Nonref', 'room_segment_[0-160]', 'room_segment_[160-260]', 'room_segment_[260-360]', 'room_segment_[360-500]', 'room_segment_[500-900]', 'n_people_bucket_[1-1]', 'n_people_bucket_[2-2]', 'n_people_bucket_[3-4]', 'n_people_bucket_[5-inf]', 'weekend_stay_True', 'weekend_stay_False']\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>item_id</th>\n",
|
||
|
" <th>term_WinterVacation</th>\n",
|
||
|
" <th>term_Easter</th>\n",
|
||
|
" <th>term_OffSeason</th>\n",
|
||
|
" <th>term_HighSeason</th>\n",
|
||
|
" <th>term_LowSeason</th>\n",
|
||
|
" <th>term_MayLongWeekend</th>\n",
|
||
|
" <th>term_NewYear</th>\n",
|
||
|
" <th>term_Christmas</th>\n",
|
||
|
" <th>length_of_stay_bucket_[0-1]</th>\n",
|
||
|
" <th>length_of_stay_bucket_[2-3]</th>\n",
|
||
|
" <th>length_of_stay_bucket_[4-7]</th>\n",
|
||
|
" <th>length_of_stay_bucket_[8-inf]</th>\n",
|
||
|
" <th>rate_plan_Standard</th>\n",
|
||
|
" <th>rate_plan_Nonref</th>\n",
|
||
|
" <th>room_segment_[0-160]</th>\n",
|
||
|
" <th>room_segment_[160-260]</th>\n",
|
||
|
" <th>room_segment_[260-360]</th>\n",
|
||
|
" <th>room_segment_[360-500]</th>\n",
|
||
|
" <th>room_segment_[500-900]</th>\n",
|
||
|
" <th>n_people_bucket_[1-1]</th>\n",
|
||
|
" <th>n_people_bucket_[2-2]</th>\n",
|
||
|
" <th>n_people_bucket_[3-4]</th>\n",
|
||
|
" <th>n_people_bucket_[5-inf]</th>\n",
|
||
|
" <th>weekend_stay_True</th>\n",
|
||
|
" <th>weekend_stay_False</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>2</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>3</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>4</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5</th>\n",
|
||
|
" <td>5</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>6</th>\n",
|
||
|
" <td>6</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
def map_items_to_onehot(df):
    """One-hot encode the base item feature columns of `df`.

    Returns the frame with the categorical columns replaced by their dummy
    indicator columns, together with the list of new column names.
    """
    encoded = pd.get_dummies(df.loc[:, base_item_features])
    result = df.drop(base_item_features, axis=1).join(encoded)
    return result, list(encoded.columns)
|
||
|
"\n",
|
||
|
def prepare_items_df(interactions_df):
    """Build the item feature frame from the raw interaction log.

    Keeps one row per distinct (item_id, base features) combination and
    one-hot encodes the categorical feature columns.

    Returns (items_df, item_features) as produced by map_items_to_onehot.
    """
    distinct_items = interactions_df.loc[:, ["item_id"] + base_item_features].drop_duplicates()
    return map_items_to_onehot(distinct_items)
|
||
|
"\n",
|
||
|
"\n",
|
||
|
"items_df, item_features = prepare_items_df(interactions_df)\n",
|
||
|
"\n",
|
||
|
"print(item_features)\n",
|
||
|
"\n",
|
||
|
"display(HTML(items_df.loc[items_df['item_id'].isin([0, 1, 2, 3, 4, 5, 6])].head(15).to_html()))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "figured-imaging",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Neural network recommender\n",
|
||
|
"\n",
|
||
|
"<span style=\"color:red\"><font size=\"4\">**Task:**</font></span><br> \n",
|
||
|
"Code a recommender based on a neural network model. You are free to choose any network architecture you find appropriate. The network can use the interaction vectors for users and items, embeddings of users and items, as well as user and item features (you can use the features you developed in the first project).\n",
|
||
|
"\n",
|
||
|
"Remember to keep control over randomness - in the init method add the seed as a parameter and initialize the random seed generator with that seed (both for numpy and pytorch):\n",
|
||
|
"\n",
|
||
|
"```python\n",
|
||
|
"self.seed = seed\n",
|
||
|
"self.rng = np.random.RandomState(seed=seed)\n",
|
||
|
"```\n",
|
||
|
"in the network model:\n",
|
||
|
"```python\n",
|
||
|
"self.seed = torch.manual_seed(seed)\n",
|
||
|
"```\n",
|
||
|
"\n",
|
||
|
"You are encouraged to experiment with:\n",
|
||
|
" - the number of layers in the network, the number of neurons and different activation functions,\n",
|
||
|
" - different optimizers and their parameters,\n",
|
||
|
" - batch size and the number of epochs,\n",
|
||
|
" - embedding layers,\n",
|
||
|
" - content-based features of both users and items."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 446,
|
||
|
"id": "unlike-recipient",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"from recommenders.recommender import Recommender\n",
|
||
|
"\n",
|
||
|
"\n",
|
||
|
"# HR10 = 0.07\n",
|
||
|
"# class Net(nn.Module):\n",
|
||
|
"# def __init__(self, features_len, output_len):\n",
|
||
|
"# super(Net, self).__init__()\n",
|
||
|
" \n",
|
||
|
"# self.fc1 = nn.Linear(features_len, 150)\n",
|
||
|
"# self.fc2 = nn.Linear(150, 100)\n",
|
||
|
"# self.fc3 = nn.Linear(100, output_len)\n",
|
||
|
"# self.fc4 = nn.Linear(output_len, output_len+200)\n",
|
||
|
" \n",
|
||
|
"# self.dropout = nn.Dropout(p=0.5)\n",
|
||
|
" \n",
|
||
|
"# def forward(self, x):\n",
|
||
|
"# x = F.relu(self.fc1(x))\n",
|
||
|
"# x = self.dropout(x)\n",
|
||
|
"# x = F.relu(self.fc2(x))\n",
|
||
|
"# x = self.dropout(x)\n",
|
||
|
"# x = F.relu(self.fc3(x))\n",
|
||
|
"# return self.fc4(x)\n",
|
||
|
"\n",
|
||
|
"# HR10 = 0.06\n",
|
||
|
"# class Net(nn.Module):\n",
|
||
|
"# def __init__(self, features_len, output_len):\n",
|
||
|
"# super(Net, self).__init__()\n",
|
||
|
" \n",
|
||
|
"# self.fc1 = nn.Linear(features_len, 150)\n",
|
||
|
"# self.fc2 = nn.Linear(150, 100)\n",
|
||
|
"# self.fc3 = nn.Linear(100, output_len)\n",
|
||
|
"# self.fc4 = nn.Linear(output_len, output_len+150)\n",
|
||
|
"\n",
|
||
|
"# self.dropout = nn.Dropout(p=0.5)\n",
|
||
|
" \n",
|
||
|
"# def forward(self, x):\n",
|
||
|
"# x = F.relu(self.fc1(x))\n",
|
||
|
"# x = self.dropout(x)\n",
|
||
|
"# x = F.relu(self.fc2(x))\n",
|
||
|
"# x = self.dropout(x)\n",
|
||
|
"# x = F.relu(self.fc3(x))\n",
|
||
|
"# x = self.dropout(x)\n",
|
||
|
"# return self.fc4(x)\n",
|
||
|
"\n",
|
||
|
"# Softmax very bad choice for multiclassification\n",
|
||
|
"# class Net(nn.Module):\n",
|
||
|
"# def __init__(self, features_len, output_len):\n",
|
||
|
"# super(Net, self).__init__()\n",
|
||
|
" \n",
|
||
|
"# self.fc1 = nn.Linear(features_len, 150)\n",
|
||
|
"# self.fc2 = nn.Linear(150, 100)\n",
|
||
|
"# self.fc3 = nn.Linear(100, output_len)\n",
|
||
|
"# self.fc4 = nn.Linear(output_len, output_len+200)\n",
|
||
|
" \n",
|
||
|
"# self.dropout = nn.Dropout(p=0.5)\n",
|
||
|
"# self.softmax = nn.Softmax()\n",
|
||
|
" \n",
|
||
|
"# def forward(self, x):\n",
|
||
|
"# x = F.relu(self.fc1(x))\n",
|
||
|
"# x = self.dropout(x)\n",
|
||
|
"# x = F.relu(self.fc2(x))\n",
|
||
|
"# x = self.dropout(x)\n",
|
||
|
"# x = F.relu(self.fc3(x))\n",
|
||
|
"# x = self.fc4(x)\n",
|
||
|
"# x = self.softmax(x)\n",
|
||
|
"# return x\n",
|
||
|
" \n",
|
||
|
"# HR10 = 0.116 EPOCH 20000\n",
|
||
|
"# class Net(nn.Module):\n",
|
||
|
"# def __init__(self, features_len, output_len):\n",
|
||
|
"# super(Net, self).__init__()\n",
|
||
|
" \n",
|
||
|
"# self.fc1 = nn.Linear(features_len, 150)\n",
|
||
|
"# self.fc2 = nn.Linear(150, 100)\n",
|
||
|
"# self.fc3 = nn.Linear(100, output_len)\n",
|
||
|
"# self.fc4 = nn.Linear(output_len, output_len+200)\n",
|
||
|
" \n",
|
||
|
"# self.dropout = nn.Dropout(p=0.5)\n",
|
||
|
" \n",
|
||
|
"# def forward(self, x):\n",
|
||
|
"# x = F.relu(self.fc1(x))\n",
|
||
|
"# x = self.dropout(x)\n",
|
||
|
"# x = F.relu(self.fc2(x))\n",
|
||
|
"# x = self.dropout(x)\n",
|
||
|
"# x = F.relu(self.fc3(x))\n",
|
||
|
"# return self.fc4(x)\n",
|
||
|
" \n",
|
||
|
"class Net(nn.Module):\n",
|
||
|
" def __init__(self, features_len, output_len):\n",
|
||
|
" super(Net, self).__init__()\n",
|
||
|
" \n",
|
||
|
" self.fc1 = nn.Linear(features_len, 150)\n",
|
||
|
" self.fc2 = nn.Linear(150, 100)\n",
|
||
|
" self.fc3 = nn.Linear(100, output_len)\n",
|
||
|
" self.fc4 = nn.Linear(output_len, output_len+200)\n",
|
||
|
" \n",
|
||
|
" self.dropout = nn.Dropout(p=0.5)\n",
|
||
|
" self.prelu = nn.PReLU()\n",
|
||
|
" \n",
|
||
|
" def forward(self, x):\n",
|
||
|
" x = self.fc1(x)\n",
|
||
|
" x = self.prelu(x)\n",
|
||
|
" x = self.dropout(x)\n",
|
||
|
" x = self.fc2(x)\n",
|
||
|
" x = self.prelu(x)\n",
|
||
|
" x = self.dropout(x)\n",
|
||
|
" x = self.fc3(x)\n",
|
||
|
" x = self.prelu(x)\n",
|
||
|
" return self.fc4(x)\n",
|
||
|
" \n",
|
||
|
"class NNRecommender(Recommender):\n",
|
||
|
" \"\"\"\n",
|
||
|
" Neural network recommender class based on user and item features.\n",
|
||
|
" \"\"\"\n",
|
||
|
" \n",
|
||
|
" def __init__(self, seed=6789, n_neg_per_pos=5, n_epochs=20000, lr=0.01):\n",
|
||
|
" \"\"\"\n",
|
||
|
" Initialize base recommender params and variables.\n",
|
||
|
" \"\"\"\n",
|
||
|
" self.model = None\n",
|
||
|
" self.n_neg_per_pos = n_neg_per_pos\n",
|
||
|
" \n",
|
||
|
" self.recommender_df = pd.DataFrame(columns=['user_id', 'item_id', 'score'])\n",
|
||
|
" self.users_df = None\n",
|
||
|
" self.user_features = None\n",
|
||
|
" \n",
|
||
|
" self.seed = seed\n",
|
||
|
" self.rng = np.random.RandomState(seed=seed)\n",
|
||
|
" \n",
|
||
|
" self.n_epochs = n_epochs\n",
|
||
|
" self.lr = lr\n",
|
||
|
" \n",
|
||
|
" def calculate_accuracy(self, y_true, y_pred):\n",
|
||
|
" predictions=(y_pred.argmax(1))\n",
|
||
|
" return (predictions == y_true).sum().float() / len(y_true)\n",
|
||
|
" \n",
|
||
|
" def round_tensor(self, t, decimal_places=3):\n",
|
||
|
" return round(t.item(), decimal_places)\n",
|
||
|
" \n",
|
||
|
" def fit(self, interactions_df, users_df, items_df):\n",
|
||
|
" \"\"\"\n",
|
||
|
" Training of the recommender.\n",
|
||
|
" \n",
|
||
|
" :param pd.DataFrame interactions_df: DataFrame with recorded interactions between users and items \n",
|
||
|
" defined by user_id, item_id and features of the interaction.\n",
|
||
|
" :param pd.DataFrame users_df: DataFrame with users and their features defined by user_id and the user feature columns.\n",
|
||
|
" :param pd.DataFrame items_df: DataFrame with items and their features defined by item_id and the item feature columns.\n",
|
||
|
" \"\"\"\n",
|
||
|
" \n",
|
||
|
" interactions_df = interactions_df.copy()\n",
|
||
|
" # Prepare users_df and items_df \n",
|
||
|
" # (optional - use only if you want to train a hybrid model with content-based features)\n",
|
||
|
" \n",
|
||
|
" users_df, user_features = prepare_users_df(interactions_df)\n",
|
||
|
" \n",
|
||
|
" self.users_df = users_df\n",
|
||
|
" self.user_features = user_features\n",
|
||
|
" \n",
|
||
|
" items_df, item_features = prepare_items_df(interactions_df)\n",
|
||
|
" items_df = items_df.loc[:, ['item_id'] + item_features]\n",
|
||
|
" \n",
|
||
|
" X = items_df[['term_WinterVacation', 'term_Easter', 'term_OffSeason', 'term_HighSeason', 'term_LowSeason', 'term_MayLongWeekend', 'term_NewYear', 'term_Christmas', 'rate_plan_Standard', 'rate_plan_Nonref', 'room_segment_[0-160]', 'room_segment_[160-260]', 'room_segment_[260-360]', 'room_segment_[360-500]', 'room_segment_[500-900]', 'n_people_bucket_[1-1]', 'n_people_bucket_[2-2]', 'n_people_bucket_[3-4]', 'n_people_bucket_[5-inf]', 'weekend_stay_True', 'weekend_stay_False']]\n",
|
||
|
" y = items_df[['item_id']]\n",
|
||
|
" X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=self.seed)\n",
|
||
|
" \n",
|
||
|
" X_train = torch.from_numpy(X_train.to_numpy()).float()\n",
|
||
|
" y_train = torch.squeeze(torch.from_numpy(y_train.to_numpy()).long())\n",
|
||
|
" X_test = torch.from_numpy(X_test.to_numpy()).float()\n",
|
||
|
" y_test = torch.squeeze(torch.from_numpy(y_test.to_numpy()).long())\n",
|
||
|
" \n",
|
||
|
" self.net = Net(X_train.shape[1], items_df['item_id'].unique().size)\n",
|
||
|
" \n",
|
||
|
" optimizer = optim.Adam(self.net.parameters(), lr=self.lr)\n",
|
||
|
" criterion = nn.CrossEntropyLoss()\n",
|
||
|
" \n",
|
||
|
" for epoch in range(self.n_epochs):\n",
|
||
|
" y_pred = self.net(X_train)\n",
|
||
|
" y_pred = torch.squeeze(y_pred)\n",
|
||
|
" train_loss = criterion(y_pred, y_train)\n",
|
||
|
" \n",
|
||
|
" if epoch % 1000 == 0:\n",
|
||
|
" y_test_pred = self.net(X_test)\n",
|
||
|
" y_test_pred = torch.squeeze(y_test_pred)\n",
|
||
|
" test_loss = criterion(y_test_pred, y_test)\n",
|
||
|
" print(\n",
|
||
|
" f'''epoch {epoch}\n",
|
||
|
" Train set - loss: {self.round_tensor(train_loss)}\n",
|
||
|
" Test set - loss: {self.round_tensor(test_loss)}\n",
|
||
|
" ''')\n",
|
||
|
" \n",
|
||
|
" optimizer.zero_grad()\n",
|
||
|
" train_loss.backward()\n",
|
||
|
" optimizer.step()\n",
|
||
|
" \n",
|
||
|
" def recommend(self, users_df, items_df, n_recommendations=1):\n",
|
||
|
" \"\"\"\n",
|
||
|
" Serving of recommendations. Scores items in items_df for each user in users_df and returns \n",
|
||
|
" top n_recommendations for each user.\n",
|
||
|
" \n",
|
||
|
" :param pd.DataFrame users_df: DataFrame with users and their features for which recommendations should be generated.\n",
|
||
|
" :param pd.DataFrame items_df: DataFrame with items and their features which should be scored.\n",
|
||
|
" :param int n_recommendations: Number of recommendations to be returned for each user.\n",
|
||
|
" :return: DataFrame with user_id, item_id and score as columns returning n_recommendations top recommendations \n",
|
||
|
" for each user.\n",
|
||
|
" :rtype: pd.DataFrame\n",
|
||
|
" \"\"\"\n",
|
||
|
" \n",
|
||
|
" # Clean previous recommendations (iloc could be used alternatively)\n",
|
||
|
" self.recommender_df = self.recommender_df[:0]\n",
|
||
|
" \n",
|
||
|
" # Prepare users_df and items_df\n",
|
||
|
" # (optional - use only if you want to train a hybrid model with content-based features)\n",
|
||
|
" \n",
|
||
|
" users_df = users_df.loc[:, 'user_id']\n",
|
||
|
" users_df = pd.merge(users_df, self.users_df, on=['user_id'], how='left').fillna(0)\n",
|
||
|
" \n",
|
||
|
" # items_df, item_features = prepare_items_df(items_df)\n",
|
||
|
" # items_df = items_df.loc[:, ['item_id'] + item_features]\n",
|
||
|
" \n",
|
||
|
" # Score the items\n",
|
||
|
" \n",
|
||
|
" recommendations = pd.DataFrame(columns=['user_id', 'item_id', 'score'])\n",
|
||
|
" \n",
|
||
|
" for ix, user in users_df.iterrows():\n",
|
||
|
" prep_user = torch.from_numpy(user[['user_term_WinterVacation', 'user_term_Easter', 'user_term_OffSeason', 'user_term_HighSeason', 'user_term_LowSeason', 'user_term_MayLongWeekend', 'user_term_NewYear', 'user_term_Christmas', 'user_rate_plan_Standard', 'user_rate_plan_Nonref', 'user_room_segment_[0-160]', 'user_room_segment_[160-260]', 'user_room_segment_[260-360]', 'user_room_segment_[360-500]', 'user_room_segment_[500-900]', 'user_n_people_bucket_[1-1]', 'user_n_people_bucket_[2-2]', 'user_n_people_bucket_[3-4]', 'user_n_people_bucket_[5-inf]', 'user_weekend_stay_True', 'user_weekend_stay_False']].to_numpy()).float()\n",
|
||
|
" \n",
|
||
|
" scores = self.net(prep_user).detach().numpy()\n",
|
||
|
" \n",
|
||
|
" chosen_ids = np.argsort(-scores)[:n_recommendations]\n",
|
||
|
" \n",
|
||
|
" recommendations = []\n",
|
||
|
" for item_id in chosen_ids:\n",
|
||
|
" recommendations.append(\n",
|
||
|
" {\n",
|
||
|
" 'user_id': user['user_id'],\n",
|
||
|
" 'item_id': item_id,\n",
|
||
|
" 'score': scores[item_id]\n",
|
||
|
" }\n",
|
||
|
" )\n",
|
||
|
" \n",
|
||
|
" user_recommendations = pd.DataFrame(recommendations)\n",
|
||
|
" \n",
|
||
|
" self.recommender_df = pd.concat([self.recommender_df, user_recommendations])\n",
|
||
|
" \n",
|
||
|
" return self.recommender_df\n",
|
||
|
"\n",
|
||
|
"# Fit method\n",
|
||
|
"# nn_recommender = NNRecommender(10000, 0.02)\n",
|
||
|
"# nn_recommender.fit(interactions_df.head(1000), None, None)\n",
|
||
|
"# nn_recommender.fit(interactions_df, None, None)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "copyrighted-relative",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Quick test of the recommender"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 412,
|
||
|
"id": "greatest-canon",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"items_df = interactions_df.loc[:, ['item_id'] + base_item_features].drop_duplicates()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 413,
|
||
|
"id": "initial-capital",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"epoch 0\n",
|
||
|
" Train set - loss: 6.042, accuracy: 0.011\n",
|
||
|
" Test set - loss: 6.025, accuracy: 0.0\n",
|
||
|
" \n",
|
||
|
"epoch 100\n",
|
||
|
" Train set - loss: 1.162, accuracy: 0.506\n",
|
||
|
" Test set - loss: 36.526, accuracy: 0.0\n",
|
||
|
" \n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Fit method\n",
|
||
|
"nn_recommender = NNRecommender(n_epochs=200, lr=0.01)\n",
|
||
|
"nn_recommender.fit(interactions_df.head(1000), None, None)\n",
|
||
|
"# nn_recommender.fit(interactions_df, None, None)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 414,
|
||
|
"id": "digital-consolidation",
|
||
|
"metadata": {
|
||
|
"scrolled": true
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>user_id</th>\n",
|
||
|
" <th>item_id</th>\n",
|
||
|
" <th>score</th>\n",
|
||
|
" <th>term</th>\n",
|
||
|
" <th>length_of_stay_bucket</th>\n",
|
||
|
" <th>rate_plan</th>\n",
|
||
|
" <th>room_segment</th>\n",
|
||
|
" <th>n_people_bucket</th>\n",
|
||
|
" <th>weekend_stay</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>119</td>\n",
|
||
|
" <td>5.364058</td>\n",
|
||
|
" <td>Easter</td>\n",
|
||
|
" <td>[2-3]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[160-260]</td>\n",
|
||
|
" <td>[2-2]</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>88</td>\n",
|
||
|
" <td>5.033441</td>\n",
|
||
|
" <td>WinterVacation</td>\n",
|
||
|
" <td>[0-1]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[160-260]</td>\n",
|
||
|
" <td>[2-2]</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>57</td>\n",
|
||
|
" <td>4.771185</td>\n",
|
||
|
" <td>WinterVacation</td>\n",
|
||
|
" <td>[2-3]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[160-260]</td>\n",
|
||
|
" <td>[2-2]</td>\n",
|
||
|
" <td>True</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>3.0</td>\n",
|
||
|
" <td>2</td>\n",
|
||
|
" <td>11.286193</td>\n",
|
||
|
" <td>WinterVacation</td>\n",
|
||
|
" <td>[2-3]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[160-260]</td>\n",
|
||
|
" <td>[2-2]</td>\n",
|
||
|
" <td>False</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>3.0</td>\n",
|
||
|
" <td>74</td>\n",
|
||
|
" <td>10.848604</td>\n",
|
||
|
" <td>WinterVacation</td>\n",
|
||
|
" <td>[4-7]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[160-260]</td>\n",
|
||
|
" <td>[2-2]</td>\n",
|
||
|
" <td>False</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5</th>\n",
|
||
|
" <td>3.0</td>\n",
|
||
|
" <td>81</td>\n",
|
||
|
" <td>10.656947</td>\n",
|
||
|
" <td>WinterVacation</td>\n",
|
||
|
" <td>[0-1]</td>\n",
|
||
|
" <td>Standard</td>\n",
|
||
|
" <td>[160-260]</td>\n",
|
||
|
" <td>[2-2]</td>\n",
|
||
|
" <td>False</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Recommender method\n",
|
||
|
"\n",
|
||
|
"recommendations = nn_recommender.recommend(pd.DataFrame([[1],[3]], columns=['user_id']), items_df, 3)\n",
|
||
|
"\n",
|
||
|
"recommendations = pd.merge(recommendations, items_df, on='item_id', how='left')\n",
|
||
|
"display(HTML(recommendations.to_html()))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "advanced-eleven",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Tuning method"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 310,
|
||
|
"id": "strange-alaska",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"from evaluation_and_testing.testing import evaluate_train_test_split_implicit\n",
|
||
|
"\n",
|
||
|
"seed = 6789"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 311,
|
||
|
"id": "stable-theta",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"from hyperopt import hp, fmin, tpe, Trials\n",
|
||
|
"import traceback\n",
|
||
|
"\n",
|
||
|
"def tune_recommender(recommender_class, interactions_df, items_df, \n",
|
||
|
" param_space, max_evals=1, show_progressbar=True, seed=6789):\n",
|
||
|
" # Split into train_validation and test sets\n",
|
||
|
"\n",
|
||
|
" shuffle = np.arange(len(interactions_df))\n",
|
||
|
" rng = np.random.RandomState(seed=seed)\n",
|
||
|
" rng.shuffle(shuffle)\n",
|
||
|
" shuffle = list(shuffle)\n",
|
||
|
"\n",
|
||
|
" train_test_split = 0.8\n",
|
||
|
" split_index = int(len(interactions_df) * train_test_split)\n",
|
||
|
"\n",
|
||
|
" train_validation = interactions_df.iloc[shuffle[:split_index]]\n",
|
||
|
" test = interactions_df.iloc[shuffle[split_index:]]\n",
|
||
|
"\n",
|
||
|
" # Tune\n",
|
||
|
"\n",
|
||
|
" def loss(tuned_params):\n",
|
||
|
" recommender = recommender_class(seed=seed, **tuned_params)\n",
|
||
|
" hr1, hr3, hr5, hr10, ndcg1, ndcg3, ndcg5, ndcg10 = evaluate_train_test_split_implicit(\n",
|
||
|
" recommender, train_validation, items_df, seed=seed)\n",
|
||
|
" return -hr10\n",
|
||
|
"\n",
|
||
|
" n_tries = 1\n",
|
||
|
" succeded = False\n",
|
||
|
" try_id = 0\n",
|
||
|
" while not succeded and try_id < n_tries:\n",
|
||
|
" try:\n",
|
||
|
" trials = Trials()\n",
|
||
|
" best_param_set = fmin(loss, space=param_space, algo=tpe.suggest, \n",
|
||
|
" max_evals=max_evals, show_progressbar=show_progressbar, trials=trials, verbose=True)\n",
|
||
|
" succeded = True\n",
|
||
|
" except:\n",
|
||
|
" traceback.print_exc()\n",
|
||
|
" try_id += 1\n",
|
||
|
" \n",
|
||
|
" if not succeded:\n",
|
||
|
" return None\n",
|
||
|
" \n",
|
||
|
" # Validate\n",
|
||
|
" \n",
|
||
|
" recommender = recommender_class(seed=seed, **best_param_set)\n",
|
||
|
"\n",
|
||
|
" results = [[recommender_class.__name__] + list(evaluate_train_test_split_implicit(\n",
|
||
|
" recommender, {'train': train_validation, 'test': test}, items_df, seed=seed))]\n",
|
||
|
"\n",
|
||
|
" results = pd.DataFrame(results, \n",
|
||
|
" columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n",
|
||
|
"\n",
|
||
|
" display(HTML(results.to_html()))\n",
|
||
|
" \n",
|
||
|
" return best_param_set"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "reliable-switzerland",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"## Tuning of the recommender\n",
|
||
|
"\n",
|
||
|
"<span style=\"color:red\"><font size=\"4\">**Task:**</font></span><br> \n",
|
||
|
"Tune your model using the code below. You only need to put the class name of your recommender and choose an appropriate parameter space."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 447,
|
||
|
"id": "obvious-astrology",
|
||
|
"metadata": {
|
||
|
"scrolled": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"epoch 0 \n",
|
||
|
" Train set - loss: 6.797\n",
|
||
|
" Test set - loss: 6.793\n",
|
||
|
" \n",
|
||
|
"epoch 1000 \n",
|
||
|
" Train set - loss: 1.009\n",
|
||
|
" Test set - loss: 29.285\n",
|
||
|
" \n",
|
||
|
"epoch 2000 \n",
|
||
|
" Train set - loss: 1.055\n",
|
||
|
" Test set - loss: 30.205\n",
|
||
|
" \n",
|
||
|
"epoch 3000 \n",
|
||
|
" Train set - loss: 0.971\n",
|
||
|
" Test set - loss: 35.335\n",
|
||
|
" \n",
|
||
|
"epoch 4000 \n",
|
||
|
" Train set - loss: 0.948\n",
|
||
|
" Test set - loss: 35.459\n",
|
||
|
" \n",
|
||
|
"epoch 5000 \n",
|
||
|
" Train set - loss: 0.927\n",
|
||
|
" Test set - loss: 35.575\n",
|
||
|
" \n",
|
||
|
"epoch 6000 \n",
|
||
|
" Train set - loss: 0.968\n",
|
||
|
" Test set - loss: 37.951\n",
|
||
|
" \n",
|
||
|
"epoch 7000 \n",
|
||
|
" Train set - loss: 0.963\n",
|
||
|
" Test set - loss: 50.067\n",
|
||
|
" \n",
|
||
|
"epoch 8000 \n",
|
||
|
" Train set - loss: 0.919\n",
|
||
|
" Test set - loss: 48.694\n",
|
||
|
" \n",
|
||
|
"epoch 9000 \n",
|
||
|
" Train set - loss: 0.888\n",
|
||
|
" Test set - loss: 51.907\n",
|
||
|
" \n",
|
||
|
"epoch 10000 \n",
|
||
|
" Train set - loss: 4.246\n",
|
||
|
" Test set - loss: 115.464\n",
|
||
|
" \n",
|
||
|
"epoch 11000 \n",
|
||
|
" Train set - loss: 0.911\n",
|
||
|
" Test set - loss: 57.464\n",
|
||
|
" \n",
|
||
|
"epoch 12000 \n",
|
||
|
" Train set - loss: 0.872\n",
|
||
|
" Test set - loss: 64.896\n",
|
||
|
" \n",
|
||
|
"epoch 13000 \n",
|
||
|
" Train set - loss: 0.931\n",
|
||
|
" Test set - loss: 52.029\n",
|
||
|
" \n",
|
||
|
"epoch 14000 \n",
|
||
|
" Train set - loss: 1.024\n",
|
||
|
" Test set - loss: 56.175\n",
|
||
|
" \n",
|
||
|
" 0%| | 0/10 [18:33<?, ?trial/s, best loss=?]\n",
|
||
|
"Best parameters:\n",
|
||
|
"None\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Traceback (most recent call last):\n",
|
||
|
" File \"<ipython-input-311-164530239fdf>\", line 33, in tune_recommender\n",
|
||
|
" best_param_set = fmin(loss, space=param_space, algo=tpe.suggest,\n",
|
||
|
" File \"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/hyperopt/fmin.py\", line 507, in fmin\n",
|
||
|
" return trials.fmin(\n",
|
||
|
" File \"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/hyperopt/base.py\", line 682, in fmin\n",
|
||
|
" return fmin(\n",
|
||
|
" File \"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/hyperopt/fmin.py\", line 553, in fmin\n",
|
||
|
" rval.exhaust()\n",
|
||
|
" File \"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/hyperopt/fmin.py\", line 356, in exhaust\n",
|
||
|
" self.run(self.max_evals - n_done, block_until_done=self.asynchronous)\n",
|
||
|
" File \"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/hyperopt/fmin.py\", line 292, in run\n",
|
||
|
" self.serial_evaluate()\n",
|
||
|
" File \"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/hyperopt/fmin.py\", line 170, in serial_evaluate\n",
|
||
|
" result = self.domain.evaluate(spec, ctrl)\n",
|
||
|
" File \"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/hyperopt/base.py\", line 907, in evaluate\n",
|
||
|
" rval = self.fn(pyll_rval)\n",
|
||
|
" File \"<ipython-input-311-164530239fdf>\", line 23, in loss\n",
|
||
|
" hr1, hr3, hr5, hr10, ndcg1, ndcg3, ndcg5, ndcg10 = evaluate_train_test_split_implicit(\n",
|
||
|
" File \"/home/jovyan/REK/evaluation_and_testing/testing.py\", line 93, in evaluate_train_test_split_implicit\n",
|
||
|
" recommender.fit(interactions_df_train, None, items_df)\n",
|
||
|
" File \"<ipython-input-446-dd9ef74b6c5d>\", line 192, in fit\n",
|
||
|
" train_loss.backward()\n",
|
||
|
" File \"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/torch/tensor.py\", line 245, in backward\n",
|
||
|
" torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)\n",
|
||
|
" File \"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/torch/autograd/__init__.py\", line 145, in backward\n",
|
||
|
" Variable._execution_engine.run_backward(\n",
|
||
|
"KeyboardInterrupt\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"param_space = {\n",
|
||
|
" 'n_neg_per_pos': hp.quniform('n_neg_per_pos', 1, 10, 1)\n",
|
||
|
"}\n",
|
||
|
"items_df['item_id'].unique().size\n",
|
||
|
"\n",
|
||
|
"best_param_set = tune_recommender(NNRecommender, interactions_df, items_df,\n",
|
||
|
" param_space, max_evals=10, show_progressbar=True, seed=seed)\n",
|
||
|
"\n",
|
||
|
"print(\"Best parameters:\")\n",
|
||
|
"print(best_param_set)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "accredited-strap",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Final evaluation\n",
|
||
|
"\n",
|
||
|
"<span style=\"color:red\"><font size=\"4\">**Task:**</font></span><br> \n",
|
||
|
"Run the final evaluation of your recommender and present its results against the Amazon and Netflix recommenders' results. You just need to give the class name of your recommender and its tuned parameters below."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "given-homework",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"epoch 0\n",
|
||
|
" Train set - loss: 6.842\n",
|
||
|
" Test set - loss: 6.843\n",
|
||
|
" \n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"nn_recommender = NNRecommender(n_neg_per_pos=6, n_epochs=20000) # Initialize your recommender here\n",
|
||
|
"\n",
|
||
|
"# Give the name of your recommender in the line below\n",
|
||
|
"nn_tts_results = [['NNRecommender'] + list(evaluate_train_test_split_implicit(\n",
|
||
|
" nn_recommender, interactions_df, items_df))]\n",
|
||
|
"\n",
|
||
|
"nn_tts_results = pd.DataFrame(\n",
|
||
|
" nn_tts_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n",
|
||
|
"\n",
|
||
|
"display(HTML(nn_tts_results.to_html()))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 314,
|
||
|
"id": "suited-nomination",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>Recommender</th>\n",
|
||
|
" <th>HR@1</th>\n",
|
||
|
" <th>HR@3</th>\n",
|
||
|
" <th>HR@5</th>\n",
|
||
|
" <th>HR@10</th>\n",
|
||
|
" <th>NDCG@1</th>\n",
|
||
|
" <th>NDCG@3</th>\n",
|
||
|
" <th>NDCG@5</th>\n",
|
||
|
" <th>NDCG@10</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>AmazonRecommender</td>\n",
|
||
|
" <td>0.042119</td>\n",
|
||
|
" <td>0.10464</td>\n",
|
||
|
" <td>0.140507</td>\n",
|
||
|
" <td>0.199408</td>\n",
|
||
|
" <td>0.042119</td>\n",
|
||
|
" <td>0.076826</td>\n",
|
||
|
" <td>0.091797</td>\n",
|
||
|
" <td>0.110711</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"from recommenders.amazon_recommender import AmazonRecommender\n",
|
||
|
"\n",
|
||
|
"amazon_recommender = AmazonRecommender()\n",
|
||
|
"\n",
|
||
|
"amazon_tts_results = [['AmazonRecommender'] + list(evaluate_train_test_split_implicit(\n",
|
||
|
" amazon_recommender, interactions_df, items_df))]\n",
|
||
|
"\n",
|
||
|
"amazon_tts_results = pd.DataFrame(\n",
|
||
|
" amazon_tts_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n",
|
||
|
"\n",
|
||
|
"display(HTML(amazon_tts_results.to_html()))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 315,
|
||
|
"id": "conservative-remedy",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAbwAAAI4CAYAAAAReVyMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAABE1UlEQVR4nO3de3ycdZ33/9cnM5NMzkmTtCk90II9AKVALYcV5SDggq6iLmoR+YmKiKu3h929f6K/XVH3dm93f6y3694gIuJhZUFEVG4F8bCgshxsOQgU2lJKoaG0TZO2ac6nz/3HdU06SSfJpJlkkrnez8djnJnrumbmmzHNm+/3+l6fr7k7IiIiha4o3w0QERGZDgo8ERGJBAWeiIhEggJPREQiQYEnIiKRoMATEZFIUOCJiEgkKPBE8szMtpvZ+fluh0ihU+CJiEgkKPBEZiAzKzGzr5nZzvD2NTMrCffVm9nPzWy/mbWa2R/MrCjc9xkze8XMDprZZjM7L78/icjMEc93A0Qko/8POAM4GXDgZ8DfAX8P/A3QBDSEx54BuJmtAD4OnOruO81sCRCb3maLzFzq4YnMTJcBX3L3Pe7eDHwRuDzc1wfMB4529z53/4MHRXEHgBLgeDNLuPt2d38hL60XmYEUeCIz01HAS2nPXwq3Afz/wFbgV2a2zcyuAXD3rcCngC8Ae8zsdjM7ChEBFHgiM9VO4Oi054vDbbj7QXf/G3c/Bngr8Nepc3Xu/h/u/vrwtQ780/Q2W2TmUuCJzAwJM0umbsBtwN+ZWYOZ1QOfB34AYGZ/YWavMTMD2giGMgfMbIWZvTGc3NINdIX7RAQFnshMcQ9BQKVuSWAD8BTwNPA48D/CY5cBvwHagYeBG9z9AYLzd18B9gK7gLnA56btJxCZ4UwLwIqISBSohyciIpGgwBMRkUhQ4ImISCQo8EREJBJmZGmx+vp6X7JkSb6bISIis9Bjjz22190bRm6fkYG3ZMkSNmzYkO9miIjILGRmL2XariFNERGJBAWeiIhEggJPREQiQYEnIiKRoMATEZFIUOCJiEgkKPBERCQSFHgiIhIJCjwREYkEBZ6IiESCAk9ERCJBgSciIpGgwBMRkUhQ4ImISCQo8EREJBIUeCIiEgkKPBERiQQFnoiIRIICT0REIkGBJyIikVC4gffCf8Ke5/LdChERmSEKN/B+eDk89r18t0JERGaIwg28ZA107893K0REZIYo3MArrYGu/fluhYiIzBCFG3jq4YmISJrCDTz18EREJE3hBp56eCIikqZwA089PBERSVO4gZesgf4u6O/Jd0tERGQGKNzAK60J7rsP5LUZIiIyMxRu4CVrgnsNa4qICFkGnpldaGabzWyrmV2TYf9lZvZUeHvIzE4asT9mZk+Y2c9z1fBxDfXw9k/bR4qIyMw1buCZWQy4HrgIOB641MyOH3HYi8DZ7r4a+AfgphH7PwlMb2FL9fBERCRNNj2804Ct7r7N3XuB24GL0w9w94fcfV/49BFgYWqfmS0E3gLcnJsmZylZHdyrhyciImQXeAuAHWnPm8Jto/kQcG/a868B/y8wONaHmNlVZrbBzDY0Nzdn0axxpIY01cMTERGyCzzLsM0zHmh2LkHgfSZ8/hfAHnd/bLwPcfeb3H2tu69taGjIolnjSA1pqocnIiJAPItjmoBFac8XAjtHHmRmqwmGLS9y95Zw85nA28zszUASqDKzH7j7+ybX7CzEiyFRph6eiIgA2fXw1gPLzGypmRUD64C70w8ws8XAXcDl7r4ltd3dP+vuC919Sfi6/5yWsEtReTEREQmN28Nz934z+zhwHxADbnH3jWZ2dbj/RuDzQB1wg5kB9Lv72qlrdpZUXkxERELZDGni7vcA94zYdmPa4yuBK8d5jweABybcwslQD09EREKFW2kFgh6eSouJiAiFHnjJGg1piogIUOiBV1qjIU0REQEKPfCSNdDbDgN9+W6JiIjkWW
EHnpYIEhGRUGEHXqqeps7jiYhEXoEHXk1wr/N4IiKRV9iBpwLSIiISKuzAUw9PRERChR14Qz28fWMeJiIiha+wA089PBERCRV24CWSEE/qHJ6IiBR44EFYQFrX4YmIRF3hB57Ki4mICFEIPBWQFhERohB46uGJiAhRCLxkDXTpHJ6ISNRFIPCq1cMTEZEIBF5pDfS0weBAvlsiIiJ5VPiBN3TxuYY1RUSirPADT+XFRESEKASeyouJiAhRCDwtESQiIkQh8NTDExERohB4qR6eJq2IiERa4QdeqoenIU0RkUgr/MBLlEKsWEOaIiIRV/iBZ6YC0iIiEoHAAxWQFhGRiAReslo9PBGRiItI4NWohyciEnHRCLzSGvXwREQiLhqBpx6eiEjkRSPwSmuguw0GB/PdEhERyZNoBF6yBnDoUbUVEZGoikbgqYC0iEjkRSPwtAisiEjkRSPwhgpI789nK0REJI+iEXgqIC0iEnnRCDz18EREIi8agacenohI5EUj8IrLoSiuHp6ISIRFI/DMVEBaRCTiohF4oPJiIiIRF53AUwFpEZFIi07gqYcnIhJp0Qk89fBERCItq8AzswvNbLOZbTWzazLsv8zMngpvD5nZSeH2pJn90cz+ZGYbzeyLuf4BspasUWkxEZEIi493gJnFgOuBC4AmYL2Z3e3uz6Yd9iJwtrvvM7OLgJuA04Ee4I3u3m5mCeBBM7vX3R/J+U8yntKaIPDcg1mbIiISKdn08E4Dtrr7NnfvBW4HLk4/wN0fcvd94dNHgIXhdnf39nB7Irx5Tlo+Ucka8AHoOZiXjxcRkfzKJvAWADvSnjeF20bzIeDe1BMzi5nZk8Ae4Nfu/ugRtHPyVF5MRCTSsgm8TON/GXtpZnYuQeB9ZuhA9wF3P5mg13eama0a5bVXmdkGM9vQ3NycRbMmSOXFREQiLZvAawIWpT1fCOwceZCZrQZuBi5295aR+919P/AAcGGmD3H3m9x9rbuvbWhoyKJZE6QenohIpGUTeOuBZWa21MyKgXXA3ekHmNli4C7gcnffkra9wcxqwselwPnAphy1fWLUwxMRibRxZ2m6e7+ZfRy4D4gBt7j7RjO7Otx/I/B5oA64wYIZkP3uvhaYD3wvnOlZBNzh7j+fmh9lHMnq4F49PBGRSBo38ADc/R7gnhHbbkx7fCVwZYbXPQWcMsk25kZqSFM9PBGRSIpOpZXiSrAi9fBERCIqOoFXVKQlgkREIiw6gQcqIC0iEmHRCrxUeTEREYmcgg28x17ax4t7O4ZvTNZoSFNEJKIKNvDef8sf+feHXxq+sbRGQ5oiIhFVsIFXlYzT1t03fKN6eCIikVW4gVea4EDXiMBL9fA8Pws2iIhI/hR04LWNDLxkDQz2Q29HxteIiEjhKtjAqx6thwc6jyciEkEFG3hVyQQHu/uHb0zV09R5PBGRyCnYwMvYw0utmKAenohI5BRs4FWVxmnv6ad/YPDQRhWQFhGJrIINvOrSBMDwYU318EREIqtgA68qGQTesGFN9fBERCKrYAMv1cMbdvF5STVgqqcpIhJBhRt4ZRl6eEVFkKzSkKaISAQVbOClhjTbukZemlCjIU0RkQgq2MBLDWmOWl5MREQipWADr6o0DqAC0iIiAhRw4JUmYiRiph6eiIgABRx4ZkZVcpQC0urhiYhETsEGHoxWXqxaSwSJiERQQQdeZWmCtpEFpEtrYKAX+rry0iYREcmPgg48FZAWEZGUgg68qmT88HN4Ki8mIhJJBR141aOteg7q4YmIRExBB15VOKTp6RNUhlY9Vz1NEZEoKejAqy5N0D/odPUNHNqY6uFpSFNEJFIKOvAyLxFUG9xrSFNEJFIKOvCGlghKLyCdrA7u1cMTEYmUSATe8CWCYlCiJYJERKKmoANvqIC0youJiEReQQfe6EsEVauHJyISMQUdeEOLwGqJIBGRyCvowKtMBkOaoxaQFhGRyCjowIvHiqgoiQ+fpQnBxefq4Y
mIREpBBx6MUUBaPTwRkUgp+MCrTMYzr3re3w193Xlpk4iITL+CD7zq0kTmSSugepoiIhFS8IFXlWnFBJUXExGJnII
|
||
|
"text/plain": [
|
||
|
"<Figure size 864x576 with 2 Axes>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {
|
||
|
"needs_background": "light"
|
||
|
},
|
||
|
"output_type": "display_data"
|
||
|
},
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Loss\n",
|
||
|
"\ttraining \t (min: 0.161, max: 0.228, cur: 0.161)\n",
|
||
|
"\tvalidation \t (min: 0.176, max: 0.242, cur: 0.177)\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>Recommender</th>\n",
|
||
|
" <th>HR@1</th>\n",
|
||
|
" <th>HR@3</th>\n",
|
||
|
" <th>HR@5</th>\n",
|
||
|
" <th>HR@10</th>\n",
|
||
|
" <th>NDCG@1</th>\n",
|
||
|
" <th>NDCG@3</th>\n",
|
||
|
" <th>NDCG@5</th>\n",
|
||
|
" <th>NDCG@10</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>NetflixRecommender</td>\n",
|
||
|
" <td>0.042777</td>\n",
|
||
|
" <td>0.106614</td>\n",
|
||
|
" <td>0.143139</td>\n",
|
||
|
" <td>0.200395</td>\n",
|
||
|
" <td>0.042777</td>\n",
|
||
|
" <td>0.078228</td>\n",
|
||
|
" <td>0.093483</td>\n",
|
||
|
" <td>0.111724</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"from recommenders.netflix_recommender import NetflixRecommender\n",
|
||
|
"\n",
|
||
|
"netflix_recommender = NetflixRecommender(n_epochs=30, print_type='live')\n",
|
||
|
"\n",
|
||
|
"netflix_tts_results = [['NetflixRecommender'] + list(evaluate_train_test_split_implicit(\n",
|
||
|
" netflix_recommender, interactions_df, items_df))]\n",
|
||
|
"\n",
|
||
|
"netflix_tts_results = pd.DataFrame(\n",
|
||
|
" netflix_tts_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n",
|
||
|
"\n",
|
||
|
"display(HTML(netflix_tts_results.to_html()))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 435,
|
||
|
"id": "moderate-printing",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>Recommender</th>\n",
|
||
|
" <th>HR@1</th>\n",
|
||
|
" <th>HR@3</th>\n",
|
||
|
" <th>HR@5</th>\n",
|
||
|
" <th>HR@10</th>\n",
|
||
|
" <th>NDCG@1</th>\n",
|
||
|
" <th>NDCG@3</th>\n",
|
||
|
" <th>NDCG@5</th>\n",
|
||
|
" <th>NDCG@10</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>NNRecommender</td>\n",
|
||
|
" <td>0.025008</td>\n",
|
||
|
" <td>0.035209</td>\n",
|
||
|
" <td>0.066469</td>\n",
|
||
|
" <td>0.116815</td>\n",
|
||
|
" <td>0.025008</td>\n",
|
||
|
" <td>0.031100</td>\n",
|
||
|
" <td>0.043697</td>\n",
|
||
|
" <td>0.059459</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>AmazonRecommender</td>\n",
|
||
|
" <td>0.042119</td>\n",
|
||
|
" <td>0.104640</td>\n",
|
||
|
" <td>0.140507</td>\n",
|
||
|
" <td>0.199408</td>\n",
|
||
|
" <td>0.042119</td>\n",
|
||
|
" <td>0.076826</td>\n",
|
||
|
" <td>0.091797</td>\n",
|
||
|
" <td>0.110711</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>NetflixRecommender</td>\n",
|
||
|
" <td>0.042777</td>\n",
|
||
|
" <td>0.106614</td>\n",
|
||
|
" <td>0.143139</td>\n",
|
||
|
" <td>0.200395</td>\n",
|
||
|
" <td>0.042777</td>\n",
|
||
|
" <td>0.078228</td>\n",
|
||
|
" <td>0.093483</td>\n",
|
||
|
" <td>0.111724</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"tts_results = pd.concat([nn_tts_results, amazon_tts_results, netflix_tts_results]).reset_index(drop=True)\n",
|
||
|
"display(HTML(tts_results.to_html()))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "uniform-vegetable",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Summary\n",
|
||
|
"\n",
|
||
|
"<span style=\"color:red\"><font size=\"4\">**Task:**</font></span><br> \n",
|
||
|
"Write a summary of your experiments. What worked well and what did not? What are your thoughts how could you possibly further improve the model?"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "1b89411a",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"What did not work:\n",
|
||
|
"- I tried to use softmax; it wasn't a good idea.\n",
|
||
|
"- Firstly, I copied and pasted, without thinking, some code from a tutorial on binary linear regression. BCELoss is not a good idea for multi-class classification.\n",
|
||
|
"- More layers don't mean better results.\n",
|
||
|
"- More epochs don't always mean better results.\n",
|
||
|
"\n",
|
||
|
"What did work well:\n",
|
||
|
"- The dropout layer improved results significantly (HR@10 from 0.03 to 0.116).\n",
|
||
|
"- Using all features gives me the best results.\n",
|
||
|
"\n",
|
||
|
" \n",
|
||
|
"How to further improve model:\n",
|
||
|
"- Add more data or more features\n",
|
||
|
"- Work on network layout\n",
|
||
|
" \n",
|
||
|
"\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"kernelspec": {
|
||
|
"display_name": "rek_uno",
|
||
|
"language": "python",
|
||
|
"name": "rek_uno"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
|
"version": "3.8.8"
|
||
|
}
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 5
|
||
|
}
|