%matplotlib inline
%load_ext autoreload
%autoreload 2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import Markdown, display, HTML
from collections import defaultdict
import torch
import torch.nn as nn
import torch.optim as optim
from livelossplot import PlotLosses
# Fix the dying kernel problem (only an issue in some installations - remove it if everything works without it)
import os
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
Load the dataset for recommenders
data_path = os.path.join("data", "hotel_data")
interactions_df = pd.read_csv(os.path.join(data_path, "hotel_data_interactions_df.csv"), index_col=0)
base_item_features = ['term', 'length_of_stay_bucket', 'rate_plan', 'room_segment', 'n_people_bucket', 'weekend_stay']
column_values_dict = {
'term': ['WinterVacation', 'Easter', 'OffSeason', 'HighSeason', 'LowSeason', 'MayLongWeekend', 'NewYear', 'Christmas'],
'length_of_stay_bucket': ['[0-1]', '[2-3]', '[4-7]', '[8-inf]'],
'rate_plan': ['Standard', 'Nonref'],
'room_segment': ['[0-160]', '[160-260]', '[260-360]', '[360-500]', '[500-900]'],
'n_people_bucket': ['[1-1]', '[2-2]', '[3-4]', '[5-inf]'],
'weekend_stay': ['True', 'False']
}
interactions_df.loc[:, 'term'] = pd.Categorical(
interactions_df['term'], categories=column_values_dict['term'])
interactions_df.loc[:, 'length_of_stay_bucket'] = pd.Categorical(
interactions_df['length_of_stay_bucket'], categories=column_values_dict['length_of_stay_bucket'])
interactions_df.loc[:, 'rate_plan'] = pd.Categorical(
interactions_df['rate_plan'], categories=column_values_dict['rate_plan'])
interactions_df.loc[:, 'room_segment'] = pd.Categorical(
interactions_df['room_segment'], categories=column_values_dict['room_segment'])
interactions_df.loc[:, 'n_people_bucket'] = pd.Categorical(
interactions_df['n_people_bucket'], categories=column_values_dict['n_people_bucket'])
interactions_df.loc[:, 'weekend_stay'] = interactions_df['weekend_stay'].astype('str')
interactions_df.loc[:, 'weekend_stay'] = pd.Categorical(
interactions_df['weekend_stay'], categories=column_values_dict['weekend_stay'])
display(HTML(interactions_df.head(15).to_html()))
 | user_id | item_id | term | length_of_stay_bucket | rate_plan | room_segment | n_people_bucket | weekend_stay |
---|---|---|---|---|---|---|---|---|
0 | 1 | 0 | WinterVacation | [2-3] | Standard | [260-360] | [5-inf] | True |
1 | 2 | 1 | WinterVacation | [2-3] | Standard | [160-260] | [3-4] | True |
2 | 3 | 2 | WinterVacation | [2-3] | Standard | [160-260] | [2-2] | False |
3 | 4 | 3 | WinterVacation | [4-7] | Standard | [160-260] | [3-4] | True |
4 | 5 | 4 | WinterVacation | [4-7] | Standard | [0-160] | [2-2] | True |
5 | 6 | 5 | Easter | [4-7] | Standard | [260-360] | [5-inf] | True |
6 | 7 | 6 | OffSeason | [2-3] | Standard | [260-360] | [5-inf] | True |
7 | 8 | 7 | HighSeason | [2-3] | Standard | [160-260] | [1-1] | True |
8 | 9 | 8 | HighSeason | [2-3] | Standard | [0-160] | [1-1] | True |
9 | 8 | 7 | HighSeason | [2-3] | Standard | [160-260] | [1-1] | True |
10 | 8 | 7 | HighSeason | [2-3] | Standard | [160-260] | [1-1] | True |
11 | 10 | 9 | HighSeason | [2-3] | Standard | [160-260] | [3-4] | True |
12 | 11 | 9 | HighSeason | [2-3] | Standard | [160-260] | [3-4] | True |
13 | 12 | 10 | HighSeason | [8-inf] | Standard | [160-260] | [3-4] | True |
14 | 14 | 11 | HighSeason | [2-3] | Standard | [0-160] | [3-4] | True |
(Optional) Prepare numerical user features
The method below is left here for convenience if you want to experiment with content-based user features as an input for your neural network.
def n_to_p(l):
n = sum(l)
return [x / n for x in l] if n > 0 else l
def calculate_p(x, values):
counts = [0]*len(values)
for v in x:
counts[values.index(v)] += 1
return n_to_p(counts)
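# A quick sanity check of the helpers above with made-up values (a hypothetical example, not part of the original notebook):
# calculate_p counts how often each allowed value occurs and normalizes the counts into a probability vector.
# calculate_p(['Standard', 'Nonref', 'Standard', 'Standard'], ['Standard', 'Nonref'])  # -> [0.75, 0.25]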
def prepare_users_df(interactions_df):
users_df = interactions_df.loc[:, ["user_id"]]
users_df = users_df.groupby("user_id").first().reset_index(drop=False)
user_features = []
for column in base_item_features:
column_values = column_values_dict[column]
df = interactions_df.loc[:, ['user_id', column]]
df = df.groupby('user_id').aggregate(lambda x: list(x)).reset_index(drop=False)
def calc_p(x):
return calculate_p(x, column_values)
df.loc[:, column] = df[column].apply(lambda x: calc_p(x))
p_columns = []
for i in range(len(column_values)):
p_columns.append("user_" + column + "_" + column_values[i])
df.loc[:, p_columns[i]] = df[column].apply(lambda x: x[i])
user_features.append(p_columns[i])
users_df = pd.merge(users_df, df.loc[:, ['user_id'] + p_columns], on=["user_id"])
return users_df, user_features
users_df, user_features = prepare_users_df(interactions_df)
print(user_features)
display(HTML(users_df.loc[users_df['user_id'].isin([706, 1736, 7779, 96, 1, 50, 115])].head(15).to_html()))
['user_term_WinterVacation', 'user_term_Easter', 'user_term_OffSeason', 'user_term_HighSeason', 'user_term_LowSeason', 'user_term_MayLongWeekend', 'user_term_NewYear', 'user_term_Christmas', 'user_length_of_stay_bucket_[0-1]', 'user_length_of_stay_bucket_[2-3]', 'user_length_of_stay_bucket_[4-7]', 'user_length_of_stay_bucket_[8-inf]', 'user_rate_plan_Standard', 'user_rate_plan_Nonref', 'user_room_segment_[0-160]', 'user_room_segment_[160-260]', 'user_room_segment_[260-360]', 'user_room_segment_[360-500]', 'user_room_segment_[500-900]', 'user_n_people_bucket_[1-1]', 'user_n_people_bucket_[2-2]', 'user_n_people_bucket_[3-4]', 'user_n_people_bucket_[5-inf]', 'user_weekend_stay_True', 'user_weekend_stay_False']
 | user_id | user_term_WinterVacation | user_term_Easter | user_term_OffSeason | user_term_HighSeason | user_term_LowSeason | user_term_MayLongWeekend | user_term_NewYear | user_term_Christmas | user_length_of_stay_bucket_[0-1] | user_length_of_stay_bucket_[2-3] | user_length_of_stay_bucket_[4-7] | user_length_of_stay_bucket_[8-inf] | user_rate_plan_Standard | user_rate_plan_Nonref | user_room_segment_[0-160] | user_room_segment_[160-260] | user_room_segment_[260-360] | user_room_segment_[360-500] | user_room_segment_[500-900] | user_n_people_bucket_[1-1] | user_n_people_bucket_[2-2] | user_n_people_bucket_[3-4] | user_n_people_bucket_[5-inf] | user_weekend_stay_True | user_weekend_stay_False |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0.130435 | 0.0 | 0.652174 | 0.086957 | 0.130435 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.608696 | 0.391304 | 0.000000 | 0.521739 | 0.478261 | 0.000000 | 0.869565 | 0.130435 | 0.000000 | 0.0 | 0.000000 | 0.739130 | 0.173913 | 0.086957 | 0.782609 | 0.217391 |
47 | 50 | 0.043478 | 0.0 | 0.434783 | 0.304348 | 0.217391 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.913043 | 0.086957 | 0.000000 | 0.260870 | 0.739130 | 0.000000 | 0.565217 | 0.434783 | 0.000000 | 0.0 | 0.000000 | 0.173913 | 0.521739 | 0.304348 | 0.782609 | 0.217391 |
92 | 96 | 0.083333 | 0.0 | 0.708333 | 0.125000 | 0.041667 | 0.041667 | 0.000000 | 0.000000 | 0.250000 | 0.666667 | 0.041667 | 0.041667 | 0.291667 | 0.708333 | 0.125000 | 0.791667 | 0.083333 | 0.000000 | 0.0 | 0.041667 | 0.333333 | 0.541667 | 0.083333 | 0.750000 | 0.250000 |
111 | 115 | 0.727273 | 0.0 | 0.272727 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.500000 | 0.363636 | 0.136364 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.818182 | 0.181818 | 0.000000 | 0.0 | 0.818182 | 0.090909 | 0.045455 | 0.045455 | 0.363636 | 0.636364 |
675 | 706 | 0.091988 | 0.0 | 0.451039 | 0.189911 | 0.207715 | 0.038576 | 0.011869 | 0.008902 | 0.169139 | 0.459941 | 0.272997 | 0.097923 | 0.994065 | 0.005935 | 0.020772 | 0.839763 | 0.130564 | 0.008902 | 0.0 | 0.041543 | 0.094955 | 0.738872 | 0.124629 | 0.676558 | 0.323442 |
1699 | 1736 | 0.034483 | 0.0 | 0.482759 | 0.206897 | 0.275862 | 0.000000 | 0.000000 | 0.000000 | 0.241379 | 0.551724 | 0.206897 | 0.000000 | 0.172414 | 0.827586 | 0.000000 | 0.931034 | 0.068966 | 0.000000 | 0.0 | 0.379310 | 0.413793 | 0.206897 | 0.000000 | 0.448276 | 0.551724 |
7639 | 7779 | 0.037037 | 0.0 | 0.296296 | 0.259259 | 0.370370 | 0.000000 | 0.000000 | 0.037037 | 0.111111 | 0.296296 | 0.481481 | 0.111111 | 1.000000 | 0.000000 | 0.000000 | 0.814815 | 0.185185 | 0.000000 | 0.0 | 0.000000 | 0.037037 | 0.740741 | 0.222222 | 0.814815 | 0.185185 |
(Optional) Prepare numerical item features
The method below is left here for convenience if you want to experiment with content-based item features as an input for your neural network.
def map_items_to_onehot(df):
one_hot = pd.get_dummies(df.loc[:, base_item_features])
df = df.drop(base_item_features, axis = 1)
df = df.join(one_hot)
return df, list(one_hot.columns)
def prepare_items_df(interactions_df):
items_df = interactions_df.loc[:, ["item_id"] + base_item_features].drop_duplicates()
items_df, item_features = map_items_to_onehot(items_df)
return items_df, item_features
items_df, item_features = prepare_items_df(interactions_df)
print(item_features)
display(HTML(items_df.loc[items_df['item_id'].isin([0, 1, 2, 3, 4, 5, 6])].head(15).to_html()))
['term_WinterVacation', 'term_Easter', 'term_OffSeason', 'term_HighSeason', 'term_LowSeason', 'term_MayLongWeekend', 'term_NewYear', 'term_Christmas', 'length_of_stay_bucket_[0-1]', 'length_of_stay_bucket_[2-3]', 'length_of_stay_bucket_[4-7]', 'length_of_stay_bucket_[8-inf]', 'rate_plan_Standard', 'rate_plan_Nonref', 'room_segment_[0-160]', 'room_segment_[160-260]', 'room_segment_[260-360]', 'room_segment_[360-500]', 'room_segment_[500-900]', 'n_people_bucket_[1-1]', 'n_people_bucket_[2-2]', 'n_people_bucket_[3-4]', 'n_people_bucket_[5-inf]', 'weekend_stay_True', 'weekend_stay_False']
 | item_id | term_WinterVacation | term_Easter | term_OffSeason | term_HighSeason | term_LowSeason | term_MayLongWeekend | term_NewYear | term_Christmas | length_of_stay_bucket_[0-1] | length_of_stay_bucket_[2-3] | length_of_stay_bucket_[4-7] | length_of_stay_bucket_[8-inf] | rate_plan_Standard | rate_plan_Nonref | room_segment_[0-160] | room_segment_[160-260] | room_segment_[260-360] | room_segment_[360-500] | room_segment_[500-900] | n_people_bucket_[1-1] | n_people_bucket_[2-2] | n_people_bucket_[3-4] | n_people_bucket_[5-inf] | weekend_stay_True | weekend_stay_False |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 |
1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 |
2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 |
3 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 |
4 | 4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 |
5 | 5 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 |
6 | 6 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 |
Neural network recommender
Task:
Code a recommender based on a neural network model. You are free to choose any network architecture you find appropriate. The network can use the interaction vectors for users and items, embeddings of users and items, as well as user and item features (you can use the features you developed in the first project).
Remember to keep control over randomness - in the init method add the seed as a parameter and initialize the random number generator with that seed (both for numpy and pytorch):
self.seed = seed
self.rng = np.random.RandomState(seed=seed)
in the network model:
self.seed = torch.manual_seed(seed)
You are encouraged to experiment with:
- the number of layers in the network, the number of neurons and different activation functions,
- different optimizers and their parameters,
- batch size and the number of epochs,
- embedding layers (see the sketch after this list),
- content-based features of both users and items.
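As one illustration of the embedding option from the list above, a minimal user/item embedding model could look like the sketch below (a hypothetical example; it is not the architecture used later in this notebook):

# Hypothetical sketch of an embedding-based scoring network (not used below).
# It learns dense vectors for user ids and item ids and scores a (user, item) pair by their dot product.
class EmbeddingNet(nn.Module):
    def __init__(self, n_users, n_items, embedding_dim=16, seed=6789):
        super().__init__()
        self.seed = torch.manual_seed(seed)  # keep control over randomness, as required above
        self.user_embedding = nn.Embedding(n_users, embedding_dim)
        self.item_embedding = nn.Embedding(n_items, embedding_dim)

    def forward(self, user_ids, item_ids):
        user_vecs = self.user_embedding(user_ids)
        item_vecs = self.item_embedding(item_ids)
        return (user_vecs * item_vecs).sum(dim=1)  # dot-product score per (user, item) pair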
from sklearn.model_selection import train_test_split
import torch.nn.functional as F

from recommenders.recommender import Recommender
# HR10 = 0.07
# class Net(nn.Module):
# def __init__(self, features_len, output_len):
# super(Net, self).__init__()
# self.fc1 = nn.Linear(features_len, 150)
# self.fc2 = nn.Linear(150, 100)
# self.fc3 = nn.Linear(100, output_len)
# self.fc4 = nn.Linear(output_len, output_len+200)
# self.dropout = nn.Dropout(p=0.5)
# def forward(self, x):
# x = F.relu(self.fc1(x))
# x = self.dropout(x)
# x = F.relu(self.fc2(x))
# x = self.dropout(x)
# x = F.relu(self.fc3(x))
# return self.fc4(x)
# HR10 = 0.06
# class Net(nn.Module):
# def __init__(self, features_len, output_len):
# super(Net, self).__init__()
# self.fc1 = nn.Linear(features_len, 150)
# self.fc2 = nn.Linear(150, 100)
# self.fc3 = nn.Linear(100, output_len)
# self.fc4 = nn.Linear(output_len, output_len+150)
# self.dropout = nn.Dropout(p=0.5)
# def forward(self, x):
# x = F.relu(self.fc1(x))
# x = self.dropout(x)
# x = F.relu(self.fc2(x))
# x = self.dropout(x)
# x = F.relu(self.fc3(x))
# x = self.dropout(x)
# return self.fc4(x)
# Applying a Softmax layer before CrossEntropyLoss is a bad choice - the loss already applies log-softmax to raw logits
# class Net(nn.Module):
# def __init__(self, features_len, output_len):
# super(Net, self).__init__()
# self.fc1 = nn.Linear(features_len, 150)
# self.fc2 = nn.Linear(150, 100)
# self.fc3 = nn.Linear(100, output_len)
# self.fc4 = nn.Linear(output_len, output_len+200)
# self.dropout = nn.Dropout(p=0.5)
# self.softmax = nn.Softmax()
# def forward(self, x):
# x = F.relu(self.fc1(x))
# x = self.dropout(x)
# x = F.relu(self.fc2(x))
# x = self.dropout(x)
# x = F.relu(self.fc3(x))
# x = self.fc4(x)
# x = self.softmax(x)
# return x
# HR@10 = 0.116 after 20000 epochs - the best variant, used below
class Net(nn.Module):
    """Feed-forward scoring network: a feature vector goes in, one raw score per output unit comes out."""

    def __init__(self, features_len, output_len):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(features_len, 150)
        self.fc2 = nn.Linear(150, 100)
        self.fc3 = nn.Linear(100, output_len)
        # Note: the last layer widens the output to output_len + 200 units,
        # so the network produces more scores than there are known item ids.
        self.fc4 = nn.Linear(output_len, output_len + 200)
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = F.relu(self.fc3(x))
        return self.fc4(x)
# PReLU variant - a lot slower than ReLU and no better results
# class Net(nn.Module):
# def __init__(self, features_len, output_len):
# super(Net, self).__init__()
# self.fc1 = nn.Linear(features_len, 150)
# self.fc2 = nn.Linear(150, 100)
# self.fc3 = nn.Linear(100, output_len)
# self.fc4 = nn.Linear(output_len, output_len+200)
# self.dropout = nn.Dropout(p=0.5)
# self.prelu = nn.PReLU()
# def forward(self, x):
# x = self.fc1(x)
# x = self.prelu(x)
# x = self.dropout(x)
# x = self.fc2(x)
# x = self.prelu(x)
# x = self.dropout(x)
# x = self.fc3(x)
# x = self.prelu(x)
# return self.fc4(x)
class NNRecommender(Recommender):
"""
Neural network recommender based on user and item content features.
"""
def __init__(self, seed=6789, n_neg_per_pos=5, n_epochs=20000, lr=0.01):
"""
Initialize base recommender params and variables.
"""
self.model = None
self.n_neg_per_pos = n_neg_per_pos
self.recommender_df = pd.DataFrame(columns=['user_id', 'item_id', 'score'])
self.users_df = None
self.user_features = None
self.seed = seed
self.rng = np.random.RandomState(seed=seed)
self.n_epochs = n_epochs
self.lr = lr
def calculate_accuracy(self, y_true, y_pred):
predictions=(y_pred.argmax(1))
return (predictions == y_true).sum().float() / len(y_true)
def round_tensor(self, t, decimal_places=3):
return round(t.item(), decimal_places)
def fit(self, interactions_df, users_df, items_df):
"""
Training of the recommender.
:param pd.DataFrame interactions_df: DataFrame with recorded interactions between users and items
defined by user_id, item_id and features of the interaction.
:param pd.DataFrame users_df: DataFrame with users and their features defined by user_id and the user feature columns.
:param pd.DataFrame items_df: DataFrame with items and their features defined by item_id and the item feature columns.
"""
interactions_df = interactions_df.copy()
# Prepare users_df and items_df
# (optional - use only if you want to train a hybrid model with content-based features)
users_df, user_features = prepare_users_df(interactions_df)
self.users_df = users_df
self.user_features = user_features
items_df, item_features = prepare_items_df(interactions_df)
items_df = items_df.loc[:, ['item_id'] + item_features]
X = items_df[['term_WinterVacation', 'term_Easter', 'term_OffSeason', 'term_HighSeason', 'term_LowSeason', 'term_MayLongWeekend', 'term_NewYear', 'term_Christmas', 'rate_plan_Standard', 'rate_plan_Nonref', 'room_segment_[0-160]', 'room_segment_[160-260]', 'room_segment_[260-360]', 'room_segment_[360-500]', 'room_segment_[500-900]', 'n_people_bucket_[1-1]', 'n_people_bucket_[2-2]', 'n_people_bucket_[3-4]', 'n_people_bucket_[5-inf]', 'weekend_stay_True', 'weekend_stay_False']]
y = items_df[['item_id']]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=self.seed)
X_train = torch.from_numpy(X_train.to_numpy()).float()
y_train = torch.squeeze(torch.from_numpy(y_train.to_numpy()).long())
X_test = torch.from_numpy(X_test.to_numpy()).float()
y_test = torch.squeeze(torch.from_numpy(y_test.to_numpy()).long())
self.net = Net(X_train.shape[1], items_df['item_id'].unique().size)
optimizer = optim.Adam(self.net.parameters(), lr=self.lr)
criterion = nn.CrossEntropyLoss()
for epoch in range(self.n_epochs):
y_pred = self.net(X_train)
y_pred = torch.squeeze(y_pred)
train_loss = criterion(y_pred, y_train)
if epoch % 1000 == 0:
y_test_pred = self.net(X_test)
y_test_pred = torch.squeeze(y_test_pred)
test_loss = criterion(y_test_pred, y_test)
print(
f'''epoch {epoch}
Train set - loss: {self.round_tensor(train_loss)}
Test set - loss: {self.round_tensor(test_loss)}
''')
optimizer.zero_grad()
train_loss.backward()
optimizer.step()
def recommend(self, users_df, items_df, n_recommendations=1):
"""
Serving of recommendations. Scores items in items_df for each user in users_df and returns
top n_recommendations for each user.
:param pd.DataFrame users_df: DataFrame with users and their features for which recommendations should be generated.
:param pd.DataFrame items_df: DataFrame with items and their features which should be scored.
:param int n_recommendations: Number of recommendations to be returned for each user.
:return: DataFrame with user_id, item_id and score as columns returning n_recommendations top recommendations
for each user.
:rtype: pd.DataFrame
"""
# Clean previous recommendations (iloc could be used alternatively)
self.recommender_df = self.recommender_df[:0]
# Prepare users_df and items_df
# (optional - use only if you want to train a hybrid model with content-based features)
users_df = users_df.loc[:, 'user_id']
users_df = pd.merge(users_df, self.users_df, on=['user_id'], how='left').fillna(0)
# items_df, item_features = prepare_items_df(items_df)
# items_df = items_df.loc[:, ['item_id'] + item_features]
# Score the items
recommendations = pd.DataFrame(columns=['user_id', 'item_id', 'score'])
for ix, user in users_df.iterrows():
prep_user = torch.from_numpy(user[['user_term_WinterVacation', 'user_term_Easter', 'user_term_OffSeason', 'user_term_HighSeason', 'user_term_LowSeason', 'user_term_MayLongWeekend', 'user_term_NewYear', 'user_term_Christmas', 'user_rate_plan_Standard', 'user_rate_plan_Nonref', 'user_room_segment_[0-160]', 'user_room_segment_[160-260]', 'user_room_segment_[260-360]', 'user_room_segment_[360-500]', 'user_room_segment_[500-900]', 'user_n_people_bucket_[1-1]', 'user_n_people_bucket_[2-2]', 'user_n_people_bucket_[3-4]', 'user_n_people_bucket_[5-inf]', 'user_weekend_stay_True', 'user_weekend_stay_False']].to_numpy()).float()
scores = self.net(prep_user).detach().numpy()
chosen_ids = np.argsort(-scores)[:n_recommendations]
recommendations = []
for item_id in chosen_ids:
recommendations.append(
{
'user_id': user['user_id'],
'item_id': item_id,
'score': scores[item_id]
}
)
user_recommendations = pd.DataFrame(recommendations)
self.recommender_df = pd.concat([self.recommender_df, user_recommendations])
return self.recommender_df
# Fit method
# nn_recommender = NNRecommender(n_epochs=10000, lr=0.02)
# nn_recommender.fit(interactions_df.head(1000), None, None)
# nn_recommender.fit(interactions_df, None, None)
Quick test of the recommender
items_df = interactions_df.loc[:, ['item_id'] + base_item_features].drop_duplicates()
# Fit method
nn_recommender = NNRecommender(n_epochs=200, lr=0.01)
nn_recommender.fit(interactions_df.head(1000), None, None)
# nn_recommender.fit(interactions_df, None, None)
epoch 0
Train set - loss: 6.042, accuracy: 0.011
Test set - loss: 6.025, accuracy: 0.0
epoch 100
Train set - loss: 1.162, accuracy: 0.506
Test set - loss: 36.526, accuracy: 0.0
# Recommender method
recommendations = nn_recommender.recommend(pd.DataFrame([[1],[3]], columns=['user_id']), items_df, 3)
recommendations = pd.merge(recommendations, items_df, on='item_id', how='left')
display(HTML(recommendations.to_html()))
 | user_id | item_id | score | term | length_of_stay_bucket | rate_plan | room_segment | n_people_bucket | weekend_stay |
---|---|---|---|---|---|---|---|---|---|
0 | 1.0 | 119 | 5.364058 | Easter | [2-3] | Standard | [160-260] | [2-2] | True |
1 | 1.0 | 88 | 5.033441 | WinterVacation | [0-1] | Standard | [160-260] | [2-2] | True |
2 | 1.0 | 57 | 4.771185 | WinterVacation | [2-3] | Standard | [160-260] | [2-2] | True |
3 | 3.0 | 2 | 11.286193 | WinterVacation | [2-3] | Standard | [160-260] | [2-2] | False |
4 | 3.0 | 74 | 10.848604 | WinterVacation | [4-7] | Standard | [160-260] | [2-2] | False |
5 | 3.0 | 81 | 10.656947 | WinterVacation | [0-1] | Standard | [160-260] | [2-2] | False |
Tuning method
from evaluation_and_testing.testing import evaluate_train_test_split_implicit
seed = 6789
from hyperopt import hp, fmin, tpe, Trials
import traceback
def tune_recommender(recommender_class, interactions_df, items_df,
param_space, max_evals=1, show_progressbar=True, seed=6789):
# Split into train_validation and test sets
shuffle = np.arange(len(interactions_df))
rng = np.random.RandomState(seed=seed)
rng.shuffle(shuffle)
shuffle = list(shuffle)
train_test_split = 0.8
split_index = int(len(interactions_df) * train_test_split)
train_validation = interactions_df.iloc[shuffle[:split_index]]
test = interactions_df.iloc[shuffle[split_index:]]
# Tune
def loss(tuned_params):
recommender = recommender_class(seed=seed, **tuned_params)
hr1, hr3, hr5, hr10, ndcg1, ndcg3, ndcg5, ndcg10 = evaluate_train_test_split_implicit(
recommender, train_validation, items_df, seed=seed)
return -hr10
n_tries = 1
succeded = False
try_id = 0
while not succeded and try_id < n_tries:
try:
trials = Trials()
best_param_set = fmin(loss, space=param_space, algo=tpe.suggest,
max_evals=max_evals, show_progressbar=show_progressbar, trials=trials, verbose=True)
succeded = True
except:
traceback.print_exc()
try_id += 1
if not succeded:
return None
# Validate
recommender = recommender_class(seed=seed, **best_param_set)
results = [[recommender_class.__name__] + list(evaluate_train_test_split_implicit(
recommender, {'train': train_validation, 'test': test}, items_df, seed=seed))]
results = pd.DataFrame(results,
columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])
display(HTML(results.to_html()))
return best_param_set
Tuning of the recommender
Task:
Tune your model using the code below. You only need to put the class name of your recommender and choose an appropriate parameter space.
param_space = {
'n_neg_per_pos': hp.quniform('n_neg_per_pos', 1, 10, 1)
}
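The space above only tunes n_neg_per_pos. A broader search space could, for example, also cover the learning rate and the number of epochs (a hypothetical illustration matching the NNRecommender constructor arguments; it is not the configuration used for the results below):

# Hypothetical, broader search space (not the one used for the results below).
# Note: hp.quniform returns floats, so integer-valued parameters may need casting to int inside the recommender.
extended_param_space = {
    'n_neg_per_pos': hp.quniform('n_neg_per_pos', 1, 10, 1),
    'lr': hp.loguniform('lr', np.log(0.001), np.log(0.1)),
    'n_epochs': hp.quniform('n_epochs', 5000, 20000, 5000)
}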
items_df['item_id'].unique().size
best_param_set = tune_recommender(NNRecommender, interactions_df, items_df,
param_space, max_evals=10, show_progressbar=True, seed=seed)
print("Best parameters:")
print(best_param_set)
[Per-epoch training logs truncated: in each of the 10 tuning trials the train loss drops from about 6.8 to roughly 0.9-1.0 over 20000 epochs, while the test loss grows from about 6.8 to 55-65.]
100%|██████████| 10/10 [3:22:15<00:00, 1213.59s/trial, best loss: -0.0823433019254404]
[Final refit log truncated: same pattern, ending with train loss about 1.0 and test loss about 64 at epoch 19000.]
 | Recommender | HR@1 | HR@3 | HR@5 | HR@10 | NDCG@1 | NDCG@3 | NDCG@5 | NDCG@10
---|---|---|---|---|---|---|---|---|---|
0 | NNRecommender | 0.005265 | 0.015137 | 0.020401 | 0.032247 | 0.005265 | 0.010976 | 0.013143 | 0.01686 |
Best parameters: {'n_neg_per_pos': 5.0}
Final evaluation
Task:
Run the final evaluation of your recommender and present its results against the Amazon and Netflix recommenders' results. You just need to give the class name of your recommender and its tuned parameters below.
nn_recommender = NNRecommender(n_neg_per_pos=6, n_epochs=20000) # Initialize your recommender here
# Give the name of your recommender in the line below
nn_tts_results = [['NNRecommender'] + list(evaluate_train_test_split_implicit(
nn_recommender, interactions_df, items_df))]
nn_tts_results = pd.DataFrame(
nn_tts_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])
display(HTML(nn_tts_results.to_html()))
 | Recommender | HR@1 | HR@3 | HR@5 | HR@10 | NDCG@1 | NDCG@3 | NDCG@5 | NDCG@10
---|---|---|---|---|---|---|---|---|---|
0 | NNRecommender | 0.025008 | 0.035209 | 0.066469 | 0.116815 | 0.025008 | 0.0311 | 0.043697 | 0.059459 |
from recommenders.amazon_recommender import AmazonRecommender
amazon_recommender = AmazonRecommender()
amazon_tts_results = [['AmazonRecommender'] + list(evaluate_train_test_split_implicit(
amazon_recommender, interactions_df, items_df))]
amazon_tts_results = pd.DataFrame(
amazon_tts_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])
display(HTML(amazon_tts_results.to_html()))
 | Recommender | HR@1 | HR@3 | HR@5 | HR@10 | NDCG@1 | NDCG@3 | NDCG@5 | NDCG@10
---|---|---|---|---|---|---|---|---|---|
0 | AmazonRecommender | 0.042119 | 0.10464 | 0.140507 | 0.199408 | 0.042119 | 0.076826 | 0.091797 | 0.110711 |
from recommenders.netflix_recommender import NetflixRecommender
netflix_recommender = NetflixRecommender(n_epochs=30, print_type='live')
netflix_tts_results = [['NetflixRecommender'] + list(evaluate_train_test_split_implicit(
netflix_recommender, interactions_df, items_df))]
netflix_tts_results = pd.DataFrame(
netflix_tts_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])
display(HTML(netflix_tts_results.to_html()))
Loss training (min: 0.161, max: 0.228, cur: 0.161) validation (min: 0.176, max: 0.242, cur: 0.177)
 | Recommender | HR@1 | HR@3 | HR@5 | HR@10 | NDCG@1 | NDCG@3 | NDCG@5 | NDCG@10
---|---|---|---|---|---|---|---|---|---|
0 | NetflixRecommender | 0.042777 | 0.106614 | 0.143139 | 0.200395 | 0.042777 | 0.078228 | 0.093483 | 0.111724 |
tts_results = pd.concat([nn_tts_results, amazon_tts_results, netflix_tts_results]).reset_index(drop=True)
display(HTML(tts_results.to_html()))
 | Recommender | HR@1 | HR@3 | HR@5 | HR@10 | NDCG@1 | NDCG@3 | NDCG@5 | NDCG@10
---|---|---|---|---|---|---|---|---|---|
0 | NNRecommender | 0.025008 | 0.035209 | 0.066469 | 0.116815 | 0.025008 | 0.031100 | 0.043697 | 0.059459 |
1 | AmazonRecommender | 0.042119 | 0.104640 | 0.140507 | 0.199408 | 0.042119 | 0.076826 | 0.091797 | 0.110711 |
2 | NetflixRecommender | 0.042777 | 0.106614 | 0.143139 | 0.200395 | 0.042777 | 0.078228 | 0.093483 | 0.111724 |
Summary
Task:
Write a summary of your experiments. What worked well and what did not? What are your thoughts how could you possibly further improve the model?
What did not work:
- I tried adding a softmax layer at the output; it was not a good idea, because CrossEntropyLoss expects raw logits and already applies log-softmax internally.
- At first I copied and pasted code from a binary classification tutorial without much thought; BCELoss is not a good choice for multi-class classification.
- More layers do not automatically mean better results.
- More epochs do not always mean better results.
- PReLU was a lot slower than ReLU and did not give better results.
- For some reason, the n_neg_per_pos value returned by tuning was not the best choice - with n_neg_per_pos larger by one I got better results.
What did work well:
- The dropout layer improved results significantly (from HR@10 = 0.03 to 0.116).
- Using all the features gave the best results.
How to further improve the model:
- Add more data or more features.
- Work on the network architecture.
- Try a "One vs All" approach (see the sketch below).
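A rough sketch of the "One vs All" idea mentioned above (hypothetical, not implemented in this notebook): keep a similar backbone, but train each output unit as an independent binary "is this the item?" classifier by using BCEWithLogitsLoss against one-hot item targets instead of CrossEntropyLoss.

# Hypothetical one-vs-all variant (a sketch, not the model evaluated above).
class OneVsAllNet(nn.Module):
    def __init__(self, features_len, n_items):
        super().__init__()
        self.fc1 = nn.Linear(features_len, 150)
        self.fc2 = nn.Linear(150, n_items)
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        return self.fc2(x)  # raw logits, one independent binary classifier per item

# Training would use per-item binary targets instead of CrossEntropyLoss:
# criterion = nn.BCEWithLogitsLoss()
# targets = F.one_hot(y_train, num_classes=n_items).float()
# loss = criterion(one_vs_all_net(X_train), targets)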