REK-proj-2/project_2_recommender_and_evaluation.ipynb
Aleksander Piotrowski df22464481 Fix README
2021-06-29 16:15:56 +02:00

129 KiB

%matplotlib inline
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import Markdown, display, HTML
from collections import defaultdict

import torch
import torch.nn as nn
import torch.optim as optim
from livelossplot import PlotLosses

# Fix the dying kernel problem (only a problem in some installations - you can remove it, if it works without it)
import os
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

Load the dataset for recommenders

# Location of the hotel dataset used by all recommenders in this notebook.
data_path = os.path.join("data", "hotel_data")

interactions_df = pd.read_csv(os.path.join(data_path, "hotel_data_interactions_df.csv"), index_col=0)

# Interaction columns that describe an item (a booking configuration).
base_item_features = ['term', 'length_of_stay_bucket', 'rate_plan', 'room_segment', 'n_people_bucket', 'weekend_stay']

# Allowed values of every item feature; the order fixes the order of the
# derived one-hot / user-share feature columns.
column_values_dict = {
    'term': ['WinterVacation', 'Easter', 'OffSeason', 'HighSeason', 'LowSeason', 'MayLongWeekend', 'NewYear', 'Christmas'],
    'length_of_stay_bucket': ['[0-1]', '[2-3]', '[4-7]', '[8-inf]'],
    'rate_plan': ['Standard', 'Nonref'],
    'room_segment': ['[0-160]', '[160-260]', '[260-360]', '[360-500]', '[500-900]'],
    'n_people_bucket': ['[1-1]', '[2-2]', '[3-4]', '[5-inf]'],
    'weekend_stay': ['True', 'False']
}

# weekend_stay is stringified first so that its values match the
# 'True' / 'False' category labels declared above.
interactions_df['weekend_stay'] = interactions_df['weekend_stay'].astype('str')

# Assign with df[col] = ... rather than df.loc[:, col] = ...: on recent pandas
# the .loc form writes into the existing column in place, so the column would
# keep its original dtype instead of becoming categorical.
for column in base_item_features:
    interactions_df[column] = pd.Categorical(
        interactions_df[column], categories=column_values_dict[column])

display(HTML(interactions_df.head(15).to_html()))
user_id item_id term length_of_stay_bucket rate_plan room_segment n_people_bucket weekend_stay
0 1 0 WinterVacation [2-3] Standard [260-360] [5-inf] True
1 2 1 WinterVacation [2-3] Standard [160-260] [3-4] True
2 3 2 WinterVacation [2-3] Standard [160-260] [2-2] False
3 4 3 WinterVacation [4-7] Standard [160-260] [3-4] True
4 5 4 WinterVacation [4-7] Standard [0-160] [2-2] True
5 6 5 Easter [4-7] Standard [260-360] [5-inf] True
6 7 6 OffSeason [2-3] Standard [260-360] [5-inf] True
7 8 7 HighSeason [2-3] Standard [160-260] [1-1] True
8 9 8 HighSeason [2-3] Standard [0-160] [1-1] True
9 8 7 HighSeason [2-3] Standard [160-260] [1-1] True
10 8 7 HighSeason [2-3] Standard [160-260] [1-1] True
11 10 9 HighSeason [2-3] Standard [160-260] [3-4] True
12 11 9 HighSeason [2-3] Standard [160-260] [3-4] True
13 12 10 HighSeason [8-inf] Standard [160-260] [3-4] True
14 14 11 HighSeason [2-3] Standard [0-160] [3-4] True

(Optional) Prepare numerical user features

The method below is left here for convenience if you want to experiment with content-based user features as an input for your neural network.

def n_to_p(l):
    """Scale the numbers in ``l`` so that they sum to one.

    When the sum is not positive the input list itself is returned unchanged.
    """
    total = sum(l)
    if not total > 0:
        return l
    return [value / total for value in l]

def calculate_p(x, values):
    """Return the share of each entry of ``values`` among the items of ``x``.

    The result is aligned with ``values``. ``values.index`` raises
    ``ValueError`` for an item of ``x`` that is not present in ``values``.
    """
    tally = [0 for _ in values]
    for observed in x:
        position = values.index(observed)
        tally[position] += 1

    return n_to_p(tally)

def prepare_users_df(interactions_df):
    """Build numerical user features from the interaction history.

    For every column in ``base_item_features`` and every allowed value of
    that column (``column_values_dict``), a ``user_<column>_<value>`` feature
    holds the share of the user's interactions that have this value.

    :param pd.DataFrame interactions_df: Interactions with a ``user_id``
        column and the base item feature columns.
    :return: Tuple of (users DataFrame with ``user_id`` and the feature
        columns, list of the feature column names).
    """
    users_df = (
        interactions_df.loc[:, ["user_id"]]
        .groupby("user_id")
        .first()
        .reset_index(drop=False)
    )
    user_features = []

    for column in base_item_features:
        categories = column_values_dict[column]

        # One row per user; the cell holds the list of observed values.
        per_user = interactions_df.loc[:, ['user_id', column]]
        per_user = per_user.groupby('user_id').aggregate(lambda x: list(x)).reset_index(drop=False)

        # Replace the list of observed values by the list of value shares.
        per_user.loc[:, column] = per_user[column].apply(
            lambda seen: calculate_p(seen, categories))

        share_columns = []
        for position, category in enumerate(categories):
            share_column = "user_" + column + "_" + category
            share_columns.append(share_column)
            per_user.loc[:, share_column] = per_user[column].apply(
                lambda shares: shares[position])
            user_features.append(share_column)

        users_df = pd.merge(
            users_df, per_user.loc[:, ['user_id'] + share_columns], on=["user_id"])

    return users_df, user_features
    

# Build the user feature table and preview it for a few hand-picked users.
users_df, user_features = prepare_users_df(interactions_df)

print(user_features)

display(HTML(
    users_df[users_df['user_id'].isin([706, 1736, 7779, 96, 1, 50, 115])]
    .head(15)
    .to_html()
))
['user_term_WinterVacation', 'user_term_Easter', 'user_term_OffSeason', 'user_term_HighSeason', 'user_term_LowSeason', 'user_term_MayLongWeekend', 'user_term_NewYear', 'user_term_Christmas', 'user_length_of_stay_bucket_[0-1]', 'user_length_of_stay_bucket_[2-3]', 'user_length_of_stay_bucket_[4-7]', 'user_length_of_stay_bucket_[8-inf]', 'user_rate_plan_Standard', 'user_rate_plan_Nonref', 'user_room_segment_[0-160]', 'user_room_segment_[160-260]', 'user_room_segment_[260-360]', 'user_room_segment_[360-500]', 'user_room_segment_[500-900]', 'user_n_people_bucket_[1-1]', 'user_n_people_bucket_[2-2]', 'user_n_people_bucket_[3-4]', 'user_n_people_bucket_[5-inf]', 'user_weekend_stay_True', 'user_weekend_stay_False']
user_id user_term_WinterVacation user_term_Easter user_term_OffSeason user_term_HighSeason user_term_LowSeason user_term_MayLongWeekend user_term_NewYear user_term_Christmas user_length_of_stay_bucket_[0-1] user_length_of_stay_bucket_[2-3] user_length_of_stay_bucket_[4-7] user_length_of_stay_bucket_[8-inf] user_rate_plan_Standard user_rate_plan_Nonref user_room_segment_[0-160] user_room_segment_[160-260] user_room_segment_[260-360] user_room_segment_[360-500] user_room_segment_[500-900] user_n_people_bucket_[1-1] user_n_people_bucket_[2-2] user_n_people_bucket_[3-4] user_n_people_bucket_[5-inf] user_weekend_stay_True user_weekend_stay_False
0 1 0.130435 0.0 0.652174 0.086957 0.130435 0.000000 0.000000 0.000000 0.000000 0.608696 0.391304 0.000000 0.521739 0.478261 0.000000 0.869565 0.130435 0.000000 0.0 0.000000 0.739130 0.173913 0.086957 0.782609 0.217391
47 50 0.043478 0.0 0.434783 0.304348 0.217391 0.000000 0.000000 0.000000 0.000000 0.913043 0.086957 0.000000 0.260870 0.739130 0.000000 0.565217 0.434783 0.000000 0.0 0.000000 0.173913 0.521739 0.304348 0.782609 0.217391
92 96 0.083333 0.0 0.708333 0.125000 0.041667 0.041667 0.000000 0.000000 0.250000 0.666667 0.041667 0.041667 0.291667 0.708333 0.125000 0.791667 0.083333 0.000000 0.0 0.041667 0.333333 0.541667 0.083333 0.750000 0.250000
111 115 0.727273 0.0 0.272727 0.000000 0.000000 0.000000 0.000000 0.000000 0.500000 0.363636 0.136364 0.000000 1.000000 0.000000 0.000000 0.818182 0.181818 0.000000 0.0 0.818182 0.090909 0.045455 0.045455 0.363636 0.636364
675 706 0.091988 0.0 0.451039 0.189911 0.207715 0.038576 0.011869 0.008902 0.169139 0.459941 0.272997 0.097923 0.994065 0.005935 0.020772 0.839763 0.130564 0.008902 0.0 0.041543 0.094955 0.738872 0.124629 0.676558 0.323442
1699 1736 0.034483 0.0 0.482759 0.206897 0.275862 0.000000 0.000000 0.000000 0.241379 0.551724 0.206897 0.000000 0.172414 0.827586 0.000000 0.931034 0.068966 0.000000 0.0 0.379310 0.413793 0.206897 0.000000 0.448276 0.551724
7639 7779 0.037037 0.0 0.296296 0.259259 0.370370 0.000000 0.000000 0.037037 0.111111 0.296296 0.481481 0.111111 1.000000 0.000000 0.000000 0.814815 0.185185 0.000000 0.0 0.000000 0.037037 0.740741 0.222222 0.814815 0.185185

(Optional) Prepare numerical item features

The method below is left here for convenience if you want to experiment with content-based item features as an input for your neural network.

def map_items_to_onehot(df):
    """Replace the base item feature columns of ``df`` by one-hot columns.

    :param pd.DataFrame df: Frame containing the ``base_item_features`` columns.
    :return: Tuple of (frame with the base feature columns dropped and the
        one-hot columns joined on the index, list of the one-hot column names).
    """
    dummies = pd.get_dummies(df[base_item_features])
    encoded = df.drop(columns=base_item_features).join(dummies)

    return encoded, dummies.columns.tolist()

def prepare_items_df(interactions_df):
    """Extract the distinct items from the interactions and one-hot encode them.

    :param pd.DataFrame interactions_df: Interactions with ``item_id`` and the
        base item feature columns.
    :return: Tuple of (items DataFrame, list of one-hot feature column names)
        as produced by ``map_items_to_onehot``.
    """
    item_columns = ["item_id", *base_item_features]
    unique_items = interactions_df.loc[:, item_columns].drop_duplicates()

    return map_items_to_onehot(unique_items)


# Build the one-hot item feature table and preview the first few item ids.
items_df, item_features = prepare_items_df(interactions_df)

print(item_features)

display(HTML(
    items_df[items_df['item_id'].isin(list(range(7)))]
    .head(15)
    .to_html()
))
['term_WinterVacation', 'term_Easter', 'term_OffSeason', 'term_HighSeason', 'term_LowSeason', 'term_MayLongWeekend', 'term_NewYear', 'term_Christmas', 'length_of_stay_bucket_[0-1]', 'length_of_stay_bucket_[2-3]', 'length_of_stay_bucket_[4-7]', 'length_of_stay_bucket_[8-inf]', 'rate_plan_Standard', 'rate_plan_Nonref', 'room_segment_[0-160]', 'room_segment_[160-260]', 'room_segment_[260-360]', 'room_segment_[360-500]', 'room_segment_[500-900]', 'n_people_bucket_[1-1]', 'n_people_bucket_[2-2]', 'n_people_bucket_[3-4]', 'n_people_bucket_[5-inf]', 'weekend_stay_True', 'weekend_stay_False']
item_id term_WinterVacation term_Easter term_OffSeason term_HighSeason term_LowSeason term_MayLongWeekend term_NewYear term_Christmas length_of_stay_bucket_[0-1] length_of_stay_bucket_[2-3] length_of_stay_bucket_[4-7] length_of_stay_bucket_[8-inf] rate_plan_Standard rate_plan_Nonref room_segment_[0-160] room_segment_[160-260] room_segment_[260-360] room_segment_[360-500] room_segment_[500-900] n_people_bucket_[1-1] n_people_bucket_[2-2] n_people_bucket_[3-4] n_people_bucket_[5-inf] weekend_stay_True weekend_stay_False
0 0 1 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 1 0 0 0 0 0 1 1 0
1 1 1 0 0 0 0 0 0 0 0 1 0 0 1 0 0 1 0 0 0 0 0 1 0 1 0
2 2 1 0 0 0 0 0 0 0 0 1 0 0 1 0 0 1 0 0 0 0 1 0 0 0 1
3 3 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 1 0 1 0
4 4 1 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 0 0 0 1 0 0 1 0
5 5 0 1 0 0 0 0 0 0 0 0 1 0 1 0 0 0 1 0 0 0 0 0 1 1 0
6 6 0 0 1 0 0 0 0 0 0 1 0 0 1 0 0 0 1 0 0 0 0 0 1 1 0

Neural network recommender

Task:
Code a recommender based on a neural network model. You are free to choose any network architecture you find appropriate. The network can use the interaction vectors for users and items, embeddings of users and items, as well as user and item features (you can use the features you developed in the first project).

Remember to keep control over randomness - in the init method add the seed as a parameter and initialize the random seed generator with that seed (both for numpy and pytorch):

self.seed = seed
self.rng = np.random.RandomState(seed=seed)

in the network model:

self.seed = torch.manual_seed(seed)

You are encouraged to experiment with:

  • the number of layers in the network, the number of neurons and different activation functions,
  • different optimizers and their parameters,
  • batch size and the number of epochs,
  • embedding layers,
  • content-based features of both users and items.
from recommenders.recommender import Recommender


# HR10 = 0.07
# class Net(nn.Module):
#     def __init__(self, features_len, output_len):
#         super(Net, self).__init__()
        
#         self.fc1 = nn.Linear(features_len, 150)
#         self.fc2 = nn.Linear(150, 100)
#         self.fc3 = nn.Linear(100, output_len)
#         self.fc4 = nn.Linear(output_len, output_len+200)
        
#         self.dropout = nn.Dropout(p=0.5)
        
#     def forward(self, x):
#         x = F.relu(self.fc1(x))
#         x = self.dropout(x)
#         x = F.relu(self.fc2(x))
#         x = self.dropout(x)
#         x = F.relu(self.fc3(x))
#         return self.fc4(x)

# HR10 = 0.06
# class Net(nn.Module):
#     def __init__(self, features_len, output_len):
#         super(Net, self).__init__()
        
#         self.fc1 = nn.Linear(features_len, 150)
#         self.fc2 = nn.Linear(150, 100)
#         self.fc3 = nn.Linear(100, output_len)
#         self.fc4 = nn.Linear(output_len, output_len+150)

#         self.dropout = nn.Dropout(p=0.5)
        
#     def forward(self, x):
#         x = F.relu(self.fc1(x))
#         x = self.dropout(x)
#         x = F.relu(self.fc2(x))
#         x = self.dropout(x)
#         x = F.relu(self.fc3(x))
#         x = self.dropout(x)
#         return self.fc4(x)

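# An explicit Softmax output layer turned out to be a very bad choice for this multi-class setup
# (presumably because nn.CrossEntropyLoss already applies log-softmax to the raw outputs).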
# class Net(nn.Module):
#     def __init__(self, features_len, output_len):
#         super(Net, self).__init__()
        
#         self.fc1 = nn.Linear(features_len, 150)
#         self.fc2 = nn.Linear(150, 100)
#         self.fc3 = nn.Linear(100, output_len)
#         self.fc4 = nn.Linear(output_len, output_len+200)
        
#         self.dropout = nn.Dropout(p=0.5)
#         self.softmax = nn.Softmax()
        
#     def forward(self, x):
#         x = F.relu(self.fc1(x))
#         x = self.dropout(x)
#         x = F.relu(self.fc2(x))
#         x = self.dropout(x)
#         x = F.relu(self.fc3(x))
#         x = self.fc4(x)
#         x = self.softmax(x)
#         return x
    
# HR10 = 0.116 EPOCH 20000
class Net(nn.Module):
    """Feed-forward classifier that maps a feature vector to raw class scores.

    The network has three ReLU hidden layers (150, 100 and ``output_len``
    units) with dropout after the first two, followed by a linear output
    layer of ``output_len + 200`` units. No softmax is applied; the output
    is meant to be fed to a loss that works on raw scores.

    :param int features_len: Size of the input feature vector.
    :param int output_len: Size of the third hidden layer; the output layer
        has ``output_len + 200`` units.
    """

    def __init__(self, features_len, output_len):
        super(Net, self).__init__()

        self.fc1 = nn.Linear(features_len, 150)
        self.fc2 = nn.Linear(150, 100)
        self.fc3 = nn.Linear(100, output_len)
        self.fc4 = nn.Linear(output_len, output_len + 200)

        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        """Return the raw class scores for the input tensor ``x``."""
        # nn.functional is used directly because no ``F`` alias is imported.
        relu = nn.functional.relu

        x = relu(self.fc1(x))
        x = self.dropout(x)
        x = relu(self.fc2(x))
        x = self.dropout(x)
        x = relu(self.fc3(x))
        return self.fc4(x)

# A lot slower than ReLU
# class Net(nn.Module):
#     def __init__(self, features_len, output_len):
#         super(Net, self).__init__()
        
#         self.fc1 = nn.Linear(features_len, 150)
#         self.fc2 = nn.Linear(150, 100)
#         self.fc3 = nn.Linear(100, output_len)
#         self.fc4 = nn.Linear(output_len, output_len+200)
        
#         self.dropout = nn.Dropout(p=0.5)
#         self.prelu = nn.PReLU()
        
#     def forward(self, x):
#         x = self.fc1(x)
#         x = self.prelu(x)
#         x = self.dropout(x)
#         x = self.fc2(x)
#         x = self.prelu(x)
#         x = self.dropout(x)
#         x = self.fc3(x)
#         x = self.prelu(x)
#         return self.fc4(x)
    
class NNRecommender(Recommender):
    """
    Neural-network recommender based on user and item features.

    ``fit`` trains a ``Net`` classifier that predicts ``item_id`` from the
    item's one-hot features. ``recommend`` feeds each user's feature-share
    vector (the same features in the same order, with a ``user_`` prefix)
    through the network and returns the highest-scored item ids.
    """

    # Item one-hot columns used as network input during training
    # (the length_of_stay_bucket columns are deliberately not part of it).
    _ITEM_FEATURE_COLUMNS = (
        'term_WinterVacation', 'term_Easter', 'term_OffSeason', 'term_HighSeason',
        'term_LowSeason', 'term_MayLongWeekend', 'term_NewYear', 'term_Christmas',
        'rate_plan_Standard', 'rate_plan_Nonref',
        'room_segment_[0-160]', 'room_segment_[160-260]', 'room_segment_[260-360]',
        'room_segment_[360-500]', 'room_segment_[500-900]',
        'n_people_bucket_[1-1]', 'n_people_bucket_[2-2]', 'n_people_bucket_[3-4]',
        'n_people_bucket_[5-inf]',
        'weekend_stay_True', 'weekend_stay_False',
    )
    # Matching user feature columns fed to the network at serving time.
    _USER_FEATURE_COLUMNS = tuple('user_' + name for name in _ITEM_FEATURE_COLUMNS)

    _TEST_FRACTION = 0.2  # share of items held out for the monitoring loss
    _LOG_EVERY = 1000  # print the losses every this many epochs

    def __init__(self, seed=6789, n_neg_per_pos=5, n_epochs=20000, lr=0.01):
        """
        Initialize base recommender params and variables.

        :param int seed: Seed for the numpy and torch random generators.
        :param int n_neg_per_pos: Stored for interface compatibility; it is not
            read by ``fit`` or ``recommend``.
        :param int n_epochs: Number of full-batch training epochs.
        :param float lr: Learning rate of the Adam optimizer.
        """
        self.model = None
        self.net = None
        self.item_ids = None
        self.n_neg_per_pos = n_neg_per_pos

        self.recommender_df = pd.DataFrame(columns=['user_id', 'item_id', 'score'])
        self.users_df = None
        self.user_features = None

        self.seed = seed
        self.rng = np.random.RandomState(seed=seed)

        # Hyperparameter tuners may hand over floats (e.g. 200.0).
        self.n_epochs = int(n_epochs)
        self.lr = lr

    def calculate_accuracy(self, y_true, y_pred):
        """Return the share of rows whose arg-max class equals ``y_true``."""
        predictions = y_pred.argmax(1)
        return (predictions == y_true).sum().float() / len(y_true)

    def round_tensor(self, t, decimal_places=3):
        """Return the scalar value of tensor ``t`` rounded to ``decimal_places``."""
        return round(t.item(), decimal_places)

    def _held_out_loss(self, criterion, X_test, y_test):
        """Return the loss on the held-out items, or None if there are none.

        The network is switched to evaluation mode for the computation, so
        dropout does not distort the reported value, and back to training
        mode afterwards.
        """
        if len(y_test) == 0:
            return None
        self.net.eval()
        with torch.no_grad():
            loss = criterion(self.net(X_test), y_test)
        self.net.train()
        return loss

    def fit(self, interactions_df, users_df, items_df):
        """
        Training of the recommender.

        The ``users_df`` and ``items_df`` arguments are ignored: both feature
        tables are rebuilt from ``interactions_df``.

        :param pd.DataFrame interactions_df: DataFrame with recorded interactions between users and items
            defined by user_id, item_id and features of the interaction.
        :param pd.DataFrame users_df: DataFrame with users and their features defined by user_id and the user feature columns.
        :param pd.DataFrame items_df: DataFrame with items and their features defined by item_id and the item feature columns.
        :raises ValueError: If ``interactions_df`` contains no items.
        """
        interactions_df = interactions_df.copy()

        users_df, user_features = prepare_users_df(interactions_df)
        self.users_df = users_df
        self.user_features = user_features

        items_df, item_features = prepare_items_df(interactions_df)
        items_df = items_df.loc[:, ['item_id'] + item_features]

        n_items = len(items_df)
        if n_items == 0:
            raise ValueError("Cannot fit NNRecommender: interactions_df contains no items.")

        features = items_df[list(self._ITEM_FEATURE_COLUMNS)].to_numpy(dtype=np.float32)
        # NOTE: item_id values are used directly as class indices of the network.
        targets = items_df['item_id'].to_numpy(dtype=np.int64)
        self.item_ids = np.unique(targets)

        # Seeded train / held-out split done with numpy, so no extra library
        # is needed. At least one item always stays in the training part.
        order = np.random.RandomState(seed=self.seed).permutation(n_items)
        n_test = min(int(np.ceil(self._TEST_FRACTION * n_items)), n_items - 1)
        test_idx, train_idx = order[:n_test], order[n_test:]

        X_train = torch.from_numpy(features[train_idx])
        y_train = torch.from_numpy(targets[train_idx])
        X_test = torch.from_numpy(features[test_idx])
        y_test = torch.from_numpy(targets[test_idx])

        # Keep weight initialisation and dropout masks reproducible.
        torch.manual_seed(self.seed)

        self.net = Net(X_train.shape[1], items_df['item_id'].unique().size)
        self.model = self.net

        optimizer = optim.Adam(self.net.parameters(), lr=self.lr)
        criterion = nn.CrossEntropyLoss()

        self.net.train()
        for epoch in range(self.n_epochs):
            train_loss = criterion(self.net(X_train), y_train)

            if epoch % self._LOG_EVERY == 0:
                test_loss = self._held_out_loss(criterion, X_test, y_test)
                test_text = 'n/a' if test_loss is None else self.round_tensor(test_loss)
                print(
                    f"epoch {epoch}\n"
                    f"        Train set - loss: {self.round_tensor(train_loss)}\n"
                    f"        Test  set - loss: {test_text}\n"
                    f"        "
                )

            optimizer.zero_grad()
            train_loss.backward()
            optimizer.step()

        # Leave the network in evaluation mode so serving is deterministic.
        self.net.eval()

    def recommend(self, users_df, items_df, n_recommendations=1):
        """
        Serving of recommendations. Scores the item ids seen during ``fit`` for each user in
        users_df and returns top n_recommendations for each user.

        :param pd.DataFrame users_df: DataFrame with users and their features for which recommendations should be generated.
        :param pd.DataFrame items_df: DataFrame with items and their features which should be scored
            (currently not used for filtering the candidates).
        :param int n_recommendations: Number of recommendations to be returned for each user.
        :return: DataFrame with user_id, item_id and score as columns returning n_recommendations top recommendations
            for each user.
        :rtype: pd.DataFrame
        :raises RuntimeError: If called before ``fit``.
        """
        if self.net is None or self.users_df is None:
            raise RuntimeError("NNRecommender.recommend() called before fit().")

        # Clean previous recommendations (iloc could be used alternatively)
        self.recommender_df = self.recommender_df[:0]

        # Users unknown from training get an all-zero feature vector.
        users_df = pd.merge(
            users_df.loc[:, ['user_id']], self.users_df, on=['user_id'], how='left').fillna(0)
        if users_df.empty:
            return self.recommender_df

        user_matrix = torch.from_numpy(
            users_df[list(self._USER_FEATURE_COLUMNS)].to_numpy(dtype=np.float32))

        # Evaluation mode switches dropout off; otherwise the same user could
        # receive different recommendations on every call.
        self.net.eval()
        with torch.no_grad():
            all_scores = self.net(user_matrix).numpy()

        # Only output classes that correspond to item ids seen in fit() are
        # candidates; the remaining (padding) outputs are not real items.
        n_outputs = all_scores.shape[1]
        candidates = np.arange(n_outputs)
        if self.item_ids is not None:
            known = self.item_ids[(self.item_ids >= 0) & (self.item_ids < n_outputs)]
            if known.size > 0:
                candidates = known

        candidate_scores = all_scores[:, candidates]
        ranking = np.argsort(-candidate_scores, axis=1)[:, :n_recommendations]

        user_ids = users_df['user_id'].to_numpy()
        rows = [
            {
                'user_id': user_id,
                'item_id': candidates[position],
                'score': candidate_scores[row, position]
            }
            for row, user_id in enumerate(user_ids)
            for position in ranking[row]
        ]

        # Built in one go instead of concatenating inside the per-user loop.
        self.recommender_df = pd.DataFrame(rows, columns=['user_id', 'item_id', 'score'])

        return self.recommender_df

# Fit method
# nn_recommender = NNRecommender(10000, 0.02)
# nn_recommender.fit(interactions_df.head(1000), None, None)
# nn_recommender.fit(interactions_df, None, None)

Quick test of the recommender

# Distinct items together with their (non-encoded) descriptive features.
items_df = interactions_df[['item_id'] + base_item_features].drop_duplicates()

# Fit method (short run on the first 1000 interactions only)
nn_recommender = NNRecommender(lr=0.01, n_epochs=200)
nn_recommender.fit(interactions_df.head(1000), None, None)
# nn_recommender.fit(interactions_df, None, None)
epoch 0
        Train set - loss: 6.042, accuracy: 0.011
        Test  set - loss: 6.025, accuracy: 0.0
        
epoch 100
        Train set - loss: 1.162, accuracy: 0.506
        Test  set - loss: 36.526, accuracy: 0.0
        
# Recommender method: three recommendations for users 1 and 3

recommendations = nn_recommender.recommend(
    pd.DataFrame({'user_id': [1, 3]}), items_df, n_recommendations=3)

# Attach the item features so the recommendations are human-readable.
recommendations = recommendations.merge(items_df, on='item_id', how='left')
display(HTML(recommendations.to_html()))
user_id item_id score term length_of_stay_bucket rate_plan room_segment n_people_bucket weekend_stay
0 1.0 119 5.364058 Easter [2-3] Standard [160-260] [2-2] True
1 1.0 88 5.033441 WinterVacation [0-1] Standard [160-260] [2-2] True
2 1.0 57 4.771185 WinterVacation [2-3] Standard [160-260] [2-2] True
3 3.0 2 11.286193 WinterVacation [2-3] Standard [160-260] [2-2] False
4 3.0 74 10.848604 WinterVacation [4-7] Standard [160-260] [2-2] False
5 3.0 81 10.656947 WinterVacation [0-1] Standard [160-260] [2-2] False

Tuning method

from evaluation_and_testing.testing import evaluate_train_test_split_implicit

seed = 6789
from hyperopt import hp, fmin, tpe, Trials
import traceback

def tune_recommender(recommender_class, interactions_df, items_df,
                     param_space, max_evals=1, show_progressbar=True, seed=6789):
    """Tune a recommender with hyperopt and report its held-out test metrics.

    The interactions are shuffled with ``seed`` and split 80/20 into a
    train-validation part and a test part. ``fmin`` minimises ``-HR@10``
    obtained on the train-validation part. The best parameters are then
    evaluated with the train-validation part as training data and the test
    part as test data, and the metrics are displayed as an HTML table.

    :param type recommender_class: Recommender class accepting ``seed`` and
        the tuned parameters as keyword arguments.
    :param pd.DataFrame interactions_df: All recorded interactions.
    :param pd.DataFrame items_df: Items passed on to the evaluation routine.
    :param dict param_space: hyperopt search space keyed by parameter name.
    :param int max_evals: Number of hyperopt evaluations.
    :param bool show_progressbar: Whether hyperopt shows its progress bar.
    :param int seed: Seed for the split, the recommender and the evaluation.
    :return: Best parameter values found, or None if the tuning failed.
    :rtype: dict or None
    """
    # hyperopt is already a dependency of this file; imported locally so the
    # function does not rely on an extra module-level import.
    from hyperopt import space_eval

    # Split into train_validation and test sets

    shuffle = np.arange(len(interactions_df))
    rng = np.random.RandomState(seed=seed)
    rng.shuffle(shuffle)
    shuffle = list(shuffle)

    train_fraction = 0.8
    split_index = int(len(interactions_df) * train_fraction)

    train_validation = interactions_df.iloc[shuffle[:split_index]]
    test = interactions_df.iloc[shuffle[split_index:]]

    # Tune

    def loss(tuned_params):
        """Objective for hyperopt: negative HR@10 on the train-validation part."""
        recommender = recommender_class(seed=seed, **tuned_params)
        hr1, hr3, hr5, hr10, ndcg1, ndcg3, ndcg5, ndcg10 = evaluate_train_test_split_implicit(
            recommender, train_validation, items_df, seed=seed)
        return -hr10

    n_tries = 1
    succeeded = False
    try_id = 0
    best_param_set = None
    while not succeeded and try_id < n_tries:
        try:
            trials = Trials()
            best_param_set = fmin(loss, space=param_space, algo=tpe.suggest,
                                  max_evals=max_evals, show_progressbar=show_progressbar,
                                  trials=trials, verbose=True)
            succeeded = True
        except Exception:
            # Best effort: report the failure and retry; KeyboardInterrupt and
            # SystemExit are deliberately not caught so a run can be aborted.
            traceback.print_exc()
            try_id += 1

    if not succeeded:
        return None

    # fmin returns raw assignments (e.g. indices for hp.choice); resolve them
    # to the actual parameter values before constructing the recommender.
    best_param_set = space_eval(param_space, best_param_set)

    # Validate

    recommender = recommender_class(seed=seed, **best_param_set)

    results = [[recommender_class.__name__] + list(evaluate_train_test_split_implicit(
        recommender, {'train': train_validation, 'test': test}, items_df, seed=seed))]

    results = pd.DataFrame(results,
                           columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])

    display(HTML(results.to_html()))

    return best_param_set

Tuning of the recommender

Task:
Tune your model using the code below. You only need to put the class name of your recommender and choose an appropriate parameter space.

# Search space handed to hyperopt.
# NOTE(review): n_neg_per_pos is stored by NNRecommender.__init__ but is not
# read by its fit/recommend methods, so every evaluation trains the same model.
param_space = {
    'n_neg_per_pos': hp.quniform('n_neg_per_pos', 1, 10, 1),
}
items_df['item_id'].unique().size

best_param_set = tune_recommender(
    NNRecommender,
    interactions_df,
    items_df,
    param_space,
    max_evals=10,
    show_progressbar=True,
    seed=seed,
)

print("Best parameters:", best_param_set, sep="\n")
epoch 0                                               
        Train set - loss: 6.791
        Test  set - loss: 6.798
        
epoch 1000                                            
        Train set - loss: 1.044
        Test  set - loss: 25.104
        
epoch 2000                                            
        Train set - loss: 1.031
        Test  set - loss: 28.583
        
epoch 3000                                            
        Train set - loss: 0.995
        Test  set - loss: 32.894
        
epoch 4000                                            
        Train set - loss: 0.958
        Test  set - loss: 32.049
        
epoch 5000                                            
        Train set - loss: 0.95
        Test  set - loss: 33.561
        
epoch 6000                                            
        Train set - loss: 0.919
        Test  set - loss: 37.039
        
epoch 7000                                            
        Train set - loss: 0.951
        Test  set - loss: 41.181
        
epoch 8000                                            
        Train set - loss: 0.914
        Test  set - loss: 39.916
        
epoch 9000                                            
        Train set - loss: 0.996
        Test  set - loss: 40.807
        
epoch 10000                                           
        Train set - loss: 0.917
        Test  set - loss: 43.963
        
epoch 11000                                           
        Train set - loss: 0.974
        Test  set - loss: 42.84
        
epoch 12000                                           
        Train set - loss: 0.961
        Test  set - loss: 48.198
        
epoch 13000                                           
        Train set - loss: 0.923
        Test  set - loss: 50.819
        
epoch 14000                                           
        Train set - loss: 0.989
        Test  set - loss: 50.511
        
epoch 15000                                           
        Train set - loss: 0.905
        Test  set - loss: 53.104
        
epoch 16000                                           
        Train set - loss: 0.966
        Test  set - loss: 51.585
        
epoch 17000                                           
        Train set - loss: 0.934
        Test  set - loss: 55.722
        
epoch 18000                                           
        Train set - loss: 0.926
        Test  set - loss: 56.764
        
epoch 19000                                           
        Train set - loss: 0.941
        Test  set - loss: 59.002
        
epoch 0                                                                               
        Train set - loss: 6.794
        Test  set - loss: 6.799
        
epoch 1000                                                                            
        Train set - loss: 1.016
        Test  set - loss: 23.549
        
epoch 2000                                                                            
        Train set - loss: 1.04
        Test  set - loss: 26.724
        
epoch 3000                                                                            
        Train set - loss: 1.02
        Test  set - loss: 30.851
        
epoch 4000                                                                            
        Train set - loss: 0.966
        Test  set - loss: 32.59
        
epoch 5000                                                                            
        Train set - loss: 0.976
        Test  set - loss: 34.689
        
epoch 6000                                                                            
        Train set - loss: 0.996
        Test  set - loss: 36.343
        
epoch 7000                                                                            
        Train set - loss: 0.946
        Test  set - loss: 38.011
        
epoch 8000                                                                            
        Train set - loss: 0.939
        Test  set - loss: 42.002
        
epoch 9000                                                                            
        Train set - loss: 0.94
        Test  set - loss: 40.951
        
epoch 10000                                                                           
        Train set - loss: 0.917
        Test  set - loss: 44.119
        
epoch 11000                                                                           
        Train set - loss: 0.907
        Test  set - loss: 43.487
        
epoch 12000                                                                           
        Train set - loss: 0.916
        Test  set - loss: 47.867
        
epoch 13000                                                                           
        Train set - loss: 1.014
        Test  set - loss: 50.954
        
epoch 14000                                                                           
        Train set - loss: 0.974
        Test  set - loss: 51.885
        
epoch 15000                                                                           
        Train set - loss: 0.966
        Test  set - loss: 53.497
        
epoch 16000                                                                           
        Train set - loss: 0.92
        Test  set - loss: 52.769
        
epoch 17000                                                                           
        Train set - loss: 0.938
        Test  set - loss: 53.099
        
epoch 18000                                                                           
        Train set - loss: 0.94
        Test  set - loss: 55.683
        
epoch 19000                                                                           
        Train set - loss: 0.973
        Test  set - loss: 55.271
        
epoch 0                                                                               
        Train set - loss: 6.794
        Test  set - loss: 6.782
        
epoch 1000                                                                            
        Train set - loss: 0.992
        Test  set - loss: 23.159
        
epoch 2000                                                                            
        Train set - loss: 0.959
        Test  set - loss: 26.26
        
epoch 3000                                                                            
        Train set - loss: 0.966
        Test  set - loss: 28.225
        
epoch 4000                                                                            
        Train set - loss: 0.964
        Test  set - loss: 32.285
        
epoch 5000                                                                            
        Train set - loss: 0.92
        Test  set - loss: 33.963
        
epoch 6000                                                                            
        Train set - loss: 0.977
        Test  set - loss: 36.435
        
epoch 7000                                                                            
        Train set - loss: 0.952
        Test  set - loss: 40.532
        
epoch 8000                                                                            
        Train set - loss: 0.937
        Test  set - loss: 41.049
        
epoch 9000                                                                            
        Train set - loss: 0.956
        Test  set - loss: 44.045
        
epoch 10000                                                                           
        Train set - loss: 0.952
        Test  set - loss: 48.621
        
epoch 11000                                                                           
        Train set - loss: 0.975
        Test  set - loss: 52.81
        
epoch 12000                                                                           
        Train set - loss: 0.937
        Test  set - loss: 51.067
        
epoch 13000                                                                           
        Train set - loss: 0.914
        Test  set - loss: 58.222
        
epoch 14000                                                                           
        Train set - loss: 0.932
        Test  set - loss: 58.447
        
epoch 15000                                                                           
        Train set - loss: 0.974
        Test  set - loss: 57.224
        
epoch 16000                                                                           
        Train set - loss: 0.933
        Test  set - loss: 62.57
        
epoch 17000                                                                           
        Train set - loss: 0.96
        Test  set - loss: 63.399
        
epoch 18000                                                                           
        Train set - loss: 0.937
        Test  set - loss: 65.288
        
epoch 19000                                                                           
        Train set - loss: 1.02
        Test  set - loss: 62.537
        
epoch 0                                                                                
        Train set - loss: 6.797
        Test  set - loss: 6.792
        
epoch 1000                                                                             
        Train set - loss: 1.106
        Test  set - loss: 23.897
        
epoch 2000                                                                             
        Train set - loss: 1.028
        Test  set - loss: 25.238
        
epoch 3000                                                                             
        Train set - loss: 0.981
        Test  set - loss: 29.186
        
epoch 4000                                                                             
        Train set - loss: 0.981
        Test  set - loss: 30.399
        
epoch 5000                                                                             
        Train set - loss: 0.967
        Test  set - loss: 33.602
        
epoch 6000                                                                             
        Train set - loss: 0.992
        Test  set - loss: 35.063
        
epoch 7000                                                                               
        Train set - loss: 0.955
        Test  set - loss: 35.093
        
epoch 8000                                                                               
        Train set - loss: 0.984
        Test  set - loss: 35.48
        
epoch 9000                                                                               
        Train set - loss: 1.044
        Test  set - loss: 37.907
        
epoch 10000                                                                              
        Train set - loss: 0.914
        Test  set - loss: 40.246
        
epoch 11000                                                                              
        Train set - loss: 0.941
        Test  set - loss: 41.36
        
epoch 12000                                                                              
        Train set - loss: 0.995
        Test  set - loss: 41.922
        
epoch 13000                                                                              
        Train set - loss: 0.991
        Test  set - loss: 45.061
        
epoch 14000                                                                              
        Train set - loss: 0.907
        Test  set - loss: 47.871
        
epoch 15000                                                                              
        Train set - loss: 0.964
        Test  set - loss: 49.0
        
epoch 16000                                                                              
        Train set - loss: 0.918
        Test  set - loss: 49.898
        
epoch 17000                                                                              
        Train set - loss: 0.925
        Test  set - loss: 52.609
        
epoch 18000                                                                              
        Train set - loss: 0.943
        Test  set - loss: 55.524
        
epoch 19000                                                                              
        Train set - loss: 0.988
        Test  set - loss: 53.781
        
epoch 0                                                                                  
        Train set - loss: 6.797
        Test  set - loss: 6.794
        
epoch 1000                                                                               
        Train set - loss: 1.083
        Test  set - loss: 24.762
        
epoch 2000                                                                               
        Train set - loss: 1.002
        Test  set - loss: 26.87
        
epoch 3000                                                                               
        Train set - loss: 1.002
        Test  set - loss: 29.752
        
epoch 4000                                                                               
        Train set - loss: 0.902
        Test  set - loss: 30.802
        
epoch 5000                                                                               
        Train set - loss: 0.966
        Test  set - loss: 33.726
        
epoch 6000                                                                               
        Train set - loss: 0.929
        Test  set - loss: 38.221
        
epoch 7000                                                                               
        Train set - loss: 0.923
        Test  set - loss: 40.249
        
epoch 8000                                                                               
        Train set - loss: 0.941
        Test  set - loss: 43.72
        
epoch 9000                                                                               
        Train set - loss: 0.988
        Test  set - loss: 45.261
        
epoch 10000                                                                              
        Train set - loss: 0.958
        Test  set - loss: 49.028
        
epoch 11000                                                                              
        Train set - loss: 0.914
        Test  set - loss: 51.199
        
epoch 12000                                                                              
        Train set - loss: 0.984
        Test  set - loss: 52.24
        
epoch 13000                                                                              
        Train set - loss: 0.935
        Test  set - loss: 58.326
        
epoch 14000                                                                              
        Train set - loss: 0.932
        Test  set - loss: 55.572
        
epoch 15000                                                                              
        Train set - loss: 0.932
        Test  set - loss: 57.253
        
epoch 16000                                                                              
        Train set - loss: 0.901
        Test  set - loss: 59.313
        
epoch 17000                                                                              
        Train set - loss: 0.934
        Test  set - loss: 59.817
        
epoch 18000                                                                              
        Train set - loss: 0.994
        Test  set - loss: 57.325
        
epoch 19000                                                                              
        Train set - loss: 0.913
        Test  set - loss: 59.364
        
epoch 0                                                                                  
        Train set - loss: 6.795
        Test  set - loss: 6.796
        
epoch 1000                                                                             
        Train set - loss: 1.067
        Test  set - loss: 25.381
        
epoch 2000                                                                             
        Train set - loss: 1.039
        Test  set - loss: 27.164
        
epoch 3000                                                                             
        Train set - loss: 0.958
        Test  set - loss: 30.859
        
epoch 4000                                                                             
        Train set - loss: 0.961
        Test  set - loss: 32.549
        
epoch 5000                                                                             
        Train set - loss: 0.922
        Test  set - loss: 38.252
        
epoch 6000                                                                             
        Train set - loss: 0.971
        Test  set - loss: 37.736
        
epoch 7000                                                                             
        Train set - loss: 0.986
        Test  set - loss: 43.201
        
epoch 8000                                                                             
        Train set - loss: 0.949
        Test  set - loss: 43.737
        
epoch 9000                                                                             
        Train set - loss: 0.895
        Test  set - loss: 44.754
        
epoch 10000                                                                            
        Train set - loss: 0.976
        Test  set - loss: 49.17
        
epoch 11000                                                                            
        Train set - loss: 0.941
        Test  set - loss: 51.909
        
epoch 12000                                                                            
        Train set - loss: 0.917
        Test  set - loss: 53.406
        
epoch 13000                                                                            
        Train set - loss: 0.97
        Test  set - loss: 57.24
        
epoch 14000                                                                            
        Train set - loss: 0.944
        Test  set - loss: 54.791
        
epoch 15000                                                                            
        Train set - loss: 0.969
        Test  set - loss: 56.372
        
epoch 16000                                                                            
        Train set - loss: 0.981
        Test  set - loss: 58.586
        
epoch 17000                                                                            
        Train set - loss: 0.965
        Test  set - loss: 57.376
        
epoch 18000                                                                            
        Train set - loss: 0.988
        Test  set - loss: 60.655
        
epoch 19000                                                                            
        Train set - loss: 0.883
        Test  set - loss: 58.51
        
epoch 0                                                                                
        Train set - loss: 6.794
        Test  set - loss: 6.786
        
epoch 1000                                                                             
        Train set - loss: 1.074
        Test  set - loss: 24.294
        
epoch 2000                                                                             
        Train set - loss: 1.002
        Test  set - loss: 25.177
        
epoch 3000                                                                             
        Train set - loss: 0.979
        Test  set - loss: 28.115
        
epoch 4000                                                                             
        Train set - loss: 0.974
        Test  set - loss: 31.27
        
epoch 5000                                                                             
        Train set - loss: 0.929
        Test  set - loss: 35.596
        
epoch 6000                                                                             
        Train set - loss: 0.956
        Test  set - loss: 39.096
        
epoch 7000                                                                             
        Train set - loss: 0.944
        Test  set - loss: 39.886
        
epoch 8000                                                                             
        Train set - loss: 0.951
        Test  set - loss: 44.383
        
epoch 9000                                                                             
        Train set - loss: 0.976
        Test  set - loss: 46.715
        
epoch 10000                                                                            
        Train set - loss: 0.907
        Test  set - loss: 48.878
        
epoch 11000                                                                            
        Train set - loss: 0.957
        Test  set - loss: 49.986
        
epoch 12000                                                                            
        Train set - loss: 0.998
        Test  set - loss: 52.608
        
epoch 13000                                                                            
        Train set - loss: 0.986
        Test  set - loss: 51.419
        
epoch 14000                                                                            
        Train set - loss: 0.984
        Test  set - loss: 55.804
        
epoch 15000                                                                            
        Train set - loss: 0.965
        Test  set - loss: 57.902
        
epoch 16000                                                                            
        Train set - loss: 0.905
        Test  set - loss: 57.022
        
epoch 17000                                                                            
        Train set - loss: 0.96
        Test  set - loss: 53.676
        
epoch 18000                                                                            
        Train set - loss: 0.939
        Test  set - loss: 62.478
        
epoch 19000                                                                            
        Train set - loss: 0.93
        Test  set - loss: 61.828
        
epoch 0                                                                                
        Train set - loss: 6.793
        Test  set - loss: 6.794
        
epoch 1000                                                                             
        Train set - loss: 1.063
        Test  set - loss: 23.191
        
epoch 2000                                                                             
        Train set - loss: 1.032
        Test  set - loss: 26.461
        
epoch 3000                                                                             
        Train set - loss: 1.02
        Test  set - loss: 29.392
        
epoch 4000                                                                             
        Train set - loss: 0.932
        Test  set - loss: 33.168
        
epoch 5000                                                                             
        Train set - loss: 1.017
        Test  set - loss: 34.574
        
epoch 6000                                                                             
        Train set - loss: 0.975
        Test  set - loss: 38.711
        
epoch 7000                                                                             
        Train set - loss: 0.953
        Test  set - loss: 39.829
        
epoch 8000                                                                             
        Train set - loss: 0.91
        Test  set - loss: 41.895
        
epoch 9000                                                                             
        Train set - loss: 0.989
        Test  set - loss: 45.25
        
epoch 10000                                                                            
        Train set - loss: 1.0
        Test  set - loss: 46.407
        
epoch 11000                                                                            
        Train set - loss: 0.98
        Test  set - loss: 50.797
        
epoch 12000                                                                            
        Train set - loss: 0.983
        Test  set - loss: 53.173
        
epoch 13000                                                                            
        Train set - loss: 0.925
        Test  set - loss: 54.291
        
epoch 14000                                                                            
        Train set - loss: 0.926
        Test  set - loss: 54.929
        
epoch 15000                                                                            
        Train set - loss: 0.986
        Test  set - loss: 58.36
        
epoch 16000                                                                            
        Train set - loss: 0.944
        Test  set - loss: 57.972
        
epoch 17000                                                                            
        Train set - loss: 0.963
        Test  set - loss: 58.177
        
epoch 18000                                                                            
        Train set - loss: 0.967
        Test  set - loss: 57.693
        
epoch 19000                                                                            
        Train set - loss: 0.97
        Test  set - loss: 62.002
        
epoch 0                                                                                
        Train set - loss: 6.793
        Test  set - loss: 6.798
        
epoch 1000                                                                           
        Train set - loss: 1.046
        Test  set - loss: 24.413
        
epoch 2000                                                                           
        Train set - loss: 0.981
        Test  set - loss: 28.192
        
epoch 3000                                                                           
        Train set - loss: 0.966
        Test  set - loss: 29.734
        
epoch 4000                                                                           
        Train set - loss: 0.989
        Test  set - loss: 34.306
        
epoch 5000                                                                           
        Train set - loss: 0.967
        Test  set - loss: 34.852
        
epoch 6000                                                                           
        Train set - loss: 0.902
        Test  set - loss: 37.421
        
epoch 7000                                                                           
        Train set - loss: 0.94
        Test  set - loss: 37.481
        
epoch 8000                                                                           
        Train set - loss: 0.951
        Test  set - loss: 40.332
        
epoch 9000                                                                           
        Train set - loss: 0.945
        Test  set - loss: 48.709
        
epoch 10000                                                                          
        Train set - loss: 0.967
        Test  set - loss: 50.611
        
epoch 11000                                                                          
        Train set - loss: 0.99
        Test  set - loss: 49.536
        
epoch 12000                                                                          
        Train set - loss: 0.991
        Test  set - loss: 53.281
        
epoch 13000                                                                          
        Train set - loss: 0.911
        Test  set - loss: 53.05
        
epoch 14000                                                                          
        Train set - loss: 0.952
        Test  set - loss: 56.761
        
epoch 15000                                                                          
        Train set - loss: 0.97
        Test  set - loss: 57.142
        
epoch 16000                                                                          
        Train set - loss: 0.921
        Test  set - loss: 57.22
        
epoch 17000                                                                          
        Train set - loss: 0.937
        Test  set - loss: 59.433
        
epoch 18000                                                                          
        Train set - loss: 0.964
        Test  set - loss: 58.954
        
epoch 19000                                                                          
        Train set - loss: 0.91
        Test  set - loss: 57.752
        
epoch 0                                                                              
        Train set - loss: 6.797
        Test  set - loss: 6.793
        
epoch 1000                                                                           
        Train set - loss: 1.052
        Test  set - loss: 25.378
        
epoch 2000                                                                           
        Train set - loss: 0.967
        Test  set - loss: 30.641
        
epoch 3000                                                                           
        Train set - loss: 0.97
        Test  set - loss: 32.983
        
epoch 4000                                                                           
        Train set - loss: 0.931
        Test  set - loss: 35.008
        
epoch 5000                                                                           
        Train set - loss: 0.95
        Test  set - loss: 38.592
        
epoch 6000                                                                           
        Train set - loss: 0.961
        Test  set - loss: 41.785
        
epoch 7000                                                                           
        Train set - loss: 0.93
        Test  set - loss: 46.456
        
epoch 8000                                                                           
        Train set - loss: 0.977
        Test  set - loss: 46.483
        
epoch 9000                                                                           
        Train set - loss: 0.955
        Test  set - loss: 48.554
        
epoch 10000                                                                          
        Train set - loss: 0.941
        Test  set - loss: 53.479
        
epoch 11000                                                                          
        Train set - loss: 1.003
        Test  set - loss: 51.243
        
epoch 12000                                                                          
        Train set - loss: 0.987
        Test  set - loss: 55.073
        
epoch 13000                                                                          
        Train set - loss: 0.995
        Test  set - loss: 56.564
        
epoch 14000                                                                          
        Train set - loss: 0.953
        Test  set - loss: 55.438
        
epoch 15000                                                                          
        Train set - loss: 0.911
        Test  set - loss: 58.512
        
epoch 16000                                                                          
        Train set - loss: 0.922
        Test  set - loss: 57.445
        
epoch 17000                                                                          
        Train set - loss: 0.949
        Test  set - loss: 60.568
        
epoch 18000                                                                          
        Train set - loss: 0.984
        Test  set - loss: 60.303
        
epoch 19000                                                                          
        Train set - loss: 0.962
        Test  set - loss: 63.902
        
100%|██████████| 10/10 [3:22:15<00:00, 1213.59s/trial, best loss: -0.0823433019254404]
epoch 0
        Train set - loss: 6.842
        Test  set - loss: 6.834
        
epoch 1000
        Train set - loss: 1.101
        Test  set - loss: 25.026
        
epoch 2000
        Train set - loss: 0.971
        Test  set - loss: 28.552
        
epoch 3000
        Train set - loss: 0.989
        Test  set - loss: 32.089
        
epoch 4000
        Train set - loss: 0.99
        Test  set - loss: 33.257
        
epoch 5000
        Train set - loss: 0.985
        Test  set - loss: 36.744
        
epoch 6000
        Train set - loss: 0.971
        Test  set - loss: 38.915
        
epoch 7000
        Train set - loss: 0.977
        Test  set - loss: 40.527
        
epoch 8000
        Train set - loss: 1.013
        Test  set - loss: 42.967
        
epoch 9000
        Train set - loss: 0.981
        Test  set - loss: 44.936
        
epoch 10000
        Train set - loss: 0.975
        Test  set - loss: 52.466
        
epoch 11000
        Train set - loss: 0.949
        Test  set - loss: 50.95
        
epoch 12000
        Train set - loss: 0.933
        Test  set - loss: 51.5
        
epoch 13000
        Train set - loss: 1.023
        Test  set - loss: 54.636
        
epoch 14000
        Train set - loss: 0.987
        Test  set - loss: 59.892
        
epoch 15000
        Train set - loss: 0.996
        Test  set - loss: 57.323
        
epoch 16000
        Train set - loss: 0.989
        Test  set - loss: 61.067
        
epoch 17000
        Train set - loss: 0.969
        Test  set - loss: 64.222
        
epoch 18000
        Train set - loss: 0.925
        Test  set - loss: 62.306
        
epoch 19000
        Train set - loss: 1.006
        Test  set - loss: 63.963
        
Recommender HR@1 HR@3 HR@5 HR@10 NDCG@1 NDCG@3 NDCG@5 NDCG@10
0 NNRecommender 0.005265 0.015137 0.020401 0.032247 0.005265 0.010976 0.013143 0.01686
Best parameters:
{'n_neg_per_pos': 5.0}

Final evaluation

Task:
Run the final evaluation of your recommender and present its results against the Amazon and Netflix recommenders' results. You just need to give the class name of your recommender and its tuned parameters below.

# Final NN model. n_neg_per_pos=6 is one above the tuned value (5.0) printed
# above; the summary at the end of the notebook reports it scored better.
nn_recommender = NNRecommender(n_epochs=20000, n_neg_per_pos=6)

# One-row table: the recommender's name followed by its HR/NDCG scores from
# the train-test-split evaluation.
nn_tts_results = pd.DataFrame(
    [[
        'NNRecommender',
        *evaluate_train_test_split_implicit(nn_recommender, interactions_df, items_df),
    ]],
    columns=[
        'Recommender',
        'HR@1', 'HR@3', 'HR@5', 'HR@10',
        'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10',
    ],
)

display(HTML(nn_tts_results.to_html()))
Recommender HR@1 HR@3 HR@5 HR@10 NDCG@1 NDCG@3 NDCG@5 NDCG@10
0 NNRecommender 0.025008 0.035209 0.066469 0.116815 0.025008 0.0311 0.043697 0.059459
from recommenders.amazon_recommender import AmazonRecommender

# Baseline for comparison: AmazonRecommender with its default settings.
amazon_recommender = AmazonRecommender()

# One-row table: the recommender's name followed by its HR/NDCG scores from
# the train-test-split evaluation.
amazon_tts_results = pd.DataFrame(
    [[
        'AmazonRecommender',
        *evaluate_train_test_split_implicit(amazon_recommender, interactions_df, items_df),
    ]],
    columns=[
        'Recommender',
        'HR@1', 'HR@3', 'HR@5', 'HR@10',
        'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10',
    ],
)

display(HTML(amazon_tts_results.to_html()))
Recommender HR@1 HR@3 HR@5 HR@10 NDCG@1 NDCG@3 NDCG@5 NDCG@10
0 AmazonRecommender 0.042119 0.10464 0.140507 0.199408 0.042119 0.076826 0.091797 0.110711
from recommenders.netflix_recommender import NetflixRecommender

# Baseline for comparison: NetflixRecommender trained for 30 epochs with
# print_type='live'.
netflix_recommender = NetflixRecommender(print_type='live', n_epochs=30)

# One-row table: the recommender's name followed by its HR/NDCG scores from
# the train-test-split evaluation.
netflix_tts_results = pd.DataFrame(
    [[
        'NetflixRecommender',
        *evaluate_train_test_split_implicit(netflix_recommender, interactions_df, items_df),
    ]],
    columns=[
        'Recommender',
        'HR@1', 'HR@3', 'HR@5', 'HR@10',
        'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10',
    ],
)

display(HTML(netflix_tts_results.to_html()))
Loss
	training         	 (min:    0.161, max:    0.228, cur:    0.161)
	validation       	 (min:    0.176, max:    0.242, cur:    0.177)
Recommender HR@1 HR@3 HR@5 HR@10 NDCG@1 NDCG@3 NDCG@5 NDCG@10
0 NetflixRecommender 0.042777 0.106614 0.143139 0.200395 0.042777 0.078228 0.093483 0.111724
# Stack the three per-recommender result tables into a single comparison
# table, renumbering the rows 0..n-1.
tts_results = pd.concat(
    [nn_tts_results, amazon_tts_results, netflix_tts_results],
    ignore_index=True,
)
display(HTML(tts_results.to_html()))
Recommender HR@1 HR@3 HR@5 HR@10 NDCG@1 NDCG@3 NDCG@5 NDCG@10
0 NNRecommender 0.025008 0.035209 0.066469 0.116815 0.025008 0.031100 0.043697 0.059459
1 AmazonRecommender 0.042119 0.104640 0.140507 0.199408 0.042119 0.076826 0.091797 0.110711
2 NetflixRecommender 0.042777 0.106614 0.143139 0.200395 0.042777 0.078228 0.093483 0.111724

Summary

Task:
Write a summary of your experiments. What worked well and what did not? What are your thoughts on how you could further improve the model?

What did not work:

  • I tried to use softmax, but it wasn't a good idea.
  • At first, I copied and pasted some code from a tutorial on binary linear regression without thinking. BCELoss is not a good choice for multi-class classification.
  • More layers don't mean better results.
  • More epochs don't always mean better results.
  • PReLU was a lot slower than ReLU and it did not give me better results.
  • For some reason, the n_neg_per_pos value I got from hyperparameter tuning wasn't the best one. With n_neg_per_pos larger by one (6 instead of 5), I got better results.

What did work well:

  • Adding a dropout layer improved the results significantly (HR@10 rose from 0.03 to 0.116).

  • Using all features gave me the best results.

How to further improve the model:

  • Add more data or more features

  • Work on network layout

  • Try using a "One vs All" layout.