1751 lines
87 KiB
Plaintext
1751 lines
87 KiB
Plaintext
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 1,
|
||
|
"id": "verified-accommodation",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"%matplotlib inline\n",
|
||
|
"%load_ext autoreload\n",
|
||
|
"%autoreload 2\n",
|
||
|
"\n",
|
||
|
"import numpy as np\n",
|
||
|
"import pandas as pd\n",
|
||
|
"import matplotlib.pyplot as plt\n",
|
||
|
"import seaborn as sns\n",
|
||
|
"from IPython.display import Markdown, display, HTML\n",
|
||
|
"from collections import defaultdict, deque\n",
|
||
|
"\n",
|
||
|
"import torch\n",
|
||
|
"import torch.nn as nn\n",
|
||
|
"import torch.optim as optim\n",
|
||
|
"\n",
|
||
|
"# Fix the dying kernel problem (only a problem in some installations - you can remove it, if it works without it)\n",
|
||
|
"import os\n",
|
||
|
"os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'\n",
|
||
|
"os.environ['CUDA_LAUNCH_BLOCKING'] = '1'"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "educated-tourist",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Load data"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 2,
|
||
|
"id": "prepared-fraction",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>item_id</th>\n",
|
||
|
" <th>title</th>\n",
|
||
|
" <th>genres</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>118</th>\n",
|
||
|
" <td>145</td>\n",
|
||
|
" <td>Bad Boys (1995)</td>\n",
|
||
|
" <td>Action|Comedy|Crime|Drama|Thriller</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>143</th>\n",
|
||
|
" <td>171</td>\n",
|
||
|
" <td>Jeffrey (1995)</td>\n",
|
||
|
" <td>Comedy|Drama</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>194</th>\n",
|
||
|
" <td>228</td>\n",
|
||
|
" <td>Destiny Turns on the Radio (1995)</td>\n",
|
||
|
" <td>Comedy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>199</th>\n",
|
||
|
" <td>233</td>\n",
|
||
|
" <td>Exotica (1994)</td>\n",
|
||
|
" <td>Drama</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>230</th>\n",
|
||
|
" <td>267</td>\n",
|
||
|
" <td>Major Payne (1995)</td>\n",
|
||
|
" <td>Comedy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>313</th>\n",
|
||
|
" <td>355</td>\n",
|
||
|
" <td>Flintstones, The (1994)</td>\n",
|
||
|
" <td>Children|Comedy|Fantasy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>379</th>\n",
|
||
|
" <td>435</td>\n",
|
||
|
" <td>Coneheads (1993)</td>\n",
|
||
|
" <td>Comedy|Sci-Fi</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>419</th>\n",
|
||
|
" <td>481</td>\n",
|
||
|
" <td>Kalifornia (1993)</td>\n",
|
||
|
" <td>Drama|Thriller</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>615</th>\n",
|
||
|
" <td>780</td>\n",
|
||
|
" <td>Independence Day (a.k.a. ID4) (1996)</td>\n",
|
||
|
" <td>Action|Adventure|Sci-Fi|Thriller</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>737</th>\n",
|
||
|
" <td>959</td>\n",
|
||
|
" <td>Of Human Bondage (1934)</td>\n",
|
||
|
" <td>Drama</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
},
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Number of interactions left: 1170\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Read the MovieLens (small) ratings and movies tables and unify the id column names\n",
"ml_ratings_df = pd.read_csv(os.path.join(\"data\", \"movielens_small\", \"ratings.csv\"))\n",
"ml_ratings_df = ml_ratings_df.rename(columns={'userId': 'user_id', 'movieId': 'item_id'})\n",
"ml_movies_df = pd.read_csv(os.path.join(\"data\", \"movielens_small\", \"movies.csv\"))\n",
"ml_movies_df = ml_movies_df.rename(columns={'movieId': 'item_id'})\n",
"ml_df = ml_ratings_df.merge(ml_movies_df, on='item_id')\n",
"\n",
"# Keep a reproducible random subset of 100 movies to make the experiments fast\n",
"seed = 6789\n",
"rng = np.random.RandomState(seed=seed)\n",
"left_ids = rng.choice(ml_movies_df['item_id'], size=100, replace=False)\n",
"\n",
"# Restrict every table to the sampled movies\n",
"ml_ratings_df = ml_ratings_df[ml_ratings_df['item_id'].isin(left_ids)]\n",
"ml_movies_df = ml_movies_df[ml_movies_df['item_id'].isin(left_ids)]\n",
"ml_df = ml_df[ml_df['item_id'].isin(left_ids)]\n",
"\n",
"display(HTML(ml_movies_df.head(10).to_html()))\n",
"\n",
"print(\"Number of interactions left: {}\".format(len(ml_ratings_df)))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "opponent-prediction",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Generalized Matrix Factorization (GMF)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 32,
|
||
|
"id": "fancy-return",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"from livelossplot import PlotLosses\n",
|
||
|
"\n",
|
||
|
"from recommenders.recommender import Recommender\n",
|
||
|
"\n",
|
||
|
"\n",
|
||
|
"class GMFModel(nn.Module):\n",
|
||
|
" def __init__(self, n_items, n_users, embedding_dim, seed):\n",
|
||
|
" super().__init__()\n",
|
||
|
"\n",
|
||
|
" self.seed = torch.manual_seed(seed)\n",
|
||
|
" self.item_embedding = nn.Embedding(n_items, embedding_dim)\n",
|
||
|
" self.user_embedding = nn.Embedding(n_users, embedding_dim)\n",
|
||
|
" self.fc = nn.Linear(embedding_dim, 1, bias=False)\n",
|
||
|
"\n",
|
||
|
" def forward(self, x):\n",
|
||
|
" user_ids = x[:, 0]\n",
|
||
|
" item_ids = x[:, 1]\n",
|
||
|
" user_embedding = self.user_embedding(user_ids)\n",
|
||
|
" item_embedding = self.item_embedding(item_ids)\n",
|
||
|
" x = self.fc(user_embedding * item_embedding)\n",
|
||
|
" x = torch.sigmoid(x)\n",
|
||
|
"\n",
|
||
|
" return x\n",
|
||
|
"\n",
|
||
|
"\n",
|
||
|
"class GMFRecommender(Recommender):\n",
"    \"\"\"\n",
"    Generalized Matrix Factorization (GMF) recommender as described in:\n",
"    - He X., Liao L., Zhang H., Nie L., Hu X., Chua T., Neural Collaborative Filtering, WWW Conference, 2017\n",
"\n",
"    Trained on implicit feedback: every recorded interaction is a positive example and\n",
"    n_neg_per_pos randomly drawn non-interacted (user, item) pairs per positive serve as negatives.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, seed=6789, n_neg_per_pos=5, print_type=None, **params):\n",
"        \"\"\"\n",
"        :param int seed: Seed of the numpy and torch random number generators.\n",
"        :param int n_neg_per_pos: Number of sampled negative interactions per positive interaction.\n",
"        :param str print_type: 'live' plots the losses with livelossplot, 'text' prints them,\n",
"            any other value keeps training silent.\n",
"        :param params: Optional settings: n_epochs (default 10), lr (0.01), weight_decay (0.001),\n",
"            embedding_dim (4), batch_size (64), device (cuda:0 when available, otherwise cpu),\n",
"            should_recommend_already_bought (False), train (False; True holds out 20% of the\n",
"            examples for validation), should_save_model (False).\n",
"        \"\"\"\n",
"        super().__init__()\n",
"        self.recommender_df = pd.DataFrame(columns=['user_id', 'item_id', 'score'])\n",
"        self.interactions_df = None\n",
"        self.item_id_mapping = None\n",
"        self.user_id_mapping = None\n",
"        self.item_id_reverse_mapping = None\n",
"        self.user_id_reverse_mapping = None\n",
"        self.r = None\n",
"        self.most_popular_items = None\n",
"\n",
"        self.nn_model = None\n",
"        self.optimizer = None\n",
"\n",
"        self.n_neg_per_pos = n_neg_per_pos\n",
"        self.n_epochs = params.get('n_epochs', 10)  # each epoch goes through the entire training set\n",
"        self.lr = params.get('lr', 0.01)  # learning rate\n",
"        self.weight_decay = params.get('weight_decay', 0.001)  # weight decay (L2 regularization)\n",
"        self.embedding_dim = params.get('embedding_dim', 4)\n",
"        self.batch_size = params.get('batch_size', 64)\n",
"        if 'device' in params:\n",
"            self.device = params['device']\n",
"        else:\n",
"            self.device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n",
"\n",
"        self.should_recommend_already_bought = params.get('should_recommend_already_bought', False)\n",
"\n",
"        self.train = params.get('train', False)  # True -> fit() holds out a validation set\n",
"        self.validation_set_size = 0.2\n",
"\n",
"        self.seed = seed\n",
"        self.rng = np.random.RandomState(seed=seed)\n",
"        torch.manual_seed(seed)\n",
"\n",
"        # Defaults to False so that the attribute exists even when the option is not passed\n",
"        self.should_save_model = params.get('should_save_model', False)\n",
"        self.print_type = print_type\n",
"\n",
"    @staticmethod\n",
"    def _bce_sum(y, y_target):\n",
"        \"\"\"Summed binary cross-entropy; predictions are clipped away from 0 and 1 to keep log() finite.\"\"\"\n",
"        y = y.clip(0.000001, 0.999999)\n",
"        return -(y_target * y.log() + (1 - y_target) * (1 - y).log()).sum()\n",
"\n",
"    def fit(self, interactions_df, users_df, items_df):\n",
"        \"\"\"\n",
"        Training of the recommender.\n",
"\n",
"        :param pd.DataFrame interactions_df: DataFrame with recorded interactions between users and items\n",
"            defined by user_id, item_id and features of the interaction.\n",
"        :param pd.DataFrame users_df: DataFrame with users and their features defined by\n",
"            user_id and the user feature columns. Not used by this recommender.\n",
"        :param pd.DataFrame items_df: DataFrame with items and their features defined\n",
"            by item_id and the item feature columns. Not used by this recommender.\n",
"        :raises ValueError: If interactions_df is empty, or if negatives are requested although\n",
"            every user has interacted with every item.\n",
"        \"\"\"\n",
"\n",
"        del users_df, items_df\n",
"\n",
"        if len(interactions_df) == 0:\n",
"            raise ValueError(\"interactions_df must contain at least one interaction\")\n",
"\n",
"        # Shift item ids and user ids so that they are consecutive\n",
"\n",
"        unique_item_ids = interactions_df['item_id'].unique()\n",
"        self.item_id_mapping = dict(zip(unique_item_ids, range(len(unique_item_ids))))\n",
"        self.item_id_reverse_mapping = dict(zip(range(len(unique_item_ids)), unique_item_ids))\n",
"        unique_user_ids = interactions_df['user_id'].unique()\n",
"        self.user_id_mapping = dict(zip(unique_user_ids, range(len(unique_user_ids))))\n",
"        self.user_id_reverse_mapping = dict(zip(range(len(unique_user_ids)), unique_user_ids))\n",
"\n",
"        # Series.map translates each id exactly once, so an original id that happens to equal\n",
"        # another id's new index cannot be translated twice\n",
"        interactions_df = interactions_df.copy()\n",
"        interactions_df['item_id'] = interactions_df['item_id'].map(self.item_id_mapping)\n",
"        interactions_df['user_id'] = interactions_df['user_id'].map(self.user_id_mapping)\n",
"\n",
"        # Get the number of items and users\n",
"\n",
"        self.interactions_df = interactions_df.copy()\n",
"        n_users = len(unique_user_ids)\n",
"        n_items = len(unique_item_ids)\n",
"\n",
"        # Get the user-item interaction matrix\n",
"\n",
"        r = np.zeros(shape=(n_users, n_items))\n",
"        r[interactions_df['user_id'].values, interactions_df['item_id'].values] = 1\n",
"        self.r = r\n",
"\n",
"        # Find the most popular items for the cold start problem. Only real (positive) interactions\n",
"        # are counted, which is why this happens before the sampled negatives are appended\n",
"\n",
"        item_popularity = interactions_df.groupby('item_id')['user_id'].count().sort_values(ascending=False)\n",
"        self.most_popular_items = item_popularity.index\n",
"\n",
"        # Indicate positive interactions\n",
"\n",
"        interactions_df.loc[:, 'interacted'] = 1\n",
"\n",
"        # Generate negative interactions: random (user, item) pairs are drawn in chunks and the\n",
"        # pairs without a recorded interaction are kept, in draw order, until enough are collected\n",
"\n",
"        n_negatives = self.n_neg_per_pos * len(interactions_df)\n",
"        if n_negatives > 0 and r.all():\n",
"            # Without this guard the sampling loop below would never terminate\n",
"            raise ValueError(\"Cannot sample negative interactions: every user interacted with every item\")\n",
"\n",
"        sample_size = 1000\n",
"        negative_chunks = [np.empty((0, 2), dtype=np.int64)]\n",
"        n_sampled = 0\n",
"        while n_sampled < n_negatives:\n",
"            user_ids = self.rng.choice(np.arange(n_users), size=sample_size)\n",
"            item_ids = self.rng.choice(np.arange(n_items), size=sample_size)\n",
"            not_interacted = r[user_ids, item_ids] == 0\n",
"            chunk = np.column_stack([user_ids[not_interacted], item_ids[not_interacted]])\n",
"            chunk = chunk[:n_negatives - n_sampled]\n",
"            negative_chunks.append(chunk)\n",
"            n_sampled += len(chunk)\n",
"\n",
"        negative_interactions = pd.DataFrame(np.concatenate(negative_chunks), columns=['user_id', 'item_id'])\n",
"        negative_interactions['interacted'] = 0\n",
"\n",
"        interactions_df = pd.concat([interactions_df, negative_interactions]).reset_index(drop=True)\n",
"\n",
"        # Initialize loss visualization\n",
"\n",
"        live_output = self.print_type == 'live'\n",
"        text_output = self.print_type == 'text'\n",
"        if live_output:\n",
"            liveloss = PlotLosses()\n",
"\n",
"        last_training_avg_loss = 0.0\n",
"        last_validation_avg_loss = 0.0\n",
"\n",
"        # Initialize the network\n",
"\n",
"        self.nn_model = GMFModel(n_items, n_users, self.embedding_dim, self.seed)\n",
"        self.nn_model.train()\n",
"        self.nn_model.to(self.device)\n",
"        self.optimizer = optim.Adam(self.nn_model.parameters(), lr=self.lr, weight_decay=self.weight_decay)\n",
"\n",
"        # Split the data (the validation part stays empty unless self.train is set)\n",
"\n",
"        interaction_ids = self.rng.permutation(len(interactions_df))\n",
"        if self.train:\n",
"            train_validation_slice_idx = int(len(interactions_df) * (1 - self.validation_set_size))\n",
"            training_ids = interaction_ids[:train_validation_slice_idx]\n",
"            validation_ids = interaction_ids[train_validation_slice_idx:]\n",
"        else:\n",
"            training_ids = interaction_ids\n",
"            validation_ids = interaction_ids[:0]\n",
"\n",
"        # Move all examples to the device once; batches are then selected by row position\n",
"        # (the index was reset above, so labels and positions coincide)\n",
"\n",
"        all_inputs = torch.from_numpy(\n",
"            interactions_df[['user_id', 'item_id']].values.astype(np.int64)).to(self.device)\n",
"        all_targets = torch.from_numpy(\n",
"            interactions_df[['interacted']].values.astype(np.float32)).to(self.device)\n",
"        validation_pos = torch.as_tensor(validation_ids, dtype=torch.long, device=self.device)\n",
"\n",
"        # Train the model\n",
"\n",
"        for epoch in range(self.n_epochs):\n",
"            logs = {}\n",
"\n",
"            # Train\n",
"\n",
"            training_total_loss = 0.0\n",
"            self.rng.shuffle(training_ids)\n",
"            n_batches = int(np.ceil(len(training_ids) / self.batch_size))\n",
"\n",
"            for batch_idx in range(n_batches):\n",
"                batch_ids = training_ids[(batch_idx * self.batch_size):((batch_idx + 1) * self.batch_size)]\n",
"                batch_pos = torch.as_tensor(batch_ids, dtype=torch.long, device=self.device)\n",
"\n",
"                # Create responses\n",
"\n",
"                y = self.nn_model(all_inputs[batch_pos])\n",
"\n",
"                # Define loss and backpropagate\n",
"\n",
"                self.optimizer.zero_grad()\n",
"                loss = self._bce_sum(y, all_targets[batch_pos])\n",
"                loss.backward()\n",
"                self.optimizer.step()\n",
"\n",
"                training_total_loss += loss.item()\n",
"\n",
"                if text_output:\n",
"                    print(\"\\rEpoch: {}\\tBatch: {}\\tLast epoch - avg training loss: {:.2f} avg validation loss: {:.2f} loss: {}\".format(\n",
"                        epoch, batch_idx, last_training_avg_loss, last_validation_avg_loss, loss), end=\"\")\n",
"\n",
"            # max(..., 1) avoids a division by zero when the training part is empty\n",
"            last_training_avg_loss = training_total_loss / max(len(training_ids), 1)\n",
"            logs['loss'] = last_training_avg_loss\n",
"\n",
"            # Validate (skipped when there is no validation set, so nothing is divided by zero)\n",
"\n",
"            if len(validation_ids) > 0:\n",
"                with torch.no_grad():  # no gradients are needed for evaluation\n",
"                    y = self.nn_model(all_inputs[validation_pos])\n",
"                    validation_total_loss = self._bce_sum(y, all_targets[validation_pos]).item()\n",
"                last_validation_avg_loss = validation_total_loss / len(validation_ids)\n",
"                logs['val_loss'] = last_validation_avg_loss\n",
"\n",
"            # Plot epoch losses\n",
"\n",
"            if live_output:\n",
"                liveloss.update(logs)\n",
"                liveloss.send()\n",
"\n",
"    def recommend(self, users_df, items_df, n_recommendations=1):\n",
"        \"\"\"\n",
"        Serving of recommendations. Scores items in items_df for each user in users_df and returns\n",
"        top n_recommendations for each user.\n",
"\n",
"        Items unknown from training are not scored. Users unknown from training receive the most\n",
"        popular training items with a score of 1.0.\n",
"\n",
"        :param pd.DataFrame users_df: DataFrame with users and their features for which\n",
"            recommendations should be generated.\n",
"        :param pd.DataFrame items_df: DataFrame with items and their features which should be scored.\n",
"        :param int n_recommendations: Number of recommendations to be returned for each user.\n",
"        :return: DataFrame with user_id, item_id and score as columns returning n_recommendations top recommendations\n",
"            for each user.\n",
"        :rtype: pd.DataFrame\n",
"        \"\"\"\n",
"\n",
"        # Clean previous recommendations (iloc could be used alternatively)\n",
"        self.recommender_df = self.recommender_df[:0]\n",
"\n",
"        # Keep only the items known from training and translate them to internal ids\n",
"\n",
"        is_known_item = items_df['item_id'].isin(list(self.item_id_mapping.keys()))\n",
"        candidate_ids = items_df.loc[is_known_item, 'item_id'].map(self.item_id_mapping).values.astype(np.int64)\n",
"        candidate_tensor = torch.as_tensor(candidate_ids, dtype=torch.long, device=self.device)\n",
"\n",
"        # Generate recommendations (per-user frames are concatenated once at the end)\n",
"\n",
"        user_frames = [self.recommender_df]\n",
"        for user_id in users_df['user_id']:\n",
"            recommendations = []\n",
"\n",
"            if user_id in self.user_id_mapping:\n",
"                if len(candidate_ids) > 0:\n",
"                    mapped_user_id = self.user_id_mapping[user_id]\n",
"\n",
"                    user_column = torch.full_like(candidate_tensor, mapped_user_id)\n",
"                    net_input = torch.stack([user_column, candidate_tensor], dim=1)\n",
"                    with torch.no_grad():\n",
"                        scores = self.nn_model(net_input).flatten().cpu().numpy()\n",
"\n",
"                    # Push already seen items to the end of the ranking. The interaction matrix is\n",
"                    # looked up by item id, so this is correct for any subset of the trained items\n",
"                    if not self.should_recommend_already_bought:\n",
"                        scores[self.r[mapped_user_id, candidate_ids] > 0] = -np.inf\n",
"\n",
"                    # Choose n recommendations based on highest scores\n",
"                    chosen_pos = np.argsort(-scores)[:n_recommendations]\n",
"\n",
"                    for item_pos in chosen_pos:\n",
"                        recommendations.append(\n",
"                            {\n",
"                                'user_id': self.user_id_reverse_mapping[mapped_user_id],\n",
"                                'item_id': self.item_id_reverse_mapping[int(candidate_ids[item_pos])],\n",
"                                'score': scores[item_pos]\n",
"                            }\n",
"                        )\n",
"            else:  # For new users recommend most popular items\n",
"                # There cannot be more recommendations than items seen in training\n",
"                for i in range(min(n_recommendations, len(self.most_popular_items))):\n",
"                    recommendations.append(\n",
"                        {\n",
"                            'user_id': user_id,\n",
"                            'item_id': self.item_id_reverse_mapping[int(self.most_popular_items[i])],\n",
"                            'score': 1.0\n",
"                        }\n",
"                    )\n",
"\n",
"            user_frames.append(pd.DataFrame(recommendations))\n",
"\n",
"        self.recommender_df = pd.concat(user_frames)\n",
"\n",
"        return self.recommender_df\n",
"\n",
"    def get_user_repr(self, user_id):\n",
"        \"\"\"Return the learned embedding of a user as a numpy array (KeyError for users not seen in fit).\"\"\"\n",
"        mapped_user_id = self.user_id_mapping[user_id]\n",
"        return self.nn_model.user_embedding(torch.tensor(mapped_user_id).to(self.device)).detach().cpu().numpy()\n",
"\n",
"    def get_item_repr(self, item_id):\n",
"        \"\"\"Return the learned embedding of an item as a numpy array (KeyError for items not seen in fit).\"\"\"\n",
"        mapped_item_id = self.item_id_mapping[item_id]\n",
"        return self.nn_model.item_embedding(torch.tensor(mapped_item_id).to(self.device)).detach().cpu().numpy()\n",
|
||
|
"\n",
|
||
|
" \n",
|
||
|
"class MLPModel(nn.Module):\n",
|
||
|
" def __init__(self, n_items, n_users, embedding_dim, seed):\n",
|
||
|
" super().__init__()\n",
|
||
|
"\n",
|
||
|
" self.seed = torch.manual_seed(seed)\n",
|
||
|
" self.item_embedding = nn.Embedding(n_items, embedding_dim)\n",
|
||
|
" self.user_embedding = nn.Embedding(n_users, embedding_dim)\n",
|
||
|
" self.fc1 = nn.Linear(2 * embedding_dim, 32, bias=False)\n",
|
||
|
" self.fc2 = nn.Linear(32, 16, bias=False)\n",
|
||
|
" self.fc3 = nn.Linear(16, 1, bias=False)\n",
|
||
|
"\n",
|
||
|
" def forward(self, x):\n",
|
||
|
" user = x[:, 0]\n",
|
||
|
" item = x[:, 1]\n",
|
||
|
" user_embedding = self.user_embedding(user)\n",
|
||
|
" item_embedding = self.item_embedding(item)\n",
|
||
|
" x = torch.cat([user_embedding, item_embedding], dim=1)\n",
|
||
|
" x = torch.relu(self.fc1(x))\n",
|
||
|
" x = torch.relu(self.fc2(x))\n",
|
||
|
" x = torch.sigmoid(self.fc3(x))\n",
|
||
|
"\n",
|
||
|
" return x\n",
|
||
|
"\n",
|
||
|
" \n",
|
||
|
"class NeuMFModel(nn.Module):\n",
|
||
|
" def __init__(self, n_items, n_users, gmf_embedding_dim, mlp_embedding_dim, seed):\n",
|
||
|
" super().__init__()\n",
|
||
|
"\n",
|
||
|
" self.seed = torch.manual_seed(seed)\n",
|
||
|
"\n",
|
||
|
" # GMF\n",
|
||
|
"\n",
|
||
|
" self.gmf_user_embedding = nn.Embedding(n_users, gmf_embedding_dim)\n",
|
||
|
" self.gmf_item_embedding = nn.Embedding(n_items, gmf_embedding_dim)\n",
|
||
|
"\n",
|
||
|
" # MLP\n",
|
||
|
"\n",
|
||
|
" self.mlp_user_embedding = nn.Embedding(n_users, mlp_embedding_dim)\n",
|
||
|
" self.mlp_item_embedding = nn.Embedding(n_items, mlp_embedding_dim)\n",
|
||
|
" self.mlp_fc1 = nn.Linear(2 * mlp_embedding_dim, 32, bias=False)\n",
|
||
|
" self.mlp_fc2 = nn.Linear(32, 16, bias=False)\n",
|
||
|
"\n",
|
||
|
" # Merge\n",
|
||
|
"\n",
|
||
|
" self.fc = nn.Linear(32, 1, bias=False)\n",
|
||
|
"\n",
|
||
|
" def forward(self, x):\n",
|
||
|
" user = x[:, 0]\n",
|
||
|
" item = x[:, 1]\n",
|
||
|
"\n",
|
||
|
" # GMF\n",
|
||
|
"\n",
|
||
|
" gmf_user_embedding = self.gmf_user_embedding(user)\n",
|
||
|
" gmf_item_embedding = self.gmf_item_embedding(item)\n",
|
||
|
" gmf_x = gmf_user_embedding * gmf_item_embedding\n",
|
||
|
"\n",
|
||
|
" # MLP\n",
|
||
|
"\n",
|
||
|
" mlp_user_embedding = self.mlp_user_embedding(user)\n",
|
||
|
" mlp_item_embedding = self.mlp_item_embedding(item)\n",
|
||
|
" mlp_x = torch.cat([mlp_user_embedding, mlp_item_embedding], dim=1)\n",
|
||
|
" mlp_x = torch.relu(self.mlp_fc1(mlp_x))\n",
|
||
|
" mlp_x = torch.relu(self.mlp_fc2(mlp_x))\n",
|
||
|
"\n",
|
||
|
" # Final score\n",
|
||
|
"\n",
|
||
|
" x = torch.cat([gmf_x, mlp_x], dim=1)\n",
|
||
|
" x = torch.sigmoid(self.fc(x))\n",
|
||
|
"\n",
|
||
|
" return x"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "expensive-offering",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"## Quick test of the recommender (training)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 42,
|
||
|
"id": "nonprofit-roads",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAbkAAAI4CAYAAAD3UJfIAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAABN6klEQVR4nO3deXzU1b3/8deZmez7BglZCCCEJewJBhHRqhXc96Vqq63aWq31drXtvfXW297bvdb+tNZa7eZStO7iUnetC4Qt7IusCQHCkkDInjm/P2YICSQQyCTfWd7PxyOPyZz5zsxnSODN+Z7zPcdYaxEREQlHLqcLEBER6S8KORERCVsKORERCVsKORERCVsKORERCVsKORERCVsKORERCVsKOZEBZIzZZIw5y+k6RCKFQk5ERMKWQk7EYcaYGGPMvcaYbf6ve40xMf7HMo0xLxljao0xe4wx7xtjXP7HvmuMqTLG7DfGrDHGnOnsJxEJPh6nCxARfgCUAZMACzwP/CfwX8A3gUogy39sGWCNMUXA7UCptXabMaYQcA9s2SLBTz05EeddC9xjrd1pra0BfgRc73+sFcgBhlprW62171vfgrPtQAww1hgTZa3dZK391JHqRYKYQk7EeUOAzZ3ub/a3AfwCWA+8bozZYIy5C8Baux64E/hvYKcx5kljzBBEpAuFnIjztgFDO90v8Ldhrd1vrf2mtXY4cCHwjYNjb9bax621p/qfa4GfDWzZIsFPIScy8KKMMbEHv4AngP80xmQZYzKBHwJ/BzDGnG+MOckYY4A6fKcpvcaYImPMZ/wTVJqARsDrzMcRCV4KOZGBNw9fKB38igXKgQpgGbAI+LH/2JHAG0A98BHwgLX2bXzjcT8FdgHbgUHA9wbuI4iEBqNNU0VEJFypJyciImFLISciImFLISciImFLISciImHLsWW9MjMzbWFhoVNvLyIiYWLhwoW7rLVZ3T3mWMgVFhZSXl7u1NuLiEiYMMZs7ukxna4UEZGwpZATEZGwpZATEZGwpZATEZGwpZATEZGwpZATEZGwpZATEZGwpZATEZGwpZATEZGwpZATEZGwpZATEZGwpZATEZGwpZATEZGwpZATEZGwpZATEZGwpZATEZGwpZATEZGwpZATEZGwpZATEZGwpZATEZGwFdIh19Lm5cWl22j3WqdLERGRIBTSITdvWTVfe2Ixn/3Nu7y4dBtehZ2IiHQS0iF34cQh3P+5KbiM4WtPLGbOb9/n1eXVWKuwExGRXoacMWa2MWaNMWa9Meaubh7/jTFmif9rrTGmNuCVdsPlMpw3IYdX7zyN3149idZ2L1/5+yLO/90HvLFyh8JORCTCmWMFgTHGDawFzgYqgQXANdbalT0c/zVgsrX2i0d73ZKSElteXn5CRfekrd3Lc0u2cd+b69iyp4GJeSn8x9mjmDUqC2NMQN9LRESCgzFmobW2pLvHetOTmwast9ZusNa2AE8CFx3l+GuAJ46/zL7zuF1cPjWPN785i59eOp5d9S3c8OgCLn/wI/69fpd6diIiEaY3IZcLbO10v9LfdgRjzFBgGPBW30s7cVFuF1dPK+Dtb53O/1xcTNXeRq59+BOufuhj5m/c42RpIiIygAI98eRq4GlrbXt3DxpjbjHGlBtjymtqagL81keK9ri4vmwo73z7dO6+YCwbdh3gyj98xHUPf8LCzXv7/f1FRMRZvQm5KiC/0/08f1t3ruYopyqttQ9Za0ustSVZWVm9r7KPYqPc3DhjGO99+wx+cO4YVlXv47Lff8gNj86norJ2wOoQEZGB1ZuJJx58E0/OxBduC4DPWWtXHHbcaOBVYJjtxeBXQCaebFsC7/wUErMgcTAkDDry+5hkOGzSyYHmNv7y0SYeem8DtQ2tnD12MP9x1ijGDknuWz0iIjLgjjbxxHOsJ1tr24wxtwOvAW7gEWvtCmPMPUC5tfYF/6FXA0/2JuACpqUe6iph2yI4UA
PWe+QxntjDwi+LhMRBfDVxMDdemM7LG9p5tGI5V963kdOKh3F60WCS4zwkxUaRHBvV8X1SrIcod0hfVigiEnGO2ZPrLwG/hMDbDg174MBOqN8B9TXdfO//atjVbSA22SgaicGLwYsLL4Z2XL7vrQHjwho3xuUC/61xHbz14Ha5MG4Pbrcbt9uNy+3BuA7eenC5D35F4fJ4cLk84HKDy+P/ch+6NYe1u6MgNgXi0iAu3X+bBvHpEBUXuD9HEZEQ06eeXMhwuf29tSwYPO7ox3rboWG3P/R2+HqB9Tvx7NtOVGMDLW1teFvbaG1ro7Wtnfa2Vlrb22lva6OtvZ329jba29t9X23t2PY22r1eXLYdQxtuWnAZi5t23P649ODtuO/Gi4d23MbX7jHtuLG+Nv/xvmPbcXHs/4RYdyzEpWHiDwZgatcQPPh953BMygGXeqYiEt7CJ+SOh8sNiYN8XxR3NHuAxBN8SWstzW1e9jW2sq+plX1NbRxobqOlzUtzm5fmtnaaW3v4vs3rv99OS7u3y2MtrW20trbR1tqMbdyLu7mOVFNPKvWHbtvqSW2uJ33fAbLce0g1W0mhniS7jyjb2m293rThuMq+ApM+BzFJJ/ipRUSCW/icrowQbe1eahtb2Xughb0Nrew50EJtg+/7vQ0t/nb//fpmGhvqoWlvRyimcIAsU8slnn8zxayjxZNI28TriD/1q5A21OmPJyJy3I52ulIhFwHavZZ9ja3safAFYs3+Fj7ZuJuq5e9zXsPznOv6BJexbMw8g+hTbyN/whm+cUcRkRCgkJNuWWtZs2M/Hy6qIHnZnzmrYR6p5gCrXSNYPfQ6sqdfQ8mIwXg0q1REgphCTnpl+649bH77EfLX/JkhbVvZaVOZa2azfeQ1TB9fxKyiLBJjInMYV0SCl0JOjo/XS9OaN9j/zn1k7XifZqJ4tm0Gf7fnkj5iMmePHcxZYwaRk6JLF0TEeQo5OXE1a/B+/Hvskidwtzex0DWBB5rO5i3vZMblpnL2mGwumJjD8KwTnZcqItI3Cjnpu4Y9sOgv2Pl/xOyrojYun6fd53Hv7lLaPAk8f9upFGXrUgQRGXh93U9OxHdR+an/gfn6Urj8EVIzsrmp/kEqku7ka1EvcOtjCznQ3OZ0lSIiXSjk5Pi4o6D4MrjpDfjSG7gKyrjN+zjJu5fy/WeXaWNaEQkqCjk5cfmlcMWjEJfGvdmv8fySbTw+f4vTVYmIdFDISd/EJMEpX6Nwz7+5YehufvTCSpZX1TldlYgIoJCTQJh2C8Sl8f2EF0hPiOarjy1iX1P3a2aKiAwkhZz0XUwSTL+d6A3/4tHPuthW28h3nqrQ+JyIOE4hJ4Ex7RaITWXM2gf57uzRvLpiO4/+e5PTVYlIhFPISWDEJsP022Htq9w0oo6zxw7mf+etYtGWvU5XJiIRTCEngXOyrzdn3v0Zv7x8Ijmpsdz+2CL2HmhxujIRiVAKOQmc2BSYfhusfYWU2hXc/7kp7Kpv4Rtzl+D1anxORAaeQk4C6+Qv+8Lu3Z8xIS+V/zp/DG+vqeHB9z51ujIRiUAKOQms2BQouw3WzIPqpVxXNpTzJ+Twy9fW8PGG3U5XJyIRRiEngXewN/fOzzDG8NPLJlCYkcAdTyymZn+z09WJSARRyEngxaVC2VdhzctQXUFijIf7r51CXWMrd/5jMe0anxORAaKQk/5x8lcgxjc2BzAmJ5n/uaiYf6/fzX1vrnO4OBGJFAo56R9xqVB2K6x+CbYvA+CKkjwum5LHfW+t4/11Nc7WJyIRQSEn/afsKxCT3NGbM8bwPxePY+SgRO58cgnb65ocLlBEwp1CTvpPXJqvN7fqRdi+HID4aA8PXDuFxtZ2vvbEItravQ4XKSLhTCEn/avs1i69OYCTBiXxf5eOZ8Gmvfzy9bUOFici4U4hJ/0rLs03CWXVC7BjRUfzRZNy+dzJBTz47qe8uWqHgwWKSDhTyEn/K7sVopO69OYAfn
j+WMYNSeYbc5eydU+DQ8WJSDhTyEn/i0/3XSC+8nnYsbKjOTbKzQPXTsHrtdz++CJa2jQ+JyKBpZCTgTH9tm57c0M
|
||
|
"text/plain": [
|
||
|
"<Figure size 864x576 with 2 Axes>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {
|
||
|
"needs_background": "light"
|
||
|
},
|
||
|
"output_type": "display_data"
|
||
|
},
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Loss\n",
|
||
|
"\ttraining \t (min: 0.130, max: 0.706, cur: 0.130)\n",
|
||
|
"\tvalidation \t (min: 0.224, max: 0.696, cur: 0.226)\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# train=True holds out the validation set whose loss is plotted below; without it\n",
"# GMFRecommender.fit trains on all examples and has no validation data to evaluate\n",
"gmf_recommender = GMFRecommender(print_type='live', n_neg_per_pos=10, batch_size=16,\n",
"                                 embedding_dim=6, lr=0.001, weight_decay=0.0001, n_epochs=20, seed=1,\n",
"                                 train=True)\n",
"gmf_recommender.fit(ml_ratings_df, None, ml_movies_df)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "incorporated-messaging",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"## Quick test of the recommender (recommending)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 5,
|
||
|
"id": "accessible-value",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Recommendations\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>user_id</th>\n",
|
||
|
" <th>item_id</th>\n",
|
||
|
" <th>score</th>\n",
|
||
|
" <th>title</th>\n",
|
||
|
" <th>genres</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>4896</td>\n",
|
||
|
" <td>0.768898</td>\n",
|
||
|
" <td>Harry Potter and the Sorcerer's Stone (a.k.a. Harry Potter and the Philosopher's Stone) (2001)</td>\n",
|
||
|
" <td>Adventure|Children|Fantasy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>435</td>\n",
|
||
|
" <td>0.650600</td>\n",
|
||
|
" <td>Coneheads (1993)</td>\n",
|
||
|
" <td>Comedy|Sci-Fi</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>41566</td>\n",
|
||
|
" <td>0.609373</td>\n",
|
||
|
" <td>Chronicles of Narnia: The Lion, the Witch and the Wardrobe, The (2005)</td>\n",
|
||
|
" <td>Adventure|Children|Fantasy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>6502</td>\n",
|
||
|
" <td>0.535332</td>\n",
|
||
|
" <td>28 Days Later (2002)</td>\n",
|
||
|
" <td>Action|Horror|Sci-Fi</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>145</td>\n",
|
||
|
" <td>0.441272</td>\n",
|
||
|
" <td>Bad Boys (1995)</td>\n",
|
||
|
" <td>Action|Comedy|Crime|Drama|Thriller</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>6537</td>\n",
|
||
|
" <td>0.432268</td>\n",
|
||
|
" <td>Terminator 3: Rise of the Machines (2003)</td>\n",
|
||
|
" <td>Action|Adventure|Sci-Fi</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>6</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>355</td>\n",
|
||
|
" <td>0.421626</td>\n",
|
||
|
" <td>Flintstones, The (1994)</td>\n",
|
||
|
" <td>Children|Comedy|Fantasy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>5673</td>\n",
|
||
|
" <td>0.242538</td>\n",
|
||
|
" <td>Punch-Drunk Love (2002)</td>\n",
|
||
|
" <td>Comedy|Drama|Romance</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>8</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>481</td>\n",
|
||
|
" <td>0.218651</td>\n",
|
||
|
" <td>Kalifornia (1993)</td>\n",
|
||
|
" <td>Drama|Thriller</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>9</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>267</td>\n",
|
||
|
" <td>0.213728</td>\n",
|
||
|
" <td>Major Payne (1995)</td>\n",
|
||
|
" <td>Comedy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>10</th>\n",
|
||
|
" <td>4</td>\n",
|
||
|
" <td>780</td>\n",
|
||
|
" <td>0.858898</td>\n",
|
||
|
" <td>Independence Day (a.k.a. ID4) (1996)</td>\n",
|
||
|
" <td>Action|Adventure|Sci-Fi|Thriller</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>11</th>\n",
|
||
|
" <td>4</td>\n",
|
||
|
" <td>435</td>\n",
|
||
|
" <td>0.634766</td>\n",
|
||
|
" <td>Coneheads (1993)</td>\n",
|
||
|
" <td>Comedy|Sci-Fi</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>12</th>\n",
|
||
|
" <td>4</td>\n",
|
||
|
" <td>41566</td>\n",
|
||
|
" <td>0.597829</td>\n",
|
||
|
" <td>Chronicles of Narnia: The Lion, the Witch and the Wardrobe, The (2005)</td>\n",
|
||
|
" <td>Adventure|Children|Fantasy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>13</th>\n",
|
||
|
" <td>4</td>\n",
|
||
|
" <td>6502</td>\n",
|
||
|
" <td>0.531417</td>\n",
|
||
|
" <td>28 Days Later (2002)</td>\n",
|
||
|
" <td>Action|Horror|Sci-Fi</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>14</th>\n",
|
||
|
" <td>4</td>\n",
|
||
|
" <td>145</td>\n",
|
||
|
" <td>0.447853</td>\n",
|
||
|
" <td>Bad Boys (1995)</td>\n",
|
||
|
" <td>Action|Comedy|Crime|Drama|Thriller</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>15</th>\n",
|
||
|
" <td>4</td>\n",
|
||
|
" <td>6537</td>\n",
|
||
|
" <td>0.439573</td>\n",
|
||
|
" <td>Terminator 3: Rise of the Machines (2003)</td>\n",
|
||
|
" <td>Action|Adventure|Sci-Fi</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>16</th>\n",
|
||
|
" <td>4</td>\n",
|
||
|
" <td>355</td>\n",
|
||
|
" <td>0.430258</td>\n",
|
||
|
" <td>Flintstones, The (1994)</td>\n",
|
||
|
" <td>Children|Comedy|Fantasy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>17</th>\n",
|
||
|
" <td>4</td>\n",
|
||
|
" <td>5673</td>\n",
|
||
|
" <td>0.266561</td>\n",
|
||
|
" <td>Punch-Drunk Love (2002)</td>\n",
|
||
|
" <td>Comedy|Drama|Romance</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>18</th>\n",
|
||
|
" <td>4</td>\n",
|
||
|
" <td>481</td>\n",
|
||
|
" <td>0.243838</td>\n",
|
||
|
" <td>Kalifornia (1993)</td>\n",
|
||
|
" <td>Drama|Thriller</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>19</th>\n",
|
||
|
" <td>4</td>\n",
|
||
|
" <td>267</td>\n",
|
||
|
" <td>0.239114</td>\n",
|
||
|
" <td>Major Payne (1995)</td>\n",
|
||
|
" <td>Comedy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>20</th>\n",
|
||
|
" <td>6</td>\n",
|
||
|
" <td>4896</td>\n",
|
||
|
" <td>0.687780</td>\n",
|
||
|
" <td>Harry Potter and the Sorcerer's Stone (a.k.a. Harry Potter and the Philosopher's Stone) (2001)</td>\n",
|
||
|
" <td>Adventure|Children|Fantasy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>21</th>\n",
|
||
|
" <td>6</td>\n",
|
||
|
" <td>41566</td>\n",
|
||
|
" <td>0.572620</td>\n",
|
||
|
" <td>Chronicles of Narnia: The Lion, the Witch and the Wardrobe, The (2005)</td>\n",
|
||
|
" <td>Adventure|Children|Fantasy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>22</th>\n",
|
||
|
" <td>6</td>\n",
|
||
|
" <td>1500</td>\n",
|
||
|
" <td>0.572483</td>\n",
|
||
|
" <td>Grosse Pointe Blank (1997)</td>\n",
|
||
|
" <td>Comedy|Crime|Romance</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>23</th>\n",
|
||
|
" <td>6</td>\n",
|
||
|
" <td>6502</td>\n",
|
||
|
" <td>0.523220</td>\n",
|
||
|
" <td>28 Days Later (2002)</td>\n",
|
||
|
" <td>Action|Horror|Sci-Fi</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>24</th>\n",
|
||
|
" <td>6</td>\n",
|
||
|
" <td>6537</td>\n",
|
||
|
" <td>0.455307</td>\n",
|
||
|
" <td>Terminator 3: Rise of the Machines (2003)</td>\n",
|
||
|
" <td>Action|Adventure|Sci-Fi</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>25</th>\n",
|
||
|
" <td>6</td>\n",
|
||
|
" <td>5673</td>\n",
|
||
|
" <td>0.321320</td>\n",
|
||
|
" <td>Punch-Drunk Love (2002)</td>\n",
|
||
|
" <td>Comedy|Drama|Romance</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>26</th>\n",
|
||
|
" <td>6</td>\n",
|
||
|
" <td>481</td>\n",
|
||
|
" <td>0.302354</td>\n",
|
||
|
" <td>Kalifornia (1993)</td>\n",
|
||
|
" <td>Drama|Thriller</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>27</th>\n",
|
||
|
" <td>6</td>\n",
|
||
|
" <td>4890</td>\n",
|
||
|
" <td>0.270704</td>\n",
|
||
|
" <td>Shallow Hal (2001)</td>\n",
|
||
|
" <td>Comedy|Fantasy|Romance</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>28</th>\n",
|
||
|
" <td>6</td>\n",
|
||
|
" <td>5954</td>\n",
|
||
|
" <td>0.261981</td>\n",
|
||
|
" <td>25th Hour (2002)</td>\n",
|
||
|
" <td>Crime|Drama</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>29</th>\n",
|
||
|
" <td>6</td>\n",
|
||
|
" <td>3468</td>\n",
|
||
|
" <td>0.239384</td>\n",
|
||
|
" <td>Hustler, The (1961)</td>\n",
|
||
|
" <td>Drama</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"recommendations = gmf_recommender.recommend(pd.DataFrame([[1], [4], [6]], columns=['user_id']), ml_movies_df, 10)\n",
|
||
|
"\n",
|
||
|
"recommendations = pd.merge(recommendations, ml_movies_df, on='item_id', how='left')\n",
|
||
|
"print(\"Recommendations\")\n",
|
||
|
"display(HTML(recommendations.to_html()))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "documentary-barcelona",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"## User and item representations"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 8,
|
||
|
"id": "balanced-detective",
|
||
|
"metadata": {
|
||
|
"scrolled": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"User id=1\n",
|
||
|
"[ 8.8694301e-03 -1.1293894e-09 7.6482260e-01 6.5688614e-06\n",
|
||
|
" 6.1402158e-03 -3.4989858e-10 3.0581679e-05 1.6342730e-05]\n",
|
||
|
"\n",
|
||
|
"User watched\n",
|
||
|
"['Independence Day (a.k.a. ID4) (1996)', 'Grosse Pointe Blank (1997)', 'Ladyhawke (1985)']\n",
|
||
|
"\n",
|
||
|
"User history item representations\n",
|
||
|
"Item id = 780\titem title = Independence Day (a.k.a. ID4) (1996)\n",
|
||
|
"[-2.0800237e-01 -3.2530998e-08 -7.2467870e-01 -7.6390163e-04\n",
|
||
|
" 6.0946174e-02 -1.0309565e-09 -1.6934791e-03 -3.3520073e-02]\n",
|
||
|
"Scalar product=-0.555722\n",
|
||
|
"Score=0.884161\n",
|
||
|
"\n",
|
||
|
"Item id = 1500\titem title = Grosse Pointe Blank (1997)\n",
|
||
|
"[-4.7350328e-02 -1.4992246e-09 -1.5850608e-01 -2.9982104e-05\n",
|
||
|
" 6.0663655e-02 4.1064720e-08 1.5929480e-04 1.2831817e-03]\n",
|
||
|
"Scalar product=-0.121276\n",
|
||
|
"Score=0.609364\n",
|
||
|
"\n",
|
||
|
"Item id = 3479\titem title = Ladyhawke (1985)\n",
|
||
|
"[-2.8682781e-02 6.1106755e-09 6.3241005e-01 -3.3657509e-06\n",
|
||
|
" 9.6770316e-02 9.6757424e-10 -6.0637249e-05 1.5274031e-03]\n",
|
||
|
"Scalar product=0.484021\n",
|
||
|
"Score=0.145174\n",
|
||
|
"\n",
|
||
|
"===============\n",
|
||
|
"Item id = 145\titem title = Bad Boys (1995)\n",
|
||
|
"[-9.6727222e-02 1.2952676e-09 8.4303088e-02 1.5707446e-05\n",
|
||
|
" 9.7245917e-02 -9.5372132e-10 -9.6978983e-05 1.0601738e-02]\n",
|
||
|
"Scalar product=0.064216\n",
|
||
|
"Score=0.441272\n",
|
||
|
"\n",
|
||
|
"Item id = 171\titem title = Jeffrey (1995)\n",
|
||
|
"[ 7.6405336e-03 -6.6923184e-10 9.0268552e-01 -5.7306852e-06\n",
|
||
|
" -1.5152089e-02 -9.7515729e-10 -1.3149886e-04 4.9494698e-08]\n",
|
||
|
"Scalar product=0.690369\n",
|
||
|
"Score=0.073709\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"user_id = 1\n",
|
||
|
"user_repr = gmf_recommender.get_user_repr(user_id=user_id)\n",
|
||
|
"print(\"User id={}\".format(user_id))\n",
|
||
|
"print(user_repr)\n",
|
||
|
"print()\n",
|
||
|
"\n",
|
||
|
"print(\"User watched\")\n",
|
||
|
"print(ml_df.loc[ml_df['user_id'] == user_id, 'title'].tolist())\n",
|
||
|
"print()\n",
|
||
|
"\n",
|
||
|
"print('User history item representations')\n",
|
||
|
"for item_id in ml_df.loc[ml_df['user_id'] == user_id, 'item_id'].tolist():\n",
|
||
|
" item_repr = gmf_recommender.get_item_repr(item_id=item_id)\n",
|
||
|
" print(\"Item id = {}\\titem title = {}\".format(\n",
|
||
|
" item_id, ml_movies_df.loc[ml_movies_df['item_id'] == item_id, 'title'].iloc[0]))\n",
|
||
|
" print(item_repr)\n",
|
||
|
" scalar_product = np.dot(user_repr, item_repr)\n",
|
||
|
" print(\"Scalar product={:.6f}\".format(scalar_product))\n",
|
||
|
" score = gmf_recommender.nn_model(\n",
|
||
|
" torch.tensor([[gmf_recommender.user_id_mapping[user_id], \n",
|
||
|
" gmf_recommender.item_id_mapping[item_id]]]).to(gmf_recommender.device)).flatten().detach().cpu().item()\n",
|
||
|
" print(\"Score={:.6f}\".format(score))\n",
|
||
|
" print()\n",
|
||
|
"\n",
|
||
|
"print(\"===============\")\n",
|
||
|
" \n",
|
||
|
"item_id = 145\n",
|
||
|
"item_repr = gmf_recommender.get_item_repr(item_id=item_id)\n",
|
||
|
"print(\"Item id = {}\\titem title = {}\".format(item_id, ml_movies_df.loc[ml_movies_df['item_id'] == item_id, 'title'].iloc[0]))\n",
|
||
|
"print(item_repr)\n",
|
||
|
"score = np.dot(user_repr, item_repr)\n",
|
||
|
"print(\"Scalar product={:.6f}\".format(score))\n",
|
||
|
"score = gmf_recommender.nn_model(\n",
|
||
|
" torch.tensor([[gmf_recommender.user_id_mapping[user_id], \n",
|
||
|
" gmf_recommender.item_id_mapping[item_id]]]).to(gmf_recommender.device)).flatten().detach().cpu().item()\n",
|
||
|
"print(\"Score={:.6f}\".format(score))\n",
|
||
|
"print()\n",
|
||
|
"\n",
|
||
|
"item_id = 171\n",
|
||
|
"item_repr = gmf_recommender.get_item_repr(item_id=item_id)\n",
|
||
|
"print(\"Item id = {}\\titem title = {}\".format(item_id, ml_movies_df.loc[ml_movies_df['item_id'] == item_id, 'title'].iloc[0]))\n",
|
||
|
"print(item_repr)\n",
|
||
|
"score = np.dot(user_repr, item_repr)\n",
|
||
|
"print(\"Scalar product={:.6f}\".format(score))\n",
|
||
|
"score = gmf_recommender.nn_model(\n",
|
||
|
" torch.tensor([[gmf_recommender.user_id_mapping[user_id], \n",
|
||
|
" gmf_recommender.item_id_mapping[item_id]]]).to(gmf_recommender.device)).flatten().detach().cpu().item()\n",
|
||
|
"print(\"Score={:.6f}\".format(score))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "framed-negative",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Training-test split evaluation"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 9,
|
||
|
"id": "amended-future",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"from evaluation_and_testing.testing import evaluate_train_test_split_implicit"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 43,
|
||
|
"id": "unsigned-video",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>Recommender</th>\n",
|
||
|
" <th>HR@1</th>\n",
|
||
|
" <th>HR@3</th>\n",
|
||
|
" <th>HR@5</th>\n",
|
||
|
" <th>HR@10</th>\n",
|
||
|
" <th>NDCG@1</th>\n",
|
||
|
" <th>NDCG@3</th>\n",
|
||
|
" <th>NDCG@5</th>\n",
|
||
|
" <th>NDCG@10</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>GMFRecommender</td>\n",
|
||
|
" <td>0.292208</td>\n",
|
||
|
" <td>0.487013</td>\n",
|
||
|
" <td>0.662338</td>\n",
|
||
|
" <td>0.805195</td>\n",
|
||
|
" <td>0.292208</td>\n",
|
||
|
" <td>0.404914</td>\n",
|
||
|
" <td>0.477292</td>\n",
|
||
|
" <td>0.52351</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"gmf_recommender = GMFRecommender(n_neg_per_pos=10, batch_size=16, \n",
|
||
|
" embedding_dim=6, lr=0.001, weight_decay=0.0001, n_epochs=20)\n",
|
||
|
"\n",
|
||
|
"gmf_tts_results = [['GMFRecommender'] + list(evaluate_train_test_split_implicit(\n",
|
||
|
" gmf_recommender, ml_ratings_df.loc[:, ['user_id', 'item_id']], ml_movies_df))]\n",
|
||
|
"\n",
|
||
|
"gmf_tts_results = pd.DataFrame(\n",
|
||
|
" gmf_tts_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n",
|
||
|
"\n",
|
||
|
"display(HTML(gmf_tts_results.to_html()))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 14,
|
||
|
"id": "romantic-music",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>Recommender</th>\n",
|
||
|
" <th>HR@1</th>\n",
|
||
|
" <th>HR@3</th>\n",
|
||
|
" <th>HR@5</th>\n",
|
||
|
" <th>HR@10</th>\n",
|
||
|
" <th>NDCG@1</th>\n",
|
||
|
" <th>NDCG@3</th>\n",
|
||
|
" <th>NDCG@5</th>\n",
|
||
|
" <th>NDCG@10</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>NetflixRecommender</td>\n",
|
||
|
" <td>0.292208</td>\n",
|
||
|
" <td>0.538961</td>\n",
|
||
|
" <td>0.733766</td>\n",
|
||
|
" <td>0.948052</td>\n",
|
||
|
" <td>0.292208</td>\n",
|
||
|
" <td>0.434289</td>\n",
|
||
|
" <td>0.514203</td>\n",
|
||
|
" <td>0.583217</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"from recommenders.netflix_recommender import NetflixRecommender\n",
|
||
|
"\n",
|
||
|
"netflix_recommender = NetflixRecommender(n_epochs=150)\n",
|
||
|
"\n",
|
||
|
"netflix_tts_results = [['NetflixRecommender'] + list(evaluate_train_test_split_implicit(\n",
|
||
|
" netflix_recommender, ml_ratings_df.loc[:, ['user_id', 'item_id']], ml_movies_df))]\n",
|
||
|
"\n",
|
||
|
"netflix_tts_results = pd.DataFrame(\n",
|
||
|
" netflix_tts_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n",
|
||
|
"\n",
|
||
|
"display(HTML(netflix_tts_results.to_html()))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 15,
|
||
|
"id": "standing-tiffany",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>Recommender</th>\n",
|
||
|
" <th>HR@1</th>\n",
|
||
|
" <th>HR@3</th>\n",
|
||
|
" <th>HR@5</th>\n",
|
||
|
" <th>HR@10</th>\n",
|
||
|
" <th>NDCG@1</th>\n",
|
||
|
" <th>NDCG@3</th>\n",
|
||
|
" <th>NDCG@5</th>\n",
|
||
|
" <th>NDCG@10</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>AmazonRecommender</td>\n",
|
||
|
" <td>0.181818</td>\n",
|
||
|
" <td>0.311688</td>\n",
|
||
|
" <td>0.402597</td>\n",
|
||
|
" <td>0.551948</td>\n",
|
||
|
" <td>0.181818</td>\n",
|
||
|
" <td>0.257806</td>\n",
|
||
|
" <td>0.294682</td>\n",
|
||
|
" <td>0.34147</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"from recommenders.amazon_recommender import AmazonRecommender\n",
|
||
|
"\n",
|
||
|
"amazon_recommender = AmazonRecommender()\n",
|
||
|
"\n",
|
||
|
"amazon_tts_results = [['AmazonRecommender'] + list(evaluate_train_test_split_implicit(\n",
|
||
|
" amazon_recommender, ml_ratings_df.loc[:, ['user_id', 'item_id']], ml_movies_df))]\n",
|
||
|
"\n",
|
||
|
"amazon_tts_results = pd.DataFrame(\n",
|
||
|
" amazon_tts_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n",
|
||
|
"\n",
|
||
|
"display(HTML(amazon_tts_results.to_html()))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 16,
|
||
|
"id": "saving-harrison",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>Recommender</th>\n",
|
||
|
" <th>HR@1</th>\n",
|
||
|
" <th>HR@3</th>\n",
|
||
|
" <th>HR@5</th>\n",
|
||
|
" <th>HR@10</th>\n",
|
||
|
" <th>NDCG@1</th>\n",
|
||
|
" <th>NDCG@3</th>\n",
|
||
|
" <th>NDCG@5</th>\n",
|
||
|
" <th>NDCG@10</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>TFIDFRecommender</td>\n",
|
||
|
" <td>0.025974</td>\n",
|
||
|
" <td>0.090909</td>\n",
|
||
|
" <td>0.136364</td>\n",
|
||
|
" <td>0.318182</td>\n",
|
||
|
" <td>0.025974</td>\n",
|
||
|
" <td>0.064393</td>\n",
|
||
|
" <td>0.083685</td>\n",
|
||
|
" <td>0.140799</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"from recommenders.tfidf_recommender import TFIDFRecommender\n",
|
||
|
"\n",
|
||
|
"tfidf_recommender = TFIDFRecommender()\n",
|
||
|
"\n",
|
||
|
"tfidf_tts_results = [['TFIDFRecommender'] + list(evaluate_train_test_split_implicit(\n",
|
||
|
" tfidf_recommender, ml_ratings_df.loc[:, ['user_id', 'item_id']], ml_movies_df))]\n",
|
||
|
"\n",
|
||
|
"tfidf_tts_results = pd.DataFrame(\n",
|
||
|
" tfidf_tts_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n",
|
||
|
"\n",
|
||
|
"display(HTML(tfidf_tts_results.to_html()))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 44,
|
||
|
"id": "random-source",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>Recommender</th>\n",
|
||
|
" <th>HR@1</th>\n",
|
||
|
" <th>HR@3</th>\n",
|
||
|
" <th>HR@5</th>\n",
|
||
|
" <th>HR@10</th>\n",
|
||
|
" <th>NDCG@1</th>\n",
|
||
|
" <th>NDCG@3</th>\n",
|
||
|
" <th>NDCG@5</th>\n",
|
||
|
" <th>NDCG@10</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>GMFRecommender</td>\n",
|
||
|
" <td>0.292208</td>\n",
|
||
|
" <td>0.487013</td>\n",
|
||
|
" <td>0.662338</td>\n",
|
||
|
" <td>0.805195</td>\n",
|
||
|
" <td>0.292208</td>\n",
|
||
|
" <td>0.404914</td>\n",
|
||
|
" <td>0.477292</td>\n",
|
||
|
" <td>0.523510</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>NetflixRecommender</td>\n",
|
||
|
" <td>0.292208</td>\n",
|
||
|
" <td>0.538961</td>\n",
|
||
|
" <td>0.733766</td>\n",
|
||
|
" <td>0.948052</td>\n",
|
||
|
" <td>0.292208</td>\n",
|
||
|
" <td>0.434289</td>\n",
|
||
|
" <td>0.514203</td>\n",
|
||
|
" <td>0.583217</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>AmazonRecommender</td>\n",
|
||
|
" <td>0.181818</td>\n",
|
||
|
" <td>0.311688</td>\n",
|
||
|
" <td>0.402597</td>\n",
|
||
|
" <td>0.551948</td>\n",
|
||
|
" <td>0.181818</td>\n",
|
||
|
" <td>0.257806</td>\n",
|
||
|
" <td>0.294682</td>\n",
|
||
|
" <td>0.341470</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>TFIDFRecommender</td>\n",
|
||
|
" <td>0.025974</td>\n",
|
||
|
" <td>0.090909</td>\n",
|
||
|
" <td>0.136364</td>\n",
|
||
|
" <td>0.318182</td>\n",
|
||
|
" <td>0.025974</td>\n",
|
||
|
" <td>0.064393</td>\n",
|
||
|
" <td>0.083685</td>\n",
|
||
|
" <td>0.140799</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"tts_results = pd.concat([gmf_tts_results, netflix_tts_results, amazon_tts_results, tfidf_tts_results]).reset_index(drop=True)\n",
|
||
|
"display(HTML(tts_results.to_html()))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "continued-harassment",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Leave-one-out evaluation"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 30,
|
||
|
"id": "exact-stuff",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"from evaluation_and_testing.testing import evaluate_leave_one_out_implicit"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "divided-resistance",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"gmf_recommender = GMFRecommender(n_epochs=10)\n",
|
||
|
"\n",
|
||
|
"gmf_loo_results = [['GMFRecommender'] + list(evaluate_leave_one_out_implicit(\n",
|
||
|
" gmf_recommender, ml_ratings_df.loc[:, ['user_id', 'item_id']], ml_movies_df, max_evals=300, seed=6789))]\n",
|
||
|
"\n",
|
||
|
"gmf_loo_results = pd.DataFrame(\n",
|
||
|
" gmf_loo_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n",
|
||
|
"\n",
|
||
|
"display(HTML(gmf_loo_results.to_html()))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 31,
|
||
|
"id": "prerequisite-lounge",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>Recommender</th>\n",
|
||
|
" <th>HR@1</th>\n",
|
||
|
" <th>HR@3</th>\n",
|
||
|
" <th>HR@5</th>\n",
|
||
|
" <th>HR@10</th>\n",
|
||
|
" <th>NDCG@1</th>\n",
|
||
|
" <th>NDCG@3</th>\n",
|
||
|
" <th>NDCG@5</th>\n",
|
||
|
" <th>NDCG@10</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>UserBasedCosineNearestNeighborsRecommender</td>\n",
|
||
|
" <td>0.096667</td>\n",
|
||
|
" <td>0.146667</td>\n",
|
||
|
" <td>0.186667</td>\n",
|
||
|
" <td>0.306667</td>\n",
|
||
|
" <td>0.096667</td>\n",
|
||
|
" <td>0.124285</td>\n",
|
||
|
" <td>0.140782</td>\n",
|
||
|
" <td>0.178962</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"netflix_recommender = NetflixRecommender(n_epochs=10)\n",
|
||
|
"\n",
|
||
|
"netflix_loo_results = [['NetflixRecommender'] + list(evaluate_leave_one_out_implicit(\n",
|
||
|
" netflix_recommender, ml_ratings_df.loc[:, ['user_id', 'item_id']], ml_movies_df, max_evals=300, seed=6789))]\n",
|
||
|
"\n",
|
||
|
"netflix_loo_results = pd.DataFrame(\n",
|
||
|
" netflix_loo_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n",
|
||
|
"\n",
|
||
|
"display(HTML(netflix_loo_results.to_html()))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 35,
|
||
|
"id": "social-escape",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>Recommender</th>\n",
|
||
|
" <th>HR@1</th>\n",
|
||
|
" <th>HR@3</th>\n",
|
||
|
" <th>HR@5</th>\n",
|
||
|
" <th>HR@10</th>\n",
|
||
|
" <th>NDCG@1</th>\n",
|
||
|
" <th>NDCG@3</th>\n",
|
||
|
" <th>NDCG@5</th>\n",
|
||
|
" <th>NDCG@10</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>AmazonRecommender</td>\n",
|
||
|
" <td>0.166667</td>\n",
|
||
|
" <td>0.256667</td>\n",
|
||
|
" <td>0.32</td>\n",
|
||
|
" <td>0.426667</td>\n",
|
||
|
" <td>0.166667</td>\n",
|
||
|
" <td>0.219086</td>\n",
|
||
|
" <td>0.245486</td>\n",
|
||
|
" <td>0.279978</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"from recommenders.amazon_recommender import AmazonRecommender\n",
|
||
|
"\n",
|
||
|
"amazon_recommender = AmazonRecommender()\n",
|
||
|
"\n",
|
||
|
"amazon_loo_results = [['AmazonRecommender'] + list(evaluate_leave_one_out_implicit(\n",
|
||
|
" amazon_recommender, ml_ratings_df.loc[:, ['user_id', 'item_id']], ml_movies_df, max_evals=300, seed=6789))]\n",
|
||
|
"\n",
|
||
|
"amazon_loo_results = pd.DataFrame(\n",
|
||
|
" amazon_loo_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n",
|
||
|
"\n",
|
||
|
"display(HTML(amazon_loo_results.to_html()))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 36,
|
||
|
"id": "behind-cambodia",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>Recommender</th>\n",
|
||
|
" <th>HR@1</th>\n",
|
||
|
" <th>HR@3</th>\n",
|
||
|
" <th>HR@5</th>\n",
|
||
|
" <th>HR@10</th>\n",
|
||
|
" <th>NDCG@1</th>\n",
|
||
|
" <th>NDCG@3</th>\n",
|
||
|
" <th>NDCG@5</th>\n",
|
||
|
" <th>NDCG@10</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>TFIDFRecommender</td>\n",
|
||
|
" <td>0.006667</td>\n",
|
||
|
" <td>0.053333</td>\n",
|
||
|
" <td>0.123333</td>\n",
|
||
|
" <td>0.233333</td>\n",
|
||
|
" <td>0.006667</td>\n",
|
||
|
" <td>0.033491</td>\n",
|
||
|
" <td>0.062178</td>\n",
|
||
|
" <td>0.096151</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"tfidf_recommender = TFIDFRecommender()\n",
|
||
|
"\n",
|
||
|
"tfidf_loo_results = [['TFIDFRecommender'] + list(evaluate_leave_one_out_implicit(\n",
|
||
|
" tfidf_recommender, ml_ratings_df.loc[:, ['user_id', 'item_id']], ml_movies_df, max_evals=300, seed=6789))]\n",
|
||
|
"\n",
|
||
|
"tfidf_loo_results = pd.DataFrame(\n",
|
||
|
" tfidf_loo_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n",
|
||
|
"\n",
|
||
|
"display(HTML(tfidf_loo_results.to_html()))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 37,
|
||
|
"id": "lightweight-password",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>Recommender</th>\n",
|
||
|
" <th>HR@1</th>\n",
|
||
|
" <th>HR@3</th>\n",
|
||
|
" <th>HR@5</th>\n",
|
||
|
" <th>HR@10</th>\n",
|
||
|
" <th>NDCG@1</th>\n",
|
||
|
" <th>NDCG@3</th>\n",
|
||
|
" <th>NDCG@5</th>\n",
|
||
|
" <th>NDCG@10</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>UserBasedCosineNearestNeighborsRecommender</td>\n",
|
||
|
" <td>0.096667</td>\n",
|
||
|
" <td>0.146667</td>\n",
|
||
|
" <td>0.186667</td>\n",
|
||
|
" <td>0.306667</td>\n",
|
||
|
" <td>0.096667</td>\n",
|
||
|
" <td>0.124285</td>\n",
|
||
|
" <td>0.140782</td>\n",
|
||
|
" <td>0.178962</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>UserBasedCosineNearestNeighborsRecommender</td>\n",
|
||
|
" <td>0.100000</td>\n",
|
||
|
" <td>0.150000</td>\n",
|
||
|
" <td>0.180000</td>\n",
|
||
|
" <td>0.313333</td>\n",
|
||
|
" <td>0.100000</td>\n",
|
||
|
" <td>0.127182</td>\n",
|
||
|
" <td>0.139518</td>\n",
|
||
|
" <td>0.181748</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>UserBasedCosineNearestNeighborsRecommender</td>\n",
|
||
|
" <td>0.266667</td>\n",
|
||
|
" <td>0.420000</td>\n",
|
||
|
" <td>0.513333</td>\n",
|
||
|
" <td>0.650000</td>\n",
|
||
|
" <td>0.266667</td>\n",
|
||
|
" <td>0.357736</td>\n",
|
||
|
" <td>0.396033</td>\n",
|
||
|
" <td>0.440599</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>UserBasedCosineNearestNeighborsRecommender</td>\n",
|
||
|
" <td>0.173333</td>\n",
|
||
|
" <td>0.280000</td>\n",
|
||
|
" <td>0.336667</td>\n",
|
||
|
" <td>0.420000</td>\n",
|
||
|
" <td>0.173333</td>\n",
|
||
|
" <td>0.234522</td>\n",
|
||
|
" <td>0.257759</td>\n",
|
||
|
" <td>0.284723</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>AmazonRecommender</td>\n",
|
||
|
" <td>0.166667</td>\n",
|
||
|
" <td>0.256667</td>\n",
|
||
|
" <td>0.320000</td>\n",
|
||
|
" <td>0.426667</td>\n",
|
||
|
" <td>0.166667</td>\n",
|
||
|
" <td>0.219086</td>\n",
|
||
|
" <td>0.245486</td>\n",
|
||
|
" <td>0.279978</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5</th>\n",
|
||
|
" <td>TFIDFRecommender</td>\n",
|
||
|
" <td>0.006667</td>\n",
|
||
|
" <td>0.053333</td>\n",
|
||
|
" <td>0.123333</td>\n",
|
||
|
" <td>0.233333</td>\n",
|
||
|
" <td>0.006667</td>\n",
|
||
|
" <td>0.033491</td>\n",
|
||
|
" <td>0.062178</td>\n",
|
||
|
" <td>0.096151</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"<IPython.core.display.HTML object>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"loo_results = pd.concat([gmf_loo_results, netflix_loo_results, amazon_loo_results, tfidf_loo_results]).reset_index(drop=True)\n",
|
||
|
"display(HTML(loo_results.to_html()))"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"kernelspec": {
|
||
|
"display_name": "Python 3",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
|
"version": "3.6.9"
|
||
|
}
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 5
|
||
|
}
|