2021-05-07 22:16:28 +02:00
|
|
|
{
|
|
|
|
"cells": [
|
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"metadata": {},
|
|
|
|
"source": [
|
|
|
|
"# Self made RP3-beta"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 1,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"import helpers\n",
|
|
|
|
"import pandas as pd\n",
|
|
|
|
"import numpy as np\n",
|
|
|
|
"import scipy.sparse as sparse\n",
|
|
|
|
"from collections import defaultdict\n",
|
|
|
|
"from itertools import chain\n",
|
|
|
|
"import random\n",
|
|
|
|
"import time\n",
|
|
|
|
"import matplotlib.pyplot as plt\n",
|
|
|
|
"\n",
|
|
|
|
"train_read = pd.read_csv(\"./Datasets/ml-100k/train.csv\", sep=\"\\t\", header=None)\n",
|
|
|
|
"test_read = pd.read_csv(\"./Datasets/ml-100k/test.csv\", sep=\"\\t\", header=None)\n",
|
|
|
|
"(\n",
|
|
|
|
" train_ui,\n",
|
|
|
|
" test_ui,\n",
|
|
|
|
" user_code_id,\n",
|
|
|
|
" user_id_code,\n",
|
|
|
|
" item_code_id,\n",
|
|
|
|
" item_id_code,\n",
|
|
|
|
") = helpers.data_to_csr(train_read, test_read)"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 2,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"class RP3Beta:\n",
|
|
|
|
" def fit(self, train_ui, alpha, beta):\n",
|
|
|
|
" \"\"\"We weight our edges by user's explicit ratings so if user rated movie high we'll follow that path\n",
|
|
|
|
" with higher probability.\"\"\"\n",
|
|
|
|
" self.train_ui = train_ui\n",
|
|
|
|
" self.train_iu = train_ui.transpose()\n",
|
|
|
|
"\n",
|
|
|
|
" self.alpha = alpha\n",
|
|
|
|
" self.beta = beta\n",
|
|
|
|
"\n",
|
|
|
|
" # Define Pui\n",
|
|
|
|
" Pui = sparse.csr_matrix(self.train_ui / self.train_ui.sum(axis=1))\n",
|
|
|
|
"\n",
|
|
|
|
" # Define Piu\n",
|
|
|
|
" to_divide = np.vectorize(lambda x: x if x > 0 else 1)(\n",
|
|
|
|
" self.train_iu.sum(axis=1)\n",
|
|
|
|
" ) # to avoid dividing by zero\n",
|
|
|
|
" Piu = sparse.csr_matrix(self.train_iu / to_divide)\n",
|
|
|
|
" item_orders = (self.train_ui > 0).sum(axis=0)\n",
|
|
|
|
"\n",
|
|
|
|
" Pui = Pui.power(self.alpha)\n",
|
|
|
|
" Piu = Piu.power(self.alpha)\n",
|
|
|
|
"\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" P3 = Pui * Piu * Pui \n",
|
2021-05-07 22:16:28 +02:00
|
|
|
"\n",
|
|
|
|
" P3 /= np.power(\n",
|
|
|
|
" np.vectorize(lambda x: x if x > 0 else 1)(item_orders), self.beta\n",
|
|
|
|
" )\n",
|
|
|
|
"\n",
|
|
|
|
" self.estimations = np.array(P3)\n",
|
|
|
|
"\n",
|
|
|
|
" def recommend(self, user_code_id, item_code_id, topK=10):\n",
|
|
|
|
"\n",
|
|
|
|
" top_k = defaultdict(list)\n",
|
|
|
|
" for nb_user, user in enumerate(self.estimations):\n",
|
|
|
|
"\n",
|
|
|
|
" user_rated = self.train_ui.indices[\n",
|
|
|
|
" self.train_ui.indptr[nb_user] : self.train_ui.indptr[nb_user + 1]\n",
|
|
|
|
" ]\n",
|
|
|
|
" for item, score in enumerate(user):\n",
|
|
|
|
" if item not in user_rated and not np.isnan(score):\n",
|
|
|
|
" top_k[user_code_id[nb_user]].append((item_code_id[item], score))\n",
|
|
|
|
" result = []\n",
|
|
|
|
" # Let's choose k best items in the format: (user, item1, score1, item2, score2, ...)\n",
|
|
|
|
" for uid, item_scores in top_k.items():\n",
|
|
|
|
" item_scores.sort(key=lambda x: x[1], reverse=True)\n",
|
|
|
|
" result.append([uid] + list(chain(*item_scores[:topK])))\n",
|
|
|
|
" return result\n",
|
|
|
|
"\n",
|
|
|
|
" def estimate(self, user_code_id, item_code_id, test_ui):\n",
|
|
|
|
" result = []\n",
|
|
|
|
" for user, item in zip(*test_ui.nonzero()):\n",
|
|
|
|
" result.append(\n",
|
|
|
|
" [\n",
|
|
|
|
" user_code_id[user],\n",
|
|
|
|
" item_code_id[item],\n",
|
|
|
|
" self.estimations[user, item]\n",
|
|
|
|
" if not np.isnan(self.estimations[user, item])\n",
|
|
|
|
" else 1,\n",
|
|
|
|
" ]\n",
|
|
|
|
" )\n",
|
|
|
|
" return result"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 3,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"model = RP3Beta()\n",
|
|
|
|
"model.fit(train_ui, alpha=1, beta=0)"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 4,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"top_n = pd.DataFrame(model.recommend(user_code_id, item_code_id, topK=10))\n",
|
|
|
|
"\n",
|
|
|
|
"top_n.to_csv(\n",
|
|
|
|
" \"Recommendations generated/ml-100k/Self_P3_reco.csv\", index=False, header=False\n",
|
|
|
|
")\n",
|
|
|
|
"\n",
|
|
|
|
"estimations = pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))\n",
|
|
|
|
"estimations.to_csv(\n",
|
|
|
|
" \"Recommendations generated/ml-100k/Self_P3_estimations.csv\",\n",
|
|
|
|
" index=False,\n",
|
|
|
|
" header=False,\n",
|
|
|
|
")"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 5,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"name": "stderr",
|
|
|
|
"output_type": "stream",
|
|
|
|
"text": [
|
2021-06-12 11:14:56 +02:00
|
|
|
"943it [00:00, 12434.93it/s]\n"
|
2021-05-07 22:16:28 +02:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/html": [
|
|
|
|
"<div>\n",
|
|
|
|
"<style scoped>\n",
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
" }\n",
|
|
|
|
"\n",
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
" }\n",
|
|
|
|
"\n",
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
" text-align: right;\n",
|
|
|
|
" }\n",
|
|
|
|
"</style>\n",
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
" <thead>\n",
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
" <th></th>\n",
|
|
|
|
" <th>RMSE</th>\n",
|
|
|
|
" <th>MAE</th>\n",
|
|
|
|
" <th>precision</th>\n",
|
|
|
|
" <th>recall</th>\n",
|
|
|
|
" <th>F_1</th>\n",
|
|
|
|
" <th>F_05</th>\n",
|
|
|
|
" <th>precision_super</th>\n",
|
|
|
|
" <th>recall_super</th>\n",
|
|
|
|
" <th>NDCG</th>\n",
|
|
|
|
" <th>mAP</th>\n",
|
|
|
|
" <th>MRR</th>\n",
|
|
|
|
" <th>LAUC</th>\n",
|
|
|
|
" <th>HR</th>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <th>HitRate2</th>\n",
|
|
|
|
" <th>HitRate3</th>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" <th>Reco in test</th>\n",
|
|
|
|
" <th>Test coverage</th>\n",
|
|
|
|
" <th>Shannon</th>\n",
|
|
|
|
" <th>Gini</th>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" </thead>\n",
|
|
|
|
" <tbody>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>0</th>\n",
|
|
|
|
" <td>3.702446</td>\n",
|
|
|
|
" <td>3.527273</td>\n",
|
|
|
|
" <td>0.282185</td>\n",
|
|
|
|
" <td>0.192092</td>\n",
|
|
|
|
" <td>0.186749</td>\n",
|
|
|
|
" <td>0.21698</td>\n",
|
|
|
|
" <td>0.204185</td>\n",
|
|
|
|
" <td>0.240096</td>\n",
|
|
|
|
" <td>0.339114</td>\n",
|
|
|
|
" <td>0.204905</td>\n",
|
|
|
|
" <td>0.572157</td>\n",
|
|
|
|
" <td>0.593544</td>\n",
|
|
|
|
" <td>0.875928</td>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <td>0.685048</td>\n",
|
|
|
|
" <td>0.495228</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" <td>1.0</td>\n",
|
|
|
|
" <td>0.077201</td>\n",
|
|
|
|
" <td>3.875892</td>\n",
|
|
|
|
" <td>0.974947</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" </tbody>\n",
|
|
|
|
"</table>\n",
|
|
|
|
"</div>"
|
|
|
|
],
|
|
|
|
"text/plain": [
|
|
|
|
" RMSE MAE precision recall F_1 F_05 \\\n",
|
|
|
|
"0 3.702446 3.527273 0.282185 0.192092 0.186749 0.21698 \n",
|
|
|
|
"\n",
|
|
|
|
" precision_super recall_super NDCG mAP MRR LAUC \\\n",
|
|
|
|
"0 0.204185 0.240096 0.339114 0.204905 0.572157 0.593544 \n",
|
|
|
|
"\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" HR HitRate2 HitRate3 Reco in test Test coverage Shannon \\\n",
|
|
|
|
"0 0.875928 0.685048 0.495228 1.0 0.077201 3.875892 \n",
|
|
|
|
"\n",
|
|
|
|
" Gini \n",
|
|
|
|
"0 0.974947 "
|
2021-05-07 22:16:28 +02:00
|
|
|
]
|
|
|
|
},
|
|
|
|
"execution_count": 5,
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"import evaluation_measures as ev\n",
|
|
|
|
"\n",
|
|
|
|
"estimations_df = pd.read_csv(\n",
|
|
|
|
" \"Recommendations generated/ml-100k/Self_P3_estimations.csv\", header=None\n",
|
|
|
|
")\n",
|
|
|
|
"reco = np.loadtxt(\"Recommendations generated/ml-100k/Self_P3_reco.csv\", delimiter=\",\")\n",
|
|
|
|
"\n",
|
|
|
|
"ev.evaluate(\n",
|
|
|
|
" test=pd.read_csv(\"./Datasets/ml-100k/test.csv\", sep=\"\\t\", header=None),\n",
|
|
|
|
" estimations_df=estimations_df,\n",
|
|
|
|
" reco=reco,\n",
|
|
|
|
" super_reactions=[4, 5],\n",
|
|
|
|
")"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"metadata": {},
|
|
|
|
"source": [
|
|
|
|
"# Let's check hyperparameters"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"metadata": {},
|
|
|
|
"source": [
|
|
|
|
"##### Alpha"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 6,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"name": "stderr",
|
|
|
|
"output_type": "stream",
|
|
|
|
"text": [
|
2021-06-12 11:14:56 +02:00
|
|
|
" 0%| | 0/8 [00:00<?, ?it/s]\n",
|
|
|
|
"943it [00:00, 13897.79it/s]\n",
|
|
|
|
" 12%|██████████▌ | 1/8 [00:06<00:42, 6.02s/it]\n",
|
|
|
|
"943it [00:00, 13501.01it/s]\n",
|
|
|
|
" 25%|█████████████████████ | 2/8 [00:11<00:35, 5.97s/it]\n",
|
|
|
|
"943it [00:00, 12601.73it/s]\n",
|
|
|
|
" 38%|███████████████████████████████▌ | 3/8 [00:17<00:29, 5.99s/it]\n",
|
|
|
|
"943it [00:00, 12158.08it/s]\n",
|
|
|
|
" 50%|██████████████████████████████████████████ | 4/8 [00:24<00:24, 6.05s/it]\n",
|
|
|
|
"943it [00:00, 12607.07it/s]\n",
|
|
|
|
" 62%|████████████████████████████████████████████████████▌ | 5/8 [00:30<00:18, 6.12s/it]\n",
|
|
|
|
"943it [00:00, 13126.43it/s]\n",
|
|
|
|
" 75%|███████████████████████████████████████████████████████████████ | 6/8 [00:36<00:12, 6.11s/it]\n",
|
|
|
|
"943it [00:00, 12777.35it/s]\n",
|
|
|
|
" 88%|█████████████████████████████████████████████████████████████████████████▌ | 7/8 [00:42<00:06, 6.15s/it]\n",
|
|
|
|
"943it [00:00, 12441.39it/s]\n",
|
|
|
|
"100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:48<00:00, 6.10s/it]\n"
|
2021-05-07 22:16:28 +02:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/html": [
|
|
|
|
"<div>\n",
|
|
|
|
"<style scoped>\n",
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
" }\n",
|
|
|
|
"\n",
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
" }\n",
|
|
|
|
"\n",
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
" text-align: right;\n",
|
|
|
|
" }\n",
|
|
|
|
"</style>\n",
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
" <thead>\n",
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
" <th></th>\n",
|
|
|
|
" <th>Alpha</th>\n",
|
|
|
|
" <th>RMSE</th>\n",
|
|
|
|
" <th>MAE</th>\n",
|
|
|
|
" <th>precision</th>\n",
|
|
|
|
" <th>recall</th>\n",
|
|
|
|
" <th>F_1</th>\n",
|
|
|
|
" <th>F_05</th>\n",
|
|
|
|
" <th>precision_super</th>\n",
|
|
|
|
" <th>recall_super</th>\n",
|
|
|
|
" <th>NDCG</th>\n",
|
|
|
|
" <th>mAP</th>\n",
|
|
|
|
" <th>MRR</th>\n",
|
|
|
|
" <th>LAUC</th>\n",
|
|
|
|
" <th>HR</th>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <th>HitRate2</th>\n",
|
|
|
|
" <th>HitRate3</th>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" <th>Reco in test</th>\n",
|
|
|
|
" <th>Test coverage</th>\n",
|
|
|
|
" <th>Shannon</th>\n",
|
|
|
|
" <th>Gini</th>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" </thead>\n",
|
|
|
|
" <tbody>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>0</th>\n",
|
|
|
|
" <td>0.2</td>\n",
|
|
|
|
" <td>268.177832</td>\n",
|
|
|
|
" <td>211.732649</td>\n",
|
|
|
|
" <td>0.262672</td>\n",
|
|
|
|
" <td>0.166858</td>\n",
|
|
|
|
" <td>0.166277</td>\n",
|
|
|
|
" <td>0.197184</td>\n",
|
|
|
|
" <td>0.187661</td>\n",
|
|
|
|
" <td>0.203252</td>\n",
|
|
|
|
" <td>0.320910</td>\n",
|
|
|
|
" <td>0.196132</td>\n",
|
|
|
|
" <td>0.563378</td>\n",
|
|
|
|
" <td>0.580866</td>\n",
|
|
|
|
" <td>0.850477</td>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <td>0.629905</td>\n",
|
|
|
|
" <td>0.451750</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" <td>1.000000</td>\n",
|
|
|
|
" <td>0.060606</td>\n",
|
|
|
|
" <td>3.669627</td>\n",
|
|
|
|
" <td>0.979636</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>0</th>\n",
|
|
|
|
" <td>0.4</td>\n",
|
|
|
|
" <td>10.546689</td>\n",
|
|
|
|
" <td>7.792373</td>\n",
|
|
|
|
" <td>0.268505</td>\n",
|
|
|
|
" <td>0.172669</td>\n",
|
|
|
|
" <td>0.171569</td>\n",
|
|
|
|
" <td>0.202643</td>\n",
|
|
|
|
" <td>0.192489</td>\n",
|
|
|
|
" <td>0.212653</td>\n",
|
|
|
|
" <td>0.326760</td>\n",
|
|
|
|
" <td>0.200172</td>\n",
|
|
|
|
" <td>0.565148</td>\n",
|
|
|
|
" <td>0.583801</td>\n",
|
|
|
|
" <td>0.854719</td>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <td>0.644751</td>\n",
|
|
|
|
" <td>0.458112</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" <td>1.000000</td>\n",
|
|
|
|
" <td>0.064214</td>\n",
|
|
|
|
" <td>3.726996</td>\n",
|
|
|
|
" <td>0.978426</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>0</th>\n",
|
|
|
|
" <td>0.6</td>\n",
|
|
|
|
" <td>3.143988</td>\n",
|
|
|
|
" <td>2.948790</td>\n",
|
|
|
|
" <td>0.274655</td>\n",
|
|
|
|
" <td>0.180502</td>\n",
|
|
|
|
" <td>0.177820</td>\n",
|
|
|
|
" <td>0.208730</td>\n",
|
|
|
|
" <td>0.198176</td>\n",
|
|
|
|
" <td>0.222746</td>\n",
|
|
|
|
" <td>0.332872</td>\n",
|
|
|
|
" <td>0.203290</td>\n",
|
|
|
|
" <td>0.568872</td>\n",
|
|
|
|
" <td>0.587738</td>\n",
|
|
|
|
" <td>0.870626</td>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <td>0.657476</td>\n",
|
|
|
|
" <td>0.470838</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" <td>1.000000</td>\n",
|
|
|
|
" <td>0.065657</td>\n",
|
|
|
|
" <td>3.785282</td>\n",
|
|
|
|
" <td>0.977090</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>0</th>\n",
|
|
|
|
" <td>0.8</td>\n",
|
|
|
|
" <td>3.670728</td>\n",
|
|
|
|
" <td>3.495735</td>\n",
|
|
|
|
" <td>0.281972</td>\n",
|
|
|
|
" <td>0.189868</td>\n",
|
|
|
|
" <td>0.185300</td>\n",
|
|
|
|
" <td>0.216071</td>\n",
|
|
|
|
" <td>0.203541</td>\n",
|
|
|
|
" <td>0.236751</td>\n",
|
|
|
|
" <td>0.339867</td>\n",
|
|
|
|
" <td>0.206688</td>\n",
|
|
|
|
" <td>0.573729</td>\n",
|
|
|
|
" <td>0.592432</td>\n",
|
|
|
|
" <td>0.874867</td>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <td>0.685048</td>\n",
|
|
|
|
" <td>0.492047</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" <td>1.000000</td>\n",
|
|
|
|
" <td>0.070707</td>\n",
|
|
|
|
" <td>3.832415</td>\n",
|
|
|
|
" <td>0.975998</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>0</th>\n",
|
|
|
|
" <td>1.0</td>\n",
|
|
|
|
" <td>3.702446</td>\n",
|
|
|
|
" <td>3.527273</td>\n",
|
|
|
|
" <td>0.282185</td>\n",
|
|
|
|
" <td>0.192092</td>\n",
|
|
|
|
" <td>0.186749</td>\n",
|
|
|
|
" <td>0.216980</td>\n",
|
|
|
|
" <td>0.204185</td>\n",
|
|
|
|
" <td>0.240096</td>\n",
|
|
|
|
" <td>0.339114</td>\n",
|
|
|
|
" <td>0.204905</td>\n",
|
|
|
|
" <td>0.572157</td>\n",
|
|
|
|
" <td>0.593544</td>\n",
|
|
|
|
" <td>0.875928</td>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <td>0.685048</td>\n",
|
|
|
|
" <td>0.495228</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" <td>1.000000</td>\n",
|
|
|
|
" <td>0.077201</td>\n",
|
|
|
|
" <td>3.875892</td>\n",
|
|
|
|
" <td>0.974947</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>0</th>\n",
|
|
|
|
" <td>1.2</td>\n",
|
|
|
|
" <td>3.704441</td>\n",
|
|
|
|
" <td>3.529251</td>\n",
|
|
|
|
" <td>0.280912</td>\n",
|
|
|
|
" <td>0.193633</td>\n",
|
|
|
|
" <td>0.187311</td>\n",
|
|
|
|
" <td>0.216872</td>\n",
|
|
|
|
" <td>0.203004</td>\n",
|
|
|
|
" <td>0.240588</td>\n",
|
|
|
|
" <td>0.338049</td>\n",
|
|
|
|
" <td>0.203453</td>\n",
|
|
|
|
" <td>0.571830</td>\n",
|
|
|
|
" <td>0.594313</td>\n",
|
|
|
|
" <td>0.883351</td>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <td>0.681866</td>\n",
|
|
|
|
" <td>0.498409</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" <td>1.000000</td>\n",
|
|
|
|
" <td>0.085859</td>\n",
|
|
|
|
" <td>3.910718</td>\n",
|
|
|
|
" <td>0.974073</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>0</th>\n",
|
|
|
|
" <td>1.4</td>\n",
|
|
|
|
" <td>3.704580</td>\n",
|
|
|
|
" <td>3.529388</td>\n",
|
|
|
|
" <td>0.273595</td>\n",
|
|
|
|
" <td>0.190651</td>\n",
|
|
|
|
" <td>0.183874</td>\n",
|
|
|
|
" <td>0.212183</td>\n",
|
|
|
|
" <td>0.199464</td>\n",
|
|
|
|
" <td>0.239118</td>\n",
|
|
|
|
" <td>0.329550</td>\n",
|
|
|
|
" <td>0.195433</td>\n",
|
|
|
|
" <td>0.566171</td>\n",
|
|
|
|
" <td>0.592793</td>\n",
|
|
|
|
" <td>0.871686</td>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <td>0.675504</td>\n",
|
|
|
|
" <td>0.489926</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" <td>1.000000</td>\n",
|
|
|
|
" <td>0.107504</td>\n",
|
|
|
|
" <td>3.961915</td>\n",
|
|
|
|
" <td>0.972674</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>0</th>\n",
|
|
|
|
" <td>1.6</td>\n",
|
|
|
|
" <td>3.704591</td>\n",
|
|
|
|
" <td>3.529399</td>\n",
|
|
|
|
" <td>0.263097</td>\n",
|
|
|
|
" <td>0.186255</td>\n",
|
|
|
|
" <td>0.178709</td>\n",
|
|
|
|
" <td>0.205170</td>\n",
|
|
|
|
" <td>0.191094</td>\n",
|
|
|
|
" <td>0.232920</td>\n",
|
|
|
|
" <td>0.317439</td>\n",
|
|
|
|
" <td>0.184917</td>\n",
|
|
|
|
" <td>0.552349</td>\n",
|
|
|
|
" <td>0.590545</td>\n",
|
|
|
|
" <td>0.868505</td>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <td>0.669141</td>\n",
|
|
|
|
" <td>0.462354</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" <td>0.999576</td>\n",
|
|
|
|
" <td>0.156566</td>\n",
|
|
|
|
" <td>4.060156</td>\n",
|
|
|
|
" <td>0.969203</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" </tbody>\n",
|
|
|
|
"</table>\n",
|
|
|
|
"</div>"
|
|
|
|
],
|
|
|
|
"text/plain": [
|
|
|
|
" Alpha RMSE MAE precision recall F_1 F_05 \\\n",
|
|
|
|
"0 0.2 268.177832 211.732649 0.262672 0.166858 0.166277 0.197184 \n",
|
|
|
|
"0 0.4 10.546689 7.792373 0.268505 0.172669 0.171569 0.202643 \n",
|
|
|
|
"0 0.6 3.143988 2.948790 0.274655 0.180502 0.177820 0.208730 \n",
|
|
|
|
"0 0.8 3.670728 3.495735 0.281972 0.189868 0.185300 0.216071 \n",
|
|
|
|
"0 1.0 3.702446 3.527273 0.282185 0.192092 0.186749 0.216980 \n",
|
|
|
|
"0 1.2 3.704441 3.529251 0.280912 0.193633 0.187311 0.216872 \n",
|
|
|
|
"0 1.4 3.704580 3.529388 0.273595 0.190651 0.183874 0.212183 \n",
|
|
|
|
"0 1.6 3.704591 3.529399 0.263097 0.186255 0.178709 0.205170 \n",
|
|
|
|
"\n",
|
|
|
|
" precision_super recall_super NDCG mAP MRR LAUC \\\n",
|
|
|
|
"0 0.187661 0.203252 0.320910 0.196132 0.563378 0.580866 \n",
|
|
|
|
"0 0.192489 0.212653 0.326760 0.200172 0.565148 0.583801 \n",
|
|
|
|
"0 0.198176 0.222746 0.332872 0.203290 0.568872 0.587738 \n",
|
|
|
|
"0 0.203541 0.236751 0.339867 0.206688 0.573729 0.592432 \n",
|
|
|
|
"0 0.204185 0.240096 0.339114 0.204905 0.572157 0.593544 \n",
|
|
|
|
"0 0.203004 0.240588 0.338049 0.203453 0.571830 0.594313 \n",
|
|
|
|
"0 0.199464 0.239118 0.329550 0.195433 0.566171 0.592793 \n",
|
|
|
|
"0 0.191094 0.232920 0.317439 0.184917 0.552349 0.590545 \n",
|
|
|
|
"\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" HR HitRate2 HitRate3 Reco in test Test coverage Shannon \\\n",
|
|
|
|
"0 0.850477 0.629905 0.451750 1.000000 0.060606 3.669627 \n",
|
|
|
|
"0 0.854719 0.644751 0.458112 1.000000 0.064214 3.726996 \n",
|
|
|
|
"0 0.870626 0.657476 0.470838 1.000000 0.065657 3.785282 \n",
|
|
|
|
"0 0.874867 0.685048 0.492047 1.000000 0.070707 3.832415 \n",
|
|
|
|
"0 0.875928 0.685048 0.495228 1.000000 0.077201 3.875892 \n",
|
|
|
|
"0 0.883351 0.681866 0.498409 1.000000 0.085859 3.910718 \n",
|
|
|
|
"0 0.871686 0.675504 0.489926 1.000000 0.107504 3.961915 \n",
|
|
|
|
"0 0.868505 0.669141 0.462354 0.999576 0.156566 4.060156 \n",
|
|
|
|
"\n",
|
|
|
|
" Gini \n",
|
|
|
|
"0 0.979636 \n",
|
|
|
|
"0 0.978426 \n",
|
|
|
|
"0 0.977090 \n",
|
|
|
|
"0 0.975998 \n",
|
|
|
|
"0 0.974947 \n",
|
|
|
|
"0 0.974073 \n",
|
|
|
|
"0 0.972674 \n",
|
|
|
|
"0 0.969203 "
|
2021-05-07 22:16:28 +02:00
|
|
|
]
|
|
|
|
},
|
|
|
|
"execution_count": 6,
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"from tqdm import tqdm\n",
|
|
|
|
"\n",
|
|
|
|
"result = []\n",
|
|
|
|
"for alpha in tqdm([round(i, 1) for i in np.arange(0.2, 1.6001, 0.2)]):\n",
|
|
|
|
" model = RP3Beta()\n",
|
|
|
|
" model.fit(train_ui, alpha=alpha, beta=0)\n",
|
|
|
|
" reco = pd.DataFrame(model.recommend(user_code_id, item_code_id, topK=10))\n",
|
|
|
|
" estimations_df = pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))\n",
|
|
|
|
" to_append = ev.evaluate(\n",
|
|
|
|
" test=pd.read_csv(\"./Datasets/ml-100k/test.csv\", sep=\"\\t\", header=None),\n",
|
|
|
|
" estimations_df=estimations_df,\n",
|
|
|
|
" reco=np.array(reco),\n",
|
|
|
|
" super_reactions=[4, 5],\n",
|
|
|
|
" )\n",
|
|
|
|
" to_append.insert(0, \"Alpha\", alpha)\n",
|
|
|
|
" result.append(to_append)\n",
|
|
|
|
"\n",
|
|
|
|
"result = pd.concat(result)\n",
|
|
|
|
"result"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 7,
|
|
|
|
"metadata": {
|
|
|
|
"scrolled": false
|
|
|
|
},
|
|
|
|
"outputs": [
|
2021-06-12 11:14:56 +02:00
|
|
|
{
|
|
|
|
"ename": "IndexError",
|
|
|
|
"evalue": "list index out of range",
|
|
|
|
"output_type": "error",
|
|
|
|
"traceback": [
|
|
|
|
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
|
|
|
"\u001b[1;31mIndexError\u001b[0m Traceback (most recent call last)",
|
|
|
|
"\u001b[1;32m<ipython-input-7-720c7bf8f8eb>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 15\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmetrics\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 16\u001b[0m \u001b[0mdf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mresult\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"Alpha\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmetrics\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 17\u001b[1;33m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mplot\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0max\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0maxes\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mto_iter\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtitle\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mmetrics\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
|
|
|
|
"\u001b[1;31mIndexError\u001b[0m: list index out of range"
|
|
|
|
]
|
|
|
|
},
|
2021-05-07 22:16:28 +02:00
|
|
|
{
|
|
|
|
"data": {
|
2021-06-12 11:14:56 +02:00
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABCQAAAkoCAYAAACzg26yAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAEAAElEQVR4nOz9fXxU9Z3//z9euc6EQDIhQEIyJCqioIAW8Pqq1lZbUbtrW1zrWqsf193a7bbd3dr9dNtuW7/b7ba73X7Wrj+3rbrVai/sBVi1Wre29QIBFRAEFAVCSIAQwnWu8/r9MSc4xAAJZOZMZp73221umfM+58x5DabvnrzO+/16m7sjIiIiIiIiIpJKOWEHICIiIiIiIiLZRwkJEREREREREUk5JSREREREREREJOWUkBARERERERGRlFNCQkRERERERERSTgkJEREREREREUk5JSRERERERETkIDO728z+cQjHrTazi5MfkWQqJSQk45jZRjNrN7N9ZrbVzO4zszHBvvvMzM3sqgHnfDto/1iwXWBm3zKzxuBzNpjZvx/mGv2v/0zpFxURGUWCfrPLzMYPaF8e9L91CW1fDtrmDTj2Y2bWO6Dv3Wdm1Sn6GiIiWcHdb3P3rw7huBnu/kwKQpIMpYSEZKr57j4GmA2cAXw+Yd/rwI39G2aWB3wIeDPhmM8Dc4B5QClwCfDKYNdIeN0+4t9CRCSzbACu698ws9OB4sQDzMyAG4CdJPTVCV4Y0PeOcfemZAYtIjIaBfe4ImlNCQnJaO6+FfgN8cREv0XAeWZWHmxfDqwEtiYcMxf4hbs3edxGd/+fVMQsIpLBfgj8ecL2jcDAvvUCoBr4FLDAzApSFJuIyKgQjDj7vJm9ZmZtZnavmRWZ2cXB6N7PmdlW4F4zyzGzO8zsTTNrNbOfmFk04bPON7PnzWyXmW1OGC18n5l9LXg/3sweDY7ZaWZ/NLOchFjeE7wvDEYdNwWvb5tZYbCvP7bPmtl2M2s2s5tS/W8n6UcJCcloZlYDXAGsT2juABYCC4LtP+edN8SLgc+Y2V+Z2enBEzsRETk+i4GxZnaqmeUCHwEeGHDMjcQTxz8Otq9MYXwiIqPF9cD7gBOBk4EvBO2TgCgwBbgV+GvgGuAi4sneNuAuADOLAY8D/w+oJP4Ab/kg1/os0BgcMxH4B8AHOe7/AmcHnzOL+EjjLyTsnwSMAyYDNwN3JTwglCylhIRkql+a2V5gM7Ad+NKA/f8D/LmZjSPeQf9ywP5/Bv6FeGe/DNhiZgOHDv8yyBT3v/7PSH8JEZEM1D9K4jJgLbClf4eZRYhPofuRu3cDP+Od0zbOHtD3vomISPb5T3ff7O47gTt5ezpcH/Ald+9093bgL4D/6+6N7t4JfBm4NpjOcT3wW3d/yN273b3V3ZcPcq1uoAqYEhz3R3cfLCFxPfAVd9/u7i3APxGfgpf4OV8JPuMxYB8w7Tj/HWSUU0JCMtU17l4KXAycAhxSRM3dnyWe5f0C8GjQYSfu73X3u9z9PKCMeEf/AzM7dcA1yhJe/528ryMikjF+CPwZ8DHeOTrtg0AP8Fiw/SBwhZlVJhyzeEDfe2KyAxYRSUObE95vIj76AaDF3TsS9k0BftGfxAXWAL3ERzrUcmgNtcP5V+KjjZ80s7fM7I7DHFcdxDJYXACt7t6TsH0AGDOE60sGU0JCMpq7/x64D/jmILsfID4E7Yi1Idy93d3vIj7EbfpIxygikk3cfRPx4pbvB34+YPeNxG9OG4L5zz8F8kkohCkiIkA8mdAvBvQX9x04cmEzcMWARG6Ru28J9h01qevue939s+5+AjCf+LTmSwc5tIl4AmSwuEQGpYSEZINvA5eZ2ewB7d8hPmT4DwNPMLO/CYrvFJtZXjBdo5R3rrQhIiLDdzPwbnffn9A2GbiUeM2I2bw9B/lfGHy1DRGRbPYJM6sJClT+A2/X3RnobuBOM5sCYGaVZnZ1sO9B4D1m9uHgfrdikPtlzOxKMzspqKm2h/gIi95BrvUQ8IXgGuOBL/LOOkEih1BCQjJeMIftf4B/HNC+092fPswcuHbgW8RX3tgBfAL4U3d/K+GYRWa2L+H1iyR9BRGRjOLub7r7sgHNFwDL3f1Jd9/a/yKePJ5pZqcFx50zoO/dZ2ZzU/oFRETC9yPgSeCt4PW1wxz3H8SLuT8Z1FdbDJwF4O4NxEerfZb4UsvLiSeCB5oK/JZ4zYcXgO+6+zODHPc14rXXVgKvAi8fIS4RAGzwv8VEREREREQk3ZjZRuAWd/9t2LGIHC+NkBARERERERGRlFNCQkRERERERERSTlM2RERERERERCTlNEJCRERERERERFJOCQkRERERERERSbm8sAMYCePHj/e6urqwwxAReYeXXnpph7tXhh1HKqgvFpF0pH5YRCR8h+uLMyIhUVdXx7JlA5czFxEJn5ltCjuGVFFfLCLpSP2wiEj4DtcXa8qGiIiIiIiIiKScEhIiIiIiIiIiknJKSIiIiIiIiIhIymVEDQkRSQ/d3d00NjbS0dERdigpV1RURE1NDfn5+WGHIiLyDtnQP6sfFpGRkA39ZTINty9WQkJERkxjYyOlpaXU1dVhZmGHkzLuTmtrK42NjdTX14cdjojIO2R6/5yqftjMLgf+A8gFvufuXx+w/3rgc8HmPuAv3X1FsO/TwC2AA68CN7l7h5n9KzAf6ALeDNp3mVkdsAZYF3zeYne/LWlfTkSAzO8vk+lY+mJN2RCREdPR0UFFRUXWdd5mRkVFhTLpIpK2Mr1/TkU/bGa5wF3AFcB04Dozmz7gsA3ARe4+E/gqcE9w7mTgr4E57n4a8YTGguCcp4DTgnNeBz6f8Hlvuvvs4KVkhEgKZHp/mUzH0hcrISEiIypbO+9s/d4iMnpkej+Vgu83D1jv7m+5exfwMHB14gHu/ry7twWbi4GahN15QLGZ5QERoCk450l37znMOSISgkzvL5NpuP92SkiISEbJzc1l9uzZnHbaacyfP59du3YBsHHjRsyMf/zHfzx47I4dO8jPz+f2228HYN26dVx88cXMnj2bU089lVtvvRWAZ555hnHjxjF79uyDr9/+9rcp/24iIqOZmXHDDTcc3O7p6aGyspIrr7zykOOuvvpqzjnnnEPavvzlLzN58uRD+uH+/j2FJgObE7Ybg7bDuRl4HMDdtwDfBBqAZmC3uz85yDkf7z8nUG9mr5jZ783sguMJXkSy17Jly/jrv/7rw+5vamri2muvTWFEb1NCQkQySnFxMcuXL2fVqlVEo1Huuuuug/tOOOEEHn300YPbP/3pT5kxY8bB7b/+67/m05/+NMuXL2fNmjV88pOfPLjvggsuYPny5Qdf73nPe1LzhUREMkRJSQmrVq2ivb0dgKeeeorJkw/9e37Xrl28/PLL7Nq1iw0bNhyyr79/7n+VlZWlKvR+gz3280EPNLuEeELic8F2OfHRFPVANVBiZh8dcM7/BXqAB4OmZiDm7mcAnwF+ZGZjB7nWrWa2zMyWtbS0HNMXE5HRpbe3d1jHz5kzh+985zuH3V9dXc3Pfvaz4w3rmCghISIZ65xzzmHLli0Ht4uLizn11FNZtmwZAD/+8Y/58Ic/fHB/c3MzNTVvj5Q9/fTTUxesiEgWuOKKK/j1r38NwEMPPcR11113yP5HHnmE+fPns2DBAh5++OEwQjySRqA2YbuGYNpFIjObCXwPuNrdW4Pm9wAb3L3F3buBnwPnJpxzI3AlcL27O4C7d/af7+4vES94efLA67n7Pe4+x93nVFZWjsDXFJEwbdy4kVNOOYUbb7yRmTNncu2113LgwAHq6ur4yle+wvnnn89Pf/pTnnzySc455xzOPPNMPvShD7Fv3z4Ali5dyrnnnsusWbOYN28ee/fu5Zlnnjk4Gu33v//9wZFmZ5xxBnv37mXjxo2cdtppQLyGxk033cTpp5/OGWecwe9+9zsA7rvvPv7kT/6Eyy+/nKlTp/L3f//3I/J9tcqGiCTFPy1azWtNe0b0M6dXj+VL82cc/UDimeOnn36am2+++ZD2/pvcSZMmkZubS3V1NU1N8fv
|
2021-05-07 22:16:28 +02:00
|
|
|
"text/plain": [
|
|
|
|
"<Figure size 1296x3024 with 18 Axes>"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"metadata": {
|
|
|
|
"needs_background": "light"
|
|
|
|
},
|
|
|
|
"output_type": "display_data"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"metrics = list(result.columns[[i not in [\"Alpha\"] for i in result.columns]])\n",
|
|
|
|
"\n",
|
|
|
|
"charts_per_row = 6\n",
|
|
|
|
"charts_per_column = 3\n",
|
|
|
|
"\n",
|
|
|
|
"fig, axes = plt.subplots(\n",
|
|
|
|
" nrows=charts_per_row, ncols=charts_per_column, figsize=(18, 7 * charts_per_row)\n",
|
|
|
|
")\n",
|
|
|
|
"import itertools\n",
|
|
|
|
"\n",
|
|
|
|
"to_iter = [\n",
|
|
|
|
" i for i in itertools.product(range(charts_per_row), range(charts_per_column))\n",
|
|
|
|
"]\n",
|
|
|
|
"\n",
|
|
|
|
"for i in range(len(metrics)):\n",
|
|
|
|
" df = result[[\"Alpha\", metrics[i]]]\n",
|
|
|
|
" df.plot(ax=axes[to_iter[i]], title=metrics[i], x=0, y=1)"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"metadata": {},
|
|
|
|
"source": [
|
|
|
|
"##### Beta"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 8,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"name": "stderr",
|
|
|
|
"output_type": "stream",
|
|
|
|
"text": [
|
2021-06-12 11:14:56 +02:00
|
|
|
" 0%| | 0/10 [00:00<?, ?it/s]\n",
|
|
|
|
"943it [00:00, 12120.56it/s]\n",
|
|
|
|
" 10%|████████▎ | 1/10 [00:06<00:54, 6.01s/it]\n",
|
|
|
|
"943it [00:00, 12283.97it/s]\n",
|
|
|
|
" 20%|████████████████▌ | 2/10 [00:12<00:48, 6.06s/it]\n",
|
|
|
|
"943it [00:00, 12771.78it/s]\n",
|
|
|
|
" 30%|████████████████████████▉ | 3/10 [00:18<00:42, 6.03s/it]\n",
|
|
|
|
"943it [00:00, 13126.91it/s]\n",
|
|
|
|
" 40%|█████████████████████████████████▏ | 4/10 [00:24<00:36, 6.05s/it]\n",
|
|
|
|
"943it [00:00, 12952.63it/s]\n",
|
|
|
|
" 50%|█████████████████████████████████████████▌ | 5/10 [00:30<00:30, 6.09s/it]\n",
|
|
|
|
"943it [00:00, 13132.62it/s]\n",
|
|
|
|
" 60%|█████████████████████████████████████████████████▊ | 6/10 [00:36<00:24, 6.12s/it]\n",
|
|
|
|
"943it [00:00, 12603.86it/s]\n",
|
|
|
|
" 70%|██████████████████████████████████████████████████████████ | 7/10 [00:42<00:18, 6.10s/it]\n",
|
|
|
|
"943it [00:00, 12273.98it/s]\n",
|
|
|
|
" 80%|██████████████████████████████████████████████████████████████████▍ | 8/10 [00:48<00:12, 6.08s/it]\n",
|
|
|
|
"943it [00:00, 13125.77it/s]\n",
|
|
|
|
" 90%|██████████████████████████████████████████████████████████████████████████▋ | 9/10 [00:54<00:06, 6.13s/it]\n",
|
|
|
|
"943it [00:00, 13697.33it/s]\n",
|
|
|
|
"100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [01:01<00:00, 6.12s/it]\n"
|
2021-05-07 22:16:28 +02:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/html": [
|
|
|
|
"<div>\n",
|
|
|
|
"<style scoped>\n",
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
" }\n",
|
|
|
|
"\n",
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
" }\n",
|
|
|
|
"\n",
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
" text-align: right;\n",
|
|
|
|
" }\n",
|
|
|
|
"</style>\n",
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
" <thead>\n",
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
" <th></th>\n",
|
|
|
|
" <th>Beta</th>\n",
|
|
|
|
" <th>RMSE</th>\n",
|
|
|
|
" <th>MAE</th>\n",
|
|
|
|
" <th>precision</th>\n",
|
|
|
|
" <th>recall</th>\n",
|
|
|
|
" <th>F_1</th>\n",
|
|
|
|
" <th>F_05</th>\n",
|
|
|
|
" <th>precision_super</th>\n",
|
|
|
|
" <th>recall_super</th>\n",
|
|
|
|
" <th>NDCG</th>\n",
|
|
|
|
" <th>mAP</th>\n",
|
|
|
|
" <th>MRR</th>\n",
|
|
|
|
" <th>LAUC</th>\n",
|
|
|
|
" <th>HR</th>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <th>HitRate2</th>\n",
|
|
|
|
" <th>HitRate3</th>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" <th>Reco in test</th>\n",
|
|
|
|
" <th>Test coverage</th>\n",
|
|
|
|
" <th>Shannon</th>\n",
|
|
|
|
" <th>Gini</th>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" </thead>\n",
|
|
|
|
" <tbody>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>0</th>\n",
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
" <td>3.702446</td>\n",
|
|
|
|
" <td>3.527273</td>\n",
|
|
|
|
" <td>0.282185</td>\n",
|
|
|
|
" <td>0.192092</td>\n",
|
|
|
|
" <td>0.186749</td>\n",
|
|
|
|
" <td>0.216980</td>\n",
|
|
|
|
" <td>0.204185</td>\n",
|
|
|
|
" <td>0.240096</td>\n",
|
|
|
|
" <td>0.339114</td>\n",
|
|
|
|
" <td>0.204905</td>\n",
|
|
|
|
" <td>0.572157</td>\n",
|
|
|
|
" <td>0.593544</td>\n",
|
|
|
|
" <td>0.875928</td>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <td>0.685048</td>\n",
|
|
|
|
" <td>0.495228</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" <td>1.000000</td>\n",
|
|
|
|
" <td>0.077201</td>\n",
|
|
|
|
" <td>3.875892</td>\n",
|
|
|
|
" <td>0.974947</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>0</th>\n",
|
|
|
|
" <td>0.1</td>\n",
|
|
|
|
" <td>3.703312</td>\n",
|
|
|
|
" <td>3.528128</td>\n",
|
|
|
|
" <td>0.290138</td>\n",
|
|
|
|
" <td>0.197597</td>\n",
|
|
|
|
" <td>0.192259</td>\n",
|
|
|
|
" <td>0.223336</td>\n",
|
|
|
|
" <td>0.210944</td>\n",
|
|
|
|
" <td>0.246153</td>\n",
|
|
|
|
" <td>0.347768</td>\n",
|
|
|
|
" <td>0.212034</td>\n",
|
|
|
|
" <td>0.581038</td>\n",
|
|
|
|
" <td>0.596328</td>\n",
|
|
|
|
" <td>0.884411</td>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <td>0.695652</td>\n",
|
|
|
|
" <td>0.514316</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" <td>1.000000</td>\n",
|
|
|
|
" <td>0.085137</td>\n",
|
|
|
|
" <td>3.957416</td>\n",
|
|
|
|
" <td>0.972784</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>0</th>\n",
|
|
|
|
" <td>0.2</td>\n",
|
|
|
|
" <td>3.703825</td>\n",
|
|
|
|
" <td>3.528636</td>\n",
|
|
|
|
" <td>0.297137</td>\n",
|
|
|
|
" <td>0.201202</td>\n",
|
|
|
|
" <td>0.196067</td>\n",
|
|
|
|
" <td>0.228169</td>\n",
|
|
|
|
" <td>0.218026</td>\n",
|
|
|
|
" <td>0.252767</td>\n",
|
|
|
|
" <td>0.355655</td>\n",
|
|
|
|
" <td>0.219909</td>\n",
|
|
|
|
" <td>0.588904</td>\n",
|
|
|
|
" <td>0.598160</td>\n",
|
|
|
|
" <td>0.886532</td>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <td>0.697773</td>\n",
|
|
|
|
" <td>0.515376</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" <td>1.000000</td>\n",
|
|
|
|
" <td>0.094517</td>\n",
|
|
|
|
" <td>4.053212</td>\n",
|
|
|
|
" <td>0.969980</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>0</th>\n",
|
|
|
|
" <td>0.3</td>\n",
|
|
|
|
" <td>3.704130</td>\n",
|
|
|
|
" <td>3.528939</td>\n",
|
|
|
|
" <td>0.303499</td>\n",
|
|
|
|
" <td>0.204749</td>\n",
|
|
|
|
" <td>0.199901</td>\n",
|
|
|
|
" <td>0.232829</td>\n",
|
|
|
|
" <td>0.225107</td>\n",
|
|
|
|
" <td>0.260797</td>\n",
|
|
|
|
" <td>0.363757</td>\n",
|
|
|
|
" <td>0.226825</td>\n",
|
|
|
|
" <td>0.599969</td>\n",
|
|
|
|
" <td>0.599964</td>\n",
|
|
|
|
" <td>0.888653</td>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <td>0.707317</td>\n",
|
|
|
|
" <td>0.531283</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" <td>1.000000</td>\n",
|
|
|
|
" <td>0.105339</td>\n",
|
|
|
|
" <td>4.147779</td>\n",
|
|
|
|
" <td>0.966948</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>0</th>\n",
|
|
|
|
" <td>0.4</td>\n",
|
|
|
|
" <td>3.704313</td>\n",
|
|
|
|
" <td>3.529120</td>\n",
|
|
|
|
" <td>0.308908</td>\n",
|
|
|
|
" <td>0.208811</td>\n",
|
|
|
|
" <td>0.203854</td>\n",
|
|
|
|
" <td>0.237241</td>\n",
|
|
|
|
" <td>0.229614</td>\n",
|
|
|
|
" <td>0.266918</td>\n",
|
|
|
|
" <td>0.370758</td>\n",
|
|
|
|
" <td>0.232673</td>\n",
|
|
|
|
" <td>0.609385</td>\n",
|
|
|
|
" <td>0.602014</td>\n",
|
|
|
|
" <td>0.895016</td>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <td>0.718982</td>\n",
|
|
|
|
" <td>0.537646</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" <td>0.999894</td>\n",
|
|
|
|
" <td>0.132035</td>\n",
|
|
|
|
" <td>4.259682</td>\n",
|
|
|
|
" <td>0.962989</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>0</th>\n",
|
|
|
|
" <td>0.5</td>\n",
|
|
|
|
" <td>3.704422</td>\n",
|
|
|
|
" <td>3.529229</td>\n",
|
|
|
|
" <td>0.314316</td>\n",
|
|
|
|
" <td>0.211411</td>\n",
|
|
|
|
" <td>0.206768</td>\n",
|
|
|
|
" <td>0.240986</td>\n",
|
|
|
|
" <td>0.237124</td>\n",
|
|
|
|
" <td>0.273416</td>\n",
|
|
|
|
" <td>0.378307</td>\n",
|
|
|
|
" <td>0.239297</td>\n",
|
|
|
|
" <td>0.622792</td>\n",
|
|
|
|
" <td>0.603327</td>\n",
|
|
|
|
" <td>0.903499</td>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <td>0.724284</td>\n",
|
|
|
|
" <td>0.548250</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" <td>0.999046</td>\n",
|
|
|
|
" <td>0.168831</td>\n",
|
|
|
|
" <td>4.411281</td>\n",
|
|
|
|
" <td>0.956648</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>0</th>\n",
|
|
|
|
" <td>0.6</td>\n",
|
|
|
|
" <td>3.704488</td>\n",
|
|
|
|
" <td>3.529295</td>\n",
|
|
|
|
" <td>0.314634</td>\n",
|
|
|
|
" <td>0.206209</td>\n",
|
|
|
|
" <td>0.204818</td>\n",
|
|
|
|
" <td>0.240159</td>\n",
|
|
|
|
" <td>0.242489</td>\n",
|
|
|
|
" <td>0.273850</td>\n",
|
|
|
|
" <td>0.376438</td>\n",
|
|
|
|
" <td>0.238428</td>\n",
|
|
|
|
" <td>0.622042</td>\n",
|
|
|
|
" <td>0.600721</td>\n",
|
|
|
|
" <td>0.897137</td>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <td>0.720042</td>\n",
|
|
|
|
" <td>0.542948</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" <td>0.996394</td>\n",
|
|
|
|
" <td>0.212843</td>\n",
|
|
|
|
" <td>4.621938</td>\n",
|
|
|
|
" <td>0.945932</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>0</th>\n",
|
|
|
|
" <td>0.7</td>\n",
|
|
|
|
" <td>3.704528</td>\n",
|
|
|
|
" <td>3.529335</td>\n",
|
|
|
|
" <td>0.304136</td>\n",
|
|
|
|
" <td>0.187298</td>\n",
|
|
|
|
" <td>0.191990</td>\n",
|
|
|
|
" <td>0.228749</td>\n",
|
|
|
|
" <td>0.238305</td>\n",
|
|
|
|
" <td>0.256201</td>\n",
|
|
|
|
" <td>0.358807</td>\n",
|
|
|
|
" <td>0.226808</td>\n",
|
|
|
|
" <td>0.593897</td>\n",
|
|
|
|
" <td>0.591207</td>\n",
|
|
|
|
" <td>0.868505</td>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <td>0.693531</td>\n",
|
|
|
|
" <td>0.520679</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" <td>0.983033</td>\n",
|
|
|
|
" <td>0.256854</td>\n",
|
|
|
|
" <td>4.898568</td>\n",
|
|
|
|
" <td>0.928065</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>0</th>\n",
|
|
|
|
" <td>0.8</td>\n",
|
|
|
|
" <td>3.704552</td>\n",
|
|
|
|
" <td>3.529360</td>\n",
|
|
|
|
" <td>0.266384</td>\n",
|
|
|
|
" <td>0.147571</td>\n",
|
|
|
|
" <td>0.158660</td>\n",
|
|
|
|
" <td>0.194838</td>\n",
|
|
|
|
" <td>0.214485</td>\n",
|
|
|
|
" <td>0.209336</td>\n",
|
|
|
|
" <td>0.299850</td>\n",
|
|
|
|
" <td>0.184356</td>\n",
|
|
|
|
" <td>0.492852</td>\n",
|
|
|
|
" <td>0.571152</td>\n",
|
|
|
|
" <td>0.803818</td>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <td>0.604454</td>\n",
|
|
|
|
" <td>0.428420</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" <td>0.936373</td>\n",
|
|
|
|
" <td>0.341270</td>\n",
|
|
|
|
" <td>5.257397</td>\n",
|
|
|
|
" <td>0.895882</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>0</th>\n",
|
|
|
|
" <td>0.9</td>\n",
|
|
|
|
" <td>3.704567</td>\n",
|
|
|
|
" <td>3.529375</td>\n",
|
|
|
|
" <td>0.162354</td>\n",
|
|
|
|
" <td>0.076967</td>\n",
|
|
|
|
" <td>0.089233</td>\n",
|
|
|
|
" <td>0.114583</td>\n",
|
|
|
|
" <td>0.134657</td>\n",
|
|
|
|
" <td>0.113253</td>\n",
|
|
|
|
" <td>0.160868</td>\n",
|
|
|
|
" <td>0.085486</td>\n",
|
|
|
|
" <td>0.243590</td>\n",
|
|
|
|
" <td>0.535405</td>\n",
|
|
|
|
" <td>0.580064</td>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <td>0.400848</td>\n",
|
|
|
|
" <td>0.261930</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" <td>0.800106</td>\n",
|
|
|
|
" <td>0.415584</td>\n",
|
|
|
|
" <td>5.563910</td>\n",
|
|
|
|
" <td>0.857396</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" </tbody>\n",
|
|
|
|
"</table>\n",
|
|
|
|
"</div>"
|
|
|
|
],
|
|
|
|
"text/plain": [
|
|
|
|
" Beta RMSE MAE precision recall F_1 F_05 \\\n",
|
|
|
|
"0 0.0 3.702446 3.527273 0.282185 0.192092 0.186749 0.216980 \n",
|
|
|
|
"0 0.1 3.703312 3.528128 0.290138 0.197597 0.192259 0.223336 \n",
|
|
|
|
"0 0.2 3.703825 3.528636 0.297137 0.201202 0.196067 0.228169 \n",
|
|
|
|
"0 0.3 3.704130 3.528939 0.303499 0.204749 0.199901 0.232829 \n",
|
|
|
|
"0 0.4 3.704313 3.529120 0.308908 0.208811 0.203854 0.237241 \n",
|
|
|
|
"0 0.5 3.704422 3.529229 0.314316 0.211411 0.206768 0.240986 \n",
|
|
|
|
"0 0.6 3.704488 3.529295 0.314634 0.206209 0.204818 0.240159 \n",
|
|
|
|
"0 0.7 3.704528 3.529335 0.304136 0.187298 0.191990 0.228749 \n",
|
|
|
|
"0 0.8 3.704552 3.529360 0.266384 0.147571 0.158660 0.194838 \n",
|
|
|
|
"0 0.9 3.704567 3.529375 0.162354 0.076967 0.089233 0.114583 \n",
|
|
|
|
"\n",
|
|
|
|
" precision_super recall_super NDCG mAP MRR LAUC \\\n",
|
|
|
|
"0 0.204185 0.240096 0.339114 0.204905 0.572157 0.593544 \n",
|
|
|
|
"0 0.210944 0.246153 0.347768 0.212034 0.581038 0.596328 \n",
|
|
|
|
"0 0.218026 0.252767 0.355655 0.219909 0.588904 0.598160 \n",
|
|
|
|
"0 0.225107 0.260797 0.363757 0.226825 0.599969 0.599964 \n",
|
|
|
|
"0 0.229614 0.266918 0.370758 0.232673 0.609385 0.602014 \n",
|
|
|
|
"0 0.237124 0.273416 0.378307 0.239297 0.622792 0.603327 \n",
|
|
|
|
"0 0.242489 0.273850 0.376438 0.238428 0.622042 0.600721 \n",
|
|
|
|
"0 0.238305 0.256201 0.358807 0.226808 0.593897 0.591207 \n",
|
|
|
|
"0 0.214485 0.209336 0.299850 0.184356 0.492852 0.571152 \n",
|
|
|
|
"0 0.134657 0.113253 0.160868 0.085486 0.243590 0.535405 \n",
|
|
|
|
"\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" HR HitRate2 HitRate3 Reco in test Test coverage Shannon \\\n",
|
|
|
|
"0 0.875928 0.685048 0.495228 1.000000 0.077201 3.875892 \n",
|
|
|
|
"0 0.884411 0.695652 0.514316 1.000000 0.085137 3.957416 \n",
|
|
|
|
"0 0.886532 0.697773 0.515376 1.000000 0.094517 4.053212 \n",
|
|
|
|
"0 0.888653 0.707317 0.531283 1.000000 0.105339 4.147779 \n",
|
|
|
|
"0 0.895016 0.718982 0.537646 0.999894 0.132035 4.259682 \n",
|
|
|
|
"0 0.903499 0.724284 0.548250 0.999046 0.168831 4.411281 \n",
|
|
|
|
"0 0.897137 0.720042 0.542948 0.996394 0.212843 4.621938 \n",
|
|
|
|
"0 0.868505 0.693531 0.520679 0.983033 0.256854 4.898568 \n",
|
|
|
|
"0 0.803818 0.604454 0.428420 0.936373 0.341270 5.257397 \n",
|
|
|
|
"0 0.580064 0.400848 0.261930 0.800106 0.415584 5.563910 \n",
|
|
|
|
"\n",
|
|
|
|
" Gini \n",
|
|
|
|
"0 0.974947 \n",
|
|
|
|
"0 0.972784 \n",
|
|
|
|
"0 0.969980 \n",
|
|
|
|
"0 0.966948 \n",
|
|
|
|
"0 0.962989 \n",
|
|
|
|
"0 0.956648 \n",
|
|
|
|
"0 0.945932 \n",
|
|
|
|
"0 0.928065 \n",
|
|
|
|
"0 0.895882 \n",
|
|
|
|
"0 0.857396 "
|
2021-05-07 22:16:28 +02:00
|
|
|
]
|
|
|
|
},
|
|
|
|
"execution_count": 8,
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"from tqdm import tqdm\n",
|
|
|
|
"\n",
|
|
|
|
"result = []\n",
|
|
|
|
"for beta in tqdm([round(i, 1) for i in np.arange(0, 1, 0.1)]):\n",
|
|
|
|
" model = RP3Beta()\n",
|
|
|
|
" model.fit(train_ui, alpha=1, beta=beta)\n",
|
|
|
|
" reco = pd.DataFrame(model.recommend(user_code_id, item_code_id, topK=10))\n",
|
|
|
|
" estimations_df = pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))\n",
|
|
|
|
" to_append = ev.evaluate(\n",
|
|
|
|
" test=pd.read_csv(\"./Datasets/ml-100k/test.csv\", sep=\"\\t\", header=None),\n",
|
|
|
|
" estimations_df=estimations_df,\n",
|
|
|
|
" reco=np.array(reco),\n",
|
|
|
|
" super_reactions=[4, 5],\n",
|
|
|
|
" )\n",
|
|
|
|
" to_append.insert(0, \"Beta\", beta)\n",
|
|
|
|
" result.append(to_append)\n",
|
|
|
|
"\n",
|
|
|
|
"result = pd.concat(result)\n",
|
|
|
|
"result"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 9,
|
|
|
|
"metadata": {
|
|
|
|
"scrolled": false
|
|
|
|
},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABCwAAAkoCAYAAACgVC5GAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAEAAElEQVR4nOzdd3zV5f3//8criwwgARJmEhIQZA9FhrturUpbd9UO21rb2lZrh3bPX4d2fmq/fvx02Dpw1FnrAK2zkgAiIltIAoQZEkII2cnr98c5wRCDBEjyPjnneb/dzq3n/X5f75PXRezFm9e5rtdl7o6IiIiIiIiISCSJCzoAEREREREREZH2lLAQERERERERkYijhIWIiIiIiIiIRBwlLEREREREREQk4ihhISIiIiIiIiIRRwkLEREREREREYk4SliIiIiIiIjIfmZ2l5l9rxPtVprZ6d0fkcQqJSwk6phZiZnVmlm1mW03s3vMrG/42j1m5mZ2cbt7fhc+/6nwcZKZ/drMSsOfU2xmvz3Iz2h9/bFHOyoi0ouEx80GM8tsd35ZePzNa3Puh+FzM9u1/ZSZNbcbe6vNbHgPdUNEJCa4+w3u/pNOtJvo7i/3QEgSo5SwkGh1kbv3BaYB04Hb2lxbB3yy9cDMEoDLgA1t2twGzABmAv2ADwFvdfQz2rxu7PJeiIhEl2LgqtYDM5sMpLRtYGYGXAtU0GasbmNhu7G3r7tv7c6gRUR6o/AzrkivpoSFRDV33w48Tyhx0epfwElmNiB8fB6wHNjeps0JwOPuvtVDStz9Hz0Rs4hIFLsX+ESb408C7cfWU4DhwFeBK80sqYdiExHpFcIz1m4zs1VmttvM/mZmyWZ2enh28LfMbDvwNzOLM7NbzWyDmZWb2cNmNrDNZ51sZm+YWaWZbW4z2/geM/tp+H2mmT0dblNhZq+ZWVybWM4Kv+8TnrW8Nfz6nZn1CV9rje0WM9tpZtvM7NM9/WcnvY8SFhLVzCwbOB9Y3+Z0HfAUcGX4+BO8/4G5APiamX3RzCaHv/ETEZGjUwD0N7PxZhYPXAHc167NJwkllh8KH1/Yg/GJiPQWVwPnAqOBscB3w+eHAgOBkcD1wFeAjwCnEUoG7wbuBDCzXOBZ4H+ALEJf8C3r4GfdApSG2wwBvg14B+2+A8wOf85UQjOVv9vm+lAgHRgBfAa4s80XiCIdUsJCotUTZrYX2AzsBH7Q7vo/gE+YWTqhAfyJdtd/DvyS0F8GS4AtZtZ+avIT4Uxz6+tzXd0JEZEo1DrL4mxgDbCl9YKZpRJaoveAuzcC/+T9y0Jmtxt7NyAiEnv+6O6b3b0C+BnvLbdrAX7g7vXuXgt8HviOu5e6ez3wQ+DS8HKRq4EX3H2euze6e7m7L+vgZzUCw4CR4XavuXtHCYurgR+7+053LwN+RGiJX9vP+XH4M54BqoFjj/LPQaKcEhYSrT7i7v2A04FxwAFF3tz9dUJZ4u8CT4cH9LbXm939Tnc/Ccgg9BfBX81sfLufkdHm9X/d1x0RkahxL/Bx4FO8f3bbR4Em4Jnw8f3A+WaW1aZNQbuxd3R3BywiEoE2t3m/kdDsCYAyd69rc20k8HhrkhdYDTQTmimRw4E13A7mdkKzleebWZGZ3XqQdsPDsXQUF0C5uze1Oa4B+nbi50sMU8JCopq7vwLcA9zRweX7CE1x+8DaFO5e6+53EppCN6GrYxQRiSXuvpFQ8c0LgMfaXf4koYfXTeH1148AibQp1CkiIkAo2dAqF2gtPtx+5sNm4Px2id5kd98SvnbIpK+773X3W9x9FHARoWXTZ3bQdCuhBElHcYkcESUsJBb8DjjbzKa1O/8HQlOSX21/g5ndFC4OlGJmCeHlIP14/04hIiJy+D4DnOHu+9qcGwGcSahmxTTeWwP9SzreLUREJJZ9ycyywwU0v817dX/auwv4mZmNBDCzLDObG752P3CWmV0eft4d1MHzMmZ2oZkdE67pVkVohkZzBz9rHvDd8M/IBL7P++sUiRwWJSwk6oXX0P0D+F678xXu/uJB1uDVAr8mtHPILuBLwCXuXtSmzb/MrLrN6/Fu6oKISFRx9w3uvqTd6VOAZe4+3923t74IJZenmNmkcLs57cbeajM7oUc7ICISvAeA+UBR+PXTg7T7PaFi8/PD9d0KgFkA7r6J0Gy3WwhtJb2MUKK4vTHAC4RqTiwE/uTuL3fQ7qeEar8tB94Bln5AXCKdYh3/W01EREREREQijZmVAJ919xeCjkWku2mGhYiIiIiIiIhEHCUsRERERERERCTiaEmIiIiIiIiIiEQczbAQERERERERkYijhIWIiIiIiIiIRJyEoAPoCpmZmZ6Xlxd0GCIi7/Pmm2/ucvesoOPoCRqLRSQSaRwWEQnekY7FUZGwyMvLY8mS9tu5i4gEz8w2Bh1DT9FYLCKRKOhx2MzOA34PxAN/dvdftLs+F/gJ0AI0ATe5++tmlgP8Axgavna3u//+g36WxmERiVRHOhZHRcJCRERERCTSmFk8cCdwNlAKLDazp9x9VZtmLwJPubub2RTgYWAcoeTFLe6+1Mz6AW+a2YJ294qIRDXVsBARERER6R4zgfXuXuTuDcCDwNy2Ddy92t/bti8N8PD5be6+NPx+L7AaGNFjkYuIRAAlLEREREREuscIYHOb41I6SDqY2UfNbA3wb+C6Dq7nAdOBwg6uXW9mS8xsSVlZWVfFLSISEaJ2SUhjYyOlpaXU1dUFHUqPS05OJjs7m8TExKBDEZEYFwtjscZcEfkA1sE5f98J98eBx83sVEL1LM7a/wFmfYFHCdW2qOrg3ruBuwFmzJjxvs8WkcMTC88u3amrn4uiNmFRWlpKv379yMvLw6yjvyuik7tTXl5OaWkp+fn5QYcjIjEu2sdijbkicgilQE6b42xg68Eau/urZjbazDLdfZeZJRJKVtzv7o91c6wiQvQ/u3Sn7nguitolIXV1dQwaNCjm/iMzMwYNGqSMoIhEhGgfizXmisghLAbGmFm+mSUBVwJPtW1gZsdYeJA0s+OAJKA8fO4vwGp3/00Pxy0Ss6L92aU7dcdzUdTOsABi9j+yWO23iESmaB+Tor1/InLk3L3JzG4Enie0relf3X2lmd0Qvn4XcAnwCTNrBGqBK8I7hpwMXAu8Y2bLwh/5bXd/psc7IhJj9Hf7kevqP7uonWERCeLj45k2bRqTJk3ioosuorKyEoCSkhLMjO9973v72+7atYvExERuvPFGANauXcvpp5/OtGnTGD9+PNdffz0AL7/8Munp6UybNm3/64UXXujxvomI9BZmxrXXXrv/uKmpiaysLC688MID2s2dO5c5c+YccO6HP/whI0aMOGDMbR3LRUQ6w92fcfex7j7a3X8WPndXOFmBu//S3Se6+zR3n+Pur4fPv+7u5u5TwtemKVkhIkdiyZIlfOUrXzno9a1bt3LppZf2YESdp4RFN0pJSWHZsmWsWLGCgQMHcuedd+6/NmrUKJ5++un9x4888ggTJ07cf/yVr3yFm2++mWXLlrF69Wq+/OUv7792yimnsGzZsv2vs87aX5dJRETaSUtLY8WKFdTW1gKwYMECRow4sEh/ZWUlS5cupbKykuLi4gOutY7Fra+MjIyeCl1ERETkfZqbmw+r/YwZM/jDH/5w0OvDhw/nn//859GG1S2UsOghc+bMYcuWLfuPU1JSGD9+PEuWLAHgoYce4vLLL99/fdu2bWRnZ+8/njx5cs8FKyISZc4//3z+/e9/AzBv3jyuuuqqA64/+uijXHTRRVx55ZU8+OCDQYQoIiIiQklJCePGjeOTn/wkU6ZM4dJLL6Wmpoa8vDx+/OMfc/LJJ/PII48wf/585syZw3HHHcdll11GdXU1AIsXL+bEE09k6tSpzJw5k7179/Lyyy/vn1n6yiuv7J81On36dPbu3UtJSQmTJk0CQjU8Pv3pTzN58mSmT5/OSy+9BMA999zDxz72Mc477zzGjBnDN7/5zR7584jqGhatfvSvlaza+r5doI7KhOH9+cFFEw/dkFAG7MUXX+Qzn/nMAedbH4yHDh1KfHw
|
|
|
|
"text/plain": [
|
|
|
|
"<Figure size 1296x3024 with 18 Axes>"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"metadata": {
|
|
|
|
"needs_background": "light"
|
|
|
|
},
|
|
|
|
"output_type": "display_data"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"metrics = list(result.columns[[i not in [\"Beta\"] for i in result.columns]])\n",
|
|
|
|
"\n",
|
|
|
|
"charts_per_row = 6\n",
|
|
|
|
"charts_per_column = 3\n",
|
|
|
|
"\n",
|
|
|
|
"fig, axes = plt.subplots(\n",
|
|
|
|
" nrows=charts_per_row, ncols=charts_per_column, figsize=(18, 7 * charts_per_row)\n",
|
|
|
|
")\n",
|
|
|
|
"import itertools\n",
|
|
|
|
"\n",
|
|
|
|
"to_iter = [\n",
|
|
|
|
" i for i in itertools.product(range(charts_per_row), range(charts_per_column))\n",
|
|
|
|
"]\n",
|
|
|
|
"\n",
|
|
|
|
"for i in range(len(metrics)):\n",
|
|
|
|
" df = result[[\"Beta\", metrics[i]]]\n",
|
|
|
|
" df.plot(ax=axes[to_iter[i]], title=metrics[i], x=0, y=1)"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"metadata": {},
|
|
|
|
"source": [
|
|
|
|
"# Check sample recommendations"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 10,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/html": [
|
|
|
|
"<div>\n",
|
|
|
|
"<style scoped>\n",
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
" }\n",
|
|
|
|
"\n",
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
" }\n",
|
|
|
|
"\n",
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
" text-align: right;\n",
|
|
|
|
" }\n",
|
|
|
|
"</style>\n",
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
" <thead>\n",
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
" <th></th>\n",
|
|
|
|
" <th>user</th>\n",
|
|
|
|
" <th>rating</th>\n",
|
|
|
|
" <th>title</th>\n",
|
|
|
|
" <th>genres</th>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" </thead>\n",
|
|
|
|
" <tbody>\n",
|
|
|
|
" <tr>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <th>29798</th>\n",
|
|
|
|
" <td>853</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" <td>5</td>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <td>Soul Food (1997)</td>\n",
|
|
|
|
" <td>Drama</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <th>26282</th>\n",
|
|
|
|
" <td>853</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" <td>5</td>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <td>Air Force One (1997)</td>\n",
|
|
|
|
" <td>Action, Thriller</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <th>73991</th>\n",
|
|
|
|
" <td>853</td>\n",
|
|
|
|
" <td>4</td>\n",
|
|
|
|
" <td>Fire Down Below (1997)</td>\n",
|
|
|
|
" <td>Action, Drama, Thriller</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <th>24846</th>\n",
|
|
|
|
" <td>853</td>\n",
|
|
|
|
" <td>4</td>\n",
|
|
|
|
" <td>Gattaca (1997)</td>\n",
|
|
|
|
" <td>Drama, Sci-Fi, Thriller</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <th>52746</th>\n",
|
|
|
|
" <td>853</td>\n",
|
|
|
|
" <td>4</td>\n",
|
|
|
|
" <td>Fly Away Home (1996)</td>\n",
|
|
|
|
" <td>Adventure, Children's</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <th>46026</th>\n",
|
|
|
|
" <td>853</td>\n",
|
|
|
|
" <td>4</td>\n",
|
|
|
|
" <td>L.A. Confidential (1997)</td>\n",
|
|
|
|
" <td>Crime, Film-Noir, Mystery, Thriller</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <th>33394</th>\n",
|
|
|
|
" <td>853</td>\n",
|
|
|
|
" <td>4</td>\n",
|
|
|
|
" <td>Game, The (1997)</td>\n",
|
|
|
|
" <td>Mystery, Thriller</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <th>58732</th>\n",
|
|
|
|
" <td>853</td>\n",
|
|
|
|
" <td>4</td>\n",
|
|
|
|
" <td>Volcano (1997)</td>\n",
|
|
|
|
" <td>Drama, Thriller</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <th>30970</th>\n",
|
|
|
|
" <td>853</td>\n",
|
|
|
|
" <td>4</td>\n",
|
|
|
|
" <td>Rosewood (1997)</td>\n",
|
|
|
|
" <td>Drama</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <th>55641</th>\n",
|
|
|
|
" <td>853</td>\n",
|
|
|
|
" <td>4</td>\n",
|
|
|
|
" <td>Hoodlum (1997)</td>\n",
|
|
|
|
" <td>Crime, Drama, Film-Noir</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <th>19322</th>\n",
|
|
|
|
" <td>853</td>\n",
|
|
|
|
" <td>4</td>\n",
|
|
|
|
" <td>Scream (1996)</td>\n",
|
|
|
|
" <td>Horror, Thriller</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <th>73653</th>\n",
|
|
|
|
" <td>853</td>\n",
|
|
|
|
" <td>4</td>\n",
|
|
|
|
" <td>Gang Related (1997)</td>\n",
|
|
|
|
" <td>Crime</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <th>20974</th>\n",
|
|
|
|
" <td>853</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" <td>4</td>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <td>I Know What You Did Last Summer (1997)</td>\n",
|
|
|
|
" <td>Horror, Mystery, Thriller</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <th>62996</th>\n",
|
|
|
|
" <td>853</td>\n",
|
|
|
|
" <td>3</td>\n",
|
|
|
|
" <td>Kiss the Girls (1997)</td>\n",
|
|
|
|
" <td>Crime, Drama, Thriller</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <th>37091</th>\n",
|
|
|
|
" <td>853</td>\n",
|
|
|
|
" <td>3</td>\n",
|
|
|
|
" <td>Contact (1997)</td>\n",
|
|
|
|
" <td>Drama, Sci-Fi</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" </tbody>\n",
|
|
|
|
"</table>\n",
|
|
|
|
"</div>"
|
|
|
|
],
|
|
|
|
"text/plain": [
|
2021-06-12 11:14:56 +02:00
|
|
|
" user rating title \\\n",
|
|
|
|
"29798 853 5 Soul Food (1997) \n",
|
|
|
|
"26282 853 5 Air Force One (1997) \n",
|
|
|
|
"73991 853 4 Fire Down Below (1997) \n",
|
|
|
|
"24846 853 4 Gattaca (1997) \n",
|
|
|
|
"52746 853 4 Fly Away Home (1996) \n",
|
|
|
|
"46026 853 4 L.A. Confidential (1997) \n",
|
|
|
|
"33394 853 4 Game, The (1997) \n",
|
|
|
|
"58732 853 4 Volcano (1997) \n",
|
|
|
|
"30970 853 4 Rosewood (1997) \n",
|
|
|
|
"55641 853 4 Hoodlum (1997) \n",
|
|
|
|
"19322 853 4 Scream (1996) \n",
|
|
|
|
"73653 853 4 Gang Related (1997) \n",
|
|
|
|
"20974 853 4 I Know What You Did Last Summer (1997) \n",
|
|
|
|
"62996 853 3 Kiss the Girls (1997) \n",
|
|
|
|
"37091 853 3 Contact (1997) \n",
|
2021-05-07 22:16:28 +02:00
|
|
|
"\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" genres \n",
|
|
|
|
"29798 Drama \n",
|
|
|
|
"26282 Action, Thriller \n",
|
|
|
|
"73991 Action, Drama, Thriller \n",
|
|
|
|
"24846 Drama, Sci-Fi, Thriller \n",
|
|
|
|
"52746 Adventure, Children's \n",
|
|
|
|
"46026 Crime, Film-Noir, Mystery, Thriller \n",
|
|
|
|
"33394 Mystery, Thriller \n",
|
|
|
|
"58732 Drama, Thriller \n",
|
|
|
|
"30970 Drama \n",
|
|
|
|
"55641 Crime, Drama, Film-Noir \n",
|
|
|
|
"19322 Horror, Thriller \n",
|
|
|
|
"73653 Crime \n",
|
|
|
|
"20974 Horror, Mystery, Thriller \n",
|
|
|
|
"62996 Crime, Drama, Thriller \n",
|
|
|
|
"37091 Drama, Sci-Fi "
|
2021-05-07 22:16:28 +02:00
|
|
|
]
|
|
|
|
},
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "display_data"
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/html": [
|
|
|
|
"<div>\n",
|
|
|
|
"<style scoped>\n",
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
" }\n",
|
|
|
|
"\n",
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
" }\n",
|
|
|
|
"\n",
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
" text-align: right;\n",
|
|
|
|
" }\n",
|
|
|
|
"</style>\n",
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
" <thead>\n",
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
" <th></th>\n",
|
|
|
|
" <th>user</th>\n",
|
|
|
|
" <th>rec_nb</th>\n",
|
|
|
|
" <th>title</th>\n",
|
|
|
|
" <th>genres</th>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" </thead>\n",
|
|
|
|
" <tbody>\n",
|
|
|
|
" <tr>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <th>5009</th>\n",
|
|
|
|
" <td>853.0</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" <td>1</td>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <td>Titanic (1997)</td>\n",
|
|
|
|
" <td>Action, Drama, Romance</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <th>5225</th>\n",
|
|
|
|
" <td>853.0</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" <td>2</td>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <td>Full Monty, The (1997)</td>\n",
|
|
|
|
" <td>Comedy</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <th>8911</th>\n",
|
|
|
|
" <td>853.0</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" <td>3</td>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <td>Murder at 1600 (1997)</td>\n",
|
|
|
|
" <td>Mystery, Thriller</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <th>3809</th>\n",
|
|
|
|
" <td>853.0</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" <td>4</td>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <td>Chasing Amy (1997)</td>\n",
|
|
|
|
" <td>Drama, Romance</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <th>380</th>\n",
|
|
|
|
" <td>853.0</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" <td>5</td>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <td>Star Wars (1977)</td>\n",
|
|
|
|
" <td>Action, Adventure, Romance, Sci-Fi, War</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <th>6337</th>\n",
|
|
|
|
" <td>853.0</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" <td>6</td>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <td>Good Will Hunting (1997)</td>\n",
|
|
|
|
" <td>Drama</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <th>2979</th>\n",
|
|
|
|
" <td>853.0</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" <td>7</td>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <td>Return of the Jedi (1983)</td>\n",
|
|
|
|
" <td>Action, Adventure, Romance, Sci-Fi, War</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <th>9039</th>\n",
|
|
|
|
" <td>853.0</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" <td>8</td>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <td>Evita (1996)</td>\n",
|
|
|
|
" <td>Drama, Musical</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <th>9021</th>\n",
|
|
|
|
" <td>853.0</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" <td>9</td>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <td>Seven Years in Tibet (1997)</td>\n",
|
|
|
|
" <td>Drama, War</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <th>7296</th>\n",
|
|
|
|
" <td>853.0</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" <td>10</td>\n",
|
2021-06-12 11:14:56 +02:00
|
|
|
" <td>Fargo (1996)</td>\n",
|
|
|
|
" <td>Crime, Drama, Thriller</td>\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" </tbody>\n",
|
|
|
|
"</table>\n",
|
|
|
|
"</div>"
|
|
|
|
],
|
|
|
|
"text/plain": [
|
2021-06-12 11:14:56 +02:00
|
|
|
" user rec_nb title \\\n",
|
|
|
|
"5009 853.0 1 Titanic (1997) \n",
|
|
|
|
"5225 853.0 2 Full Monty, The (1997) \n",
|
|
|
|
"8911 853.0 3 Murder at 1600 (1997) \n",
|
|
|
|
"3809 853.0 4 Chasing Amy (1997) \n",
|
|
|
|
"380 853.0 5 Star Wars (1977) \n",
|
|
|
|
"6337 853.0 6 Good Will Hunting (1997) \n",
|
|
|
|
"2979 853.0 7 Return of the Jedi (1983) \n",
|
|
|
|
"9039 853.0 8 Evita (1996) \n",
|
|
|
|
"9021 853.0 9 Seven Years in Tibet (1997) \n",
|
|
|
|
"7296 853.0 10 Fargo (1996) \n",
|
2021-05-07 22:16:28 +02:00
|
|
|
"\n",
|
|
|
|
" genres \n",
|
2021-06-12 11:14:56 +02:00
|
|
|
"5009 Action, Drama, Romance \n",
|
|
|
|
"5225 Comedy \n",
|
|
|
|
"8911 Mystery, Thriller \n",
|
|
|
|
"3809 Drama, Romance \n",
|
|
|
|
"380 Action, Adventure, Romance, Sci-Fi, War \n",
|
|
|
|
"6337 Drama \n",
|
|
|
|
"2979 Action, Adventure, Romance, Sci-Fi, War \n",
|
|
|
|
"9039 Drama, Musical \n",
|
|
|
|
"9021 Drama, War \n",
|
|
|
|
"7296 Crime, Drama, Thriller "
|
2021-05-07 22:16:28 +02:00
|
|
|
]
|
|
|
|
},
|
|
|
|
"execution_count": 10,
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"train = pd.read_csv(\n",
|
|
|
|
" \"./Datasets/ml-100k/train.csv\",\n",
|
|
|
|
" sep=\"\\t\",\n",
|
|
|
|
" header=None,\n",
|
|
|
|
" names=[\"user\", \"item\", \"rating\", \"timestamp\"],\n",
|
|
|
|
")\n",
|
|
|
|
"items = pd.read_csv(\"./Datasets/ml-100k/movies.csv\")\n",
|
|
|
|
"\n",
|
|
|
|
"user = random.choice(list(set(train[\"user\"])))\n",
|
|
|
|
"\n",
|
|
|
|
"train_content = pd.merge(train, items, left_on=\"item\", right_on=\"id\")\n",
|
|
|
|
"display(\n",
|
|
|
|
" train_content[train_content[\"user\"] == user][\n",
|
|
|
|
" [\"user\", \"rating\", \"title\", \"genres\"]\n",
|
|
|
|
" ].sort_values(by=\"rating\", ascending=False)[:15]\n",
|
|
|
|
")\n",
|
|
|
|
"\n",
|
|
|
|
"reco = np.loadtxt(\"Recommendations generated/ml-100k/Self_P3_reco.csv\", delimiter=\",\")\n",
|
|
|
|
"items = pd.read_csv(\"./Datasets/ml-100k/movies.csv\")\n",
|
|
|
|
"\n",
|
|
|
|
"# Let's ignore scores - they are not used in evaluation:\n",
|
|
|
|
"reco_users = reco[:, :1]\n",
|
|
|
|
"reco_items = reco[:, 1::2]\n",
|
|
|
|
"# Let's put them into one array\n",
|
|
|
|
"reco = np.concatenate((reco_users, reco_items), axis=1)\n",
|
|
|
|
"\n",
|
|
|
|
"# Let's rebuild it user-item dataframe\n",
|
|
|
|
"recommended = []\n",
|
|
|
|
"for row in reco:\n",
|
|
|
|
" for rec_nb, entry in enumerate(row[1:]):\n",
|
|
|
|
" recommended.append((row[0], rec_nb + 1, entry))\n",
|
|
|
|
"recommended = pd.DataFrame(recommended, columns=[\"user\", \"rec_nb\", \"item\"])\n",
|
|
|
|
"\n",
|
|
|
|
"recommended_content = pd.merge(recommended, items, left_on=\"item\", right_on=\"id\")\n",
|
|
|
|
"recommended_content[recommended_content[\"user\"] == user][\n",
|
|
|
|
" [\"user\", \"rec_nb\", \"title\", \"genres\"]\n",
|
|
|
|
"].sort_values(by=\"rec_nb\")"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"metadata": {},
|
|
|
|
"source": [
|
|
|
|
"# project task 5: generate recommendations of RP3Beta for hyperparameters found to optimize recall"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 11,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"# We generated recommendations for P3, a special case of RP3Beta (with alpha=1, beta=0).\n",
|
|
|
|
"# We've observed that changing alpha and beta impacts the model performance.\n",
|
|
|
|
"\n",
|
2021-05-08 11:46:21 +02:00
|
|
|
"# Your task is find values alpha and beta for which recall will be the highest \n",
|
|
|
|
"# (any solution with recall higher than P3 will be accepted)\n",
|
|
|
|
"# train the model and generate recommendations.\n",
|
2021-05-07 22:16:28 +02:00
|
|
|
"\n",
|
|
|
|
"# save the outptut in 'Recommendations generated/ml-100k/Self_RP3Beta_estimations.csv'\n",
|
|
|
|
"# and 'Recommendations generated/ml-100k/Self_RP3Beta_reco.csv'"
|
|
|
|
]
|
|
|
|
},
|
2021-06-12 11:14:56 +02:00
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 36,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"model = RP3Beta()\n",
|
|
|
|
"model.fit(train_ui, alpha=0.6, beta=0.6)"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 37,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"top_n = pd.DataFrame(model.recommend(user_code_id, item_code_id, topK=10))\n",
|
|
|
|
"\n",
|
|
|
|
"top_n.to_csv(\n",
|
|
|
|
" \"Recommendations generated/ml-100k/Self_RP3Beta_reco.csv\", index=False, header=False\n",
|
|
|
|
")\n",
|
|
|
|
"\n",
|
|
|
|
"estimations = pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))\n",
|
|
|
|
"estimations.to_csv(\n",
|
|
|
|
" \"Recommendations generated/ml-100k/Self_RP3Beta_estimations.csv\",\n",
|
|
|
|
" index=False,\n",
|
|
|
|
" header=False,\n",
|
|
|
|
")"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 38,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"name": "stderr",
|
|
|
|
"output_type": "stream",
|
|
|
|
"text": [
|
|
|
|
"943it [00:00, 12405.33it/s]\n"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/html": [
|
|
|
|
"<div>\n",
|
|
|
|
"<style scoped>\n",
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
" }\n",
|
|
|
|
"\n",
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
" }\n",
|
|
|
|
"\n",
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
" text-align: right;\n",
|
|
|
|
" }\n",
|
|
|
|
"</style>\n",
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
" <thead>\n",
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
" <th></th>\n",
|
|
|
|
" <th>RMSE</th>\n",
|
|
|
|
" <th>MAE</th>\n",
|
|
|
|
" <th>precision</th>\n",
|
|
|
|
" <th>recall</th>\n",
|
|
|
|
" <th>F_1</th>\n",
|
|
|
|
" <th>F_05</th>\n",
|
|
|
|
" <th>precision_super</th>\n",
|
|
|
|
" <th>recall_super</th>\n",
|
|
|
|
" <th>NDCG</th>\n",
|
|
|
|
" <th>mAP</th>\n",
|
|
|
|
" <th>MRR</th>\n",
|
|
|
|
" <th>LAUC</th>\n",
|
|
|
|
" <th>HR</th>\n",
|
|
|
|
" <th>HitRate2</th>\n",
|
|
|
|
" <th>HitRate3</th>\n",
|
|
|
|
" <th>Reco in test</th>\n",
|
|
|
|
" <th>Test coverage</th>\n",
|
|
|
|
" <th>Shannon</th>\n",
|
|
|
|
" <th>Gini</th>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" </thead>\n",
|
|
|
|
" <tbody>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>0</th>\n",
|
|
|
|
" <td>3.675385</td>\n",
|
|
|
|
" <td>3.499644</td>\n",
|
|
|
|
" <td>0.321209</td>\n",
|
|
|
|
" <td>0.212728</td>\n",
|
|
|
|
" <td>0.210025</td>\n",
|
|
|
|
" <td>0.245804</td>\n",
|
|
|
|
" <td>0.240021</td>\n",
|
|
|
|
" <td>0.275765</td>\n",
|
|
|
|
" <td>0.39005</td>\n",
|
|
|
|
" <td>0.252127</td>\n",
|
|
|
|
" <td>0.641583</td>\n",
|
|
|
|
" <td>0.604033</td>\n",
|
|
|
|
" <td>0.898197</td>\n",
|
|
|
|
" <td>0.725345</td>\n",
|
|
|
|
" <td>0.552492</td>\n",
|
|
|
|
" <td>1.0</td>\n",
|
|
|
|
" <td>0.157287</td>\n",
|
|
|
|
" <td>4.524904</td>\n",
|
|
|
|
" <td>0.951442</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" </tbody>\n",
|
|
|
|
"</table>\n",
|
|
|
|
"</div>"
|
|
|
|
],
|
|
|
|
"text/plain": [
|
|
|
|
" RMSE MAE precision recall F_1 F_05 \\\n",
|
|
|
|
"0 3.675385 3.499644 0.321209 0.212728 0.210025 0.245804 \n",
|
|
|
|
"\n",
|
|
|
|
" precision_super recall_super NDCG mAP MRR LAUC \\\n",
|
|
|
|
"0 0.240021 0.275765 0.39005 0.252127 0.641583 0.604033 \n",
|
|
|
|
"\n",
|
|
|
|
" HR HitRate2 HitRate3 Reco in test Test coverage Shannon \\\n",
|
|
|
|
"0 0.898197 0.725345 0.552492 1.0 0.157287 4.524904 \n",
|
|
|
|
"\n",
|
|
|
|
" Gini \n",
|
|
|
|
"0 0.951442 "
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"execution_count": 38,
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"import evaluation_measures as ev\n",
|
|
|
|
"\n",
|
|
|
|
"estimations_df = pd.read_csv(\n",
|
|
|
|
" \"Recommendations generated/ml-100k/Self_RP3Beta_estimations.csv\", header=None\n",
|
|
|
|
")\n",
|
|
|
|
"reco = np.loadtxt(\"Recommendations generated/ml-100k/Self_RP3Beta_reco.csv\", delimiter=\",\")\n",
|
|
|
|
"\n",
|
|
|
|
"ev.evaluate(\n",
|
|
|
|
" test=pd.read_csv(\"./Datasets/ml-100k/test.csv\", sep=\"\\t\", header=None),\n",
|
|
|
|
" estimations_df=estimations_df,\n",
|
|
|
|
" reco=reco,\n",
|
|
|
|
" super_reactions=[4, 5],\n",
|
|
|
|
")"
|
|
|
|
]
|
|
|
|
},
|
2021-05-07 22:16:28 +02:00
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"metadata": {},
|
|
|
|
"source": [
|
|
|
|
"# project task 6 (optional): implement graph-based model of your choice "
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2021-06-12 11:14:56 +02:00
|
|
|
"execution_count": 9,
|
2021-05-07 22:16:28 +02:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"# for example change length of paths in RP3beta or make some other modification (but change more than input and hyperparameters)\n",
|
|
|
|
"# feel free to implement your idea or search for some ideas\n",
|
|
|
|
"\n",
|
|
|
|
"# save the outptut in 'Recommendations generated/ml-100k/Self_GraphTask_estimations.csv'\n",
|
|
|
|
"# and 'Recommendations generated/ml-100k/Self_GraphTask_reco.csv'"
|
|
|
|
]
|
2021-06-12 11:14:56 +02:00
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 10,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"class RP3Custom:\n",
|
|
|
|
" def fit(self, train_ui, alpha, beta):\n",
|
|
|
|
" \"\"\"We weight our edges by user's explicit ratings so if user rated movie high we'll follow that path\n",
|
|
|
|
" with higher probability.\"\"\"\n",
|
|
|
|
" self.train_ui = train_ui\n",
|
|
|
|
" self.train_iu = train_ui.transpose()\n",
|
|
|
|
"\n",
|
|
|
|
" self.alpha = alpha\n",
|
|
|
|
" self.beta = beta\n",
|
|
|
|
"\n",
|
|
|
|
" # Define Pui\n",
|
|
|
|
" Pui = sparse.csr_matrix(self.train_ui / self.train_ui.sum(axis=1))\n",
|
|
|
|
"\n",
|
|
|
|
" # Define Piu\n",
|
|
|
|
" to_divide = np.vectorize(lambda x: x if x > 0 else 1)(\n",
|
|
|
|
" self.train_iu.sum(axis=1)\n",
|
|
|
|
" ) # to avoid dividing by zero\n",
|
|
|
|
" Piu = sparse.csr_matrix(self.train_iu / to_divide)\n",
|
|
|
|
" item_orders = (self.train_ui > 0).sum(axis=0)\n",
|
|
|
|
"\n",
|
|
|
|
" PuiAlfa = Pui.power(self.alpha)\n",
|
|
|
|
" PiuAlfa = Piu.power(self.alpha)\n",
|
|
|
|
" PuiBeta = Pui.power(self.beta)\n",
|
|
|
|
" PiuBeta = Piu.power(self.beta)\n",
|
|
|
|
"\n",
|
|
|
|
" P3 = PuiAlfa * PiuAlfa * PuiAlfa * PuiBeta * PiuBeta\n",
|
|
|
|
"\n",
|
|
|
|
" P3 /= np.power(\n",
|
|
|
|
" np.vectorize(lambda x: x if x > 0 else 1)(item_orders), self.beta\n",
|
|
|
|
" )\n",
|
|
|
|
"\n",
|
|
|
|
" self.estimations = np.array(P3)\n",
|
|
|
|
"\n",
|
|
|
|
" def recommend(self, user_code_id, item_code_id, topK=10):\n",
|
|
|
|
"\n",
|
|
|
|
" top_k = defaultdict(list)\n",
|
|
|
|
" for nb_user, user in enumerate(self.estimations):\n",
|
|
|
|
"\n",
|
|
|
|
" user_rated = self.train_ui.indices[\n",
|
|
|
|
" self.train_ui.indptr[nb_user] : self.train_ui.indptr[nb_user + 1]\n",
|
|
|
|
" ]\n",
|
|
|
|
" for item, score in enumerate(user):\n",
|
|
|
|
" if item not in user_rated and not np.isnan(score):\n",
|
|
|
|
" top_k[user_code_id[nb_user]].append((item_code_id[item], score))\n",
|
|
|
|
" result = []\n",
|
|
|
|
" # Let's choose k best items in the format: (user, item1, score1, item2, score2, ...)\n",
|
|
|
|
" for uid, item_scores in top_k.items():\n",
|
|
|
|
" item_scores.sort(key=lambda x: x[1], reverse=True)\n",
|
|
|
|
" result.append([uid] + list(chain(*item_scores[:topK])))\n",
|
|
|
|
" return result\n",
|
|
|
|
"\n",
|
|
|
|
" def estimate(self, user_code_id, item_code_id, test_ui):\n",
|
|
|
|
" result = []\n",
|
|
|
|
" for user, item in zip(*test_ui.nonzero()):\n",
|
|
|
|
" result.append(\n",
|
|
|
|
" [\n",
|
|
|
|
" user_code_id[user],\n",
|
|
|
|
" item_code_id[item],\n",
|
|
|
|
" self.estimations[user, item]\n",
|
|
|
|
" if not np.isnan(self.estimations[user, item])\n",
|
|
|
|
" else 1,\n",
|
|
|
|
" ]\n",
|
|
|
|
" )\n",
|
|
|
|
" return result"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 12,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"name": "stderr",
|
|
|
|
"output_type": "stream",
|
|
|
|
"text": [
|
|
|
|
"943it [00:00, 13313.77it/s]\n"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/html": [
|
|
|
|
"<div>\n",
|
|
|
|
"<style scoped>\n",
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
" }\n",
|
|
|
|
"\n",
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
" }\n",
|
|
|
|
"\n",
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
" text-align: right;\n",
|
|
|
|
" }\n",
|
|
|
|
"</style>\n",
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
" <thead>\n",
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
" <th></th>\n",
|
|
|
|
" <th>RMSE</th>\n",
|
|
|
|
" <th>MAE</th>\n",
|
|
|
|
" <th>precision</th>\n",
|
|
|
|
" <th>recall</th>\n",
|
|
|
|
" <th>F_1</th>\n",
|
|
|
|
" <th>F_05</th>\n",
|
|
|
|
" <th>precision_super</th>\n",
|
|
|
|
" <th>recall_super</th>\n",
|
|
|
|
" <th>NDCG</th>\n",
|
|
|
|
" <th>mAP</th>\n",
|
|
|
|
" <th>MRR</th>\n",
|
|
|
|
" <th>LAUC</th>\n",
|
|
|
|
" <th>HR</th>\n",
|
|
|
|
" <th>HitRate2</th>\n",
|
|
|
|
" <th>HitRate3</th>\n",
|
|
|
|
" <th>Reco in test</th>\n",
|
|
|
|
" <th>Test coverage</th>\n",
|
|
|
|
" <th>Shannon</th>\n",
|
|
|
|
" <th>Gini</th>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" </thead>\n",
|
|
|
|
" <tbody>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>0</th>\n",
|
|
|
|
" <td>3.702928</td>\n",
|
|
|
|
" <td>3.527713</td>\n",
|
|
|
|
" <td>0.322694</td>\n",
|
|
|
|
" <td>0.216069</td>\n",
|
|
|
|
" <td>0.212152</td>\n",
|
|
|
|
" <td>0.247538</td>\n",
|
|
|
|
" <td>0.245279</td>\n",
|
|
|
|
" <td>0.284983</td>\n",
|
|
|
|
" <td>0.388271</td>\n",
|
|
|
|
" <td>0.248239</td>\n",
|
|
|
|
" <td>0.636318</td>\n",
|
|
|
|
" <td>0.605683</td>\n",
|
|
|
|
" <td>0.910923</td>\n",
|
|
|
|
" <td>0.731707</td>\n",
|
|
|
|
" <td>0.554613</td>\n",
|
|
|
|
" <td>0.999788</td>\n",
|
|
|
|
" <td>0.178932</td>\n",
|
|
|
|
" <td>4.549663</td>\n",
|
|
|
|
" <td>0.950182</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" </tbody>\n",
|
|
|
|
"</table>\n",
|
|
|
|
"</div>"
|
|
|
|
],
|
|
|
|
"text/plain": [
|
|
|
|
" RMSE MAE precision recall F_1 F_05 \\\n",
|
|
|
|
"0 3.702928 3.527713 0.322694 0.216069 0.212152 0.247538 \n",
|
|
|
|
"\n",
|
|
|
|
" precision_super recall_super NDCG mAP MRR LAUC \\\n",
|
|
|
|
"0 0.245279 0.284983 0.388271 0.248239 0.636318 0.605683 \n",
|
|
|
|
"\n",
|
|
|
|
" HR HitRate2 HitRate3 Reco in test Test coverage Shannon \\\n",
|
|
|
|
"0 0.910923 0.731707 0.554613 0.999788 0.178932 4.549663 \n",
|
|
|
|
"\n",
|
|
|
|
" Gini \n",
|
|
|
|
"0 0.950182 "
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"execution_count": 12,
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"myModel = RP3Custom()\n",
|
|
|
|
"model.fit(train_ui, alpha=0.8, beta=0.6)\n",
|
|
|
|
"\n",
|
|
|
|
"top_n = pd.DataFrame(model.recommend(user_code_id, item_code_id, topK=10))\n",
|
|
|
|
"\n",
|
|
|
|
"top_n.to_csv(\n",
|
|
|
|
" \"Recommendations generated/ml-100k/Self_GraphTask_reco.csv\", index=False, header=False\n",
|
|
|
|
")\n",
|
|
|
|
"\n",
|
|
|
|
"estimations = pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))\n",
|
|
|
|
"estimations.to_csv(\n",
|
|
|
|
" \"Recommendations generated/ml-100k/Self_GraphTask_estimations.csv\",\n",
|
|
|
|
" index=False,\n",
|
|
|
|
" header=False,\n",
|
|
|
|
")\n",
|
|
|
|
"\n",
|
|
|
|
"estimations_df = pd.read_csv(\n",
|
|
|
|
" \"Recommendations generated/ml-100k/Self_GraphTask_estimations.csv\", header=None\n",
|
|
|
|
")\n",
|
|
|
|
"reco = np.loadtxt(\"Recommendations generated/ml-100k/Self_GraphTask_reco.csv\", delimiter=\",\")\n",
|
|
|
|
"\n",
|
|
|
|
"ev.evaluate(\n",
|
|
|
|
" test=pd.read_csv(\"./Datasets/ml-100k/test.csv\", sep=\"\\t\", header=None),\n",
|
|
|
|
" estimations_df=estimations_df,\n",
|
|
|
|
" reco=reco,\n",
|
|
|
|
" super_reactions=[4, 5],\n",
|
|
|
|
")"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": null,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": []
|
2021-05-07 22:16:28 +02:00
|
|
|
}
|
|
|
|
],
|
|
|
|
"metadata": {
|
|
|
|
"kernelspec": {
|
|
|
|
"display_name": "Python 3",
|
|
|
|
"language": "python",
|
|
|
|
"name": "python3"
|
|
|
|
},
|
|
|
|
"language_info": {
|
|
|
|
"codemirror_mode": {
|
|
|
|
"name": "ipython",
|
|
|
|
"version": 3
|
|
|
|
},
|
|
|
|
"file_extension": ".py",
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
"name": "python",
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
"pygments_lexer": "ipython3",
|
2021-06-12 11:14:56 +02:00
|
|
|
"version": "3.8.8"
|
2021-05-07 22:16:28 +02:00
|
|
|
}
|
|
|
|
},
|
|
|
|
"nbformat": 4,
|
|
|
|
"nbformat_minor": 4
|
|
|
|
}
|