workshops_recommender_systems/P5. Graph-based.ipynb

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Self made RP3-beta"
]
},
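{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick recap of the scoring rule implemented below (reconstructed from the code, so treat it as a summary rather than the original notes): let $P_{UI}$ and $P_{IU}$ be the row-normalized user$\\to$item and item$\\to$user transition matrices, each raised elementwise to the power $\\alpha$. The model scores item $i$ for user $u$ with a 3-step random walk penalized by item popularity:\n",
"\n",
"$$\\hat{r}_{ui} = \\frac{\\left(P_{UI}^{\\alpha} P_{IU}^{\\alpha} P_{UI}^{\\alpha}\\right)_{ui}}{\\left|\\{v : r_{vi} > 0\\}\\right|^{\\beta}}$$\n",
"\n",
"Here $\\alpha$ sharpens or flattens the transition probabilities and $\\beta$ discounts popular items; $\\alpha=1$, $\\beta=0$ gives the plain P3 model used first below."
]
},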
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import helpers\n",
"import pandas as pd\n",
"import numpy as np\n",
"import scipy.sparse as sparse\n",
"from collections import defaultdict\n",
"from itertools import chain\n",
"import random\n",
"import time\n",
"import matplotlib.pyplot as plt\n",
"\n",
"train_read=pd.read_csv('./Datasets/ml-100k/train.csv', sep='\\t', header=None)\n",
"test_read=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n",
"train_ui, test_ui, user_code_id, user_id_code, item_code_id, item_id_code = helpers.data_to_csr(train_read, test_read)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"class RP3Beta():\n",
" def fit(self, train_ui, alpha, beta):\n",
" \"\"\"We weight our edges by user's explicit ratings so if user rated movie high we'll follow that path\n",
" with higher probability.\"\"\"\n",
" self.train_ui=train_ui\n",
" self.train_iu=train_ui.transpose()\n",
" \n",
" self.alpha = alpha\n",
" self.beta = beta\n",
" \n",
" # Define Pui \n",
" Pui=sparse.csr_matrix(self.train_ui/self.train_ui.sum(axis=1))\n",
" \n",
" # Define Piu\n",
" to_divide=np.vectorize(lambda x: x if x>0 else 1)(self.train_iu.sum(axis=1)) # to avoid dividing by zero\n",
" Piu=sparse.csr_matrix(self.train_iu/to_divide)\n",
" item_orders=(self.train_ui>0).sum(axis=0)\n",
" \n",
" Pui = Pui.power(self.alpha)\n",
" Piu = Piu.power(self.alpha)\n",
"\n",
" P3=Pui*Piu*Pui\n",
" \n",
" P3/=np.power(np.vectorize(lambda x: x if x>0 else 1)(item_orders), self.beta)\n",
" \n",
" self.estimations=np.array(P3)\n",
" \n",
" def recommend(self, user_code_id, item_code_id, topK=10):\n",
" \n",
" top_k = defaultdict(list)\n",
" for nb_user, user in enumerate(self.estimations):\n",
" \n",
" user_rated=self.train_ui.indices[self.train_ui.indptr[nb_user]:self.train_ui.indptr[nb_user+1]]\n",
" for item, score in enumerate(user):\n",
" if item not in user_rated and not np.isnan(score):\n",
" top_k[user_code_id[nb_user]].append((item_code_id[item], score))\n",
" result=[]\n",
" # Let's choose k best items in the format: (user, item1, score1, item2, score2, ...)\n",
" for uid, item_scores in top_k.items():\n",
" item_scores.sort(key=lambda x: x[1], reverse=True)\n",
" result.append([uid]+list(chain(*item_scores[:topK])))\n",
" return result\n",
" \n",
" def estimate(self, user_code_id, item_code_id, test_ui):\n",
" result=[]\n",
" for user, item in zip(*test_ui.nonzero()):\n",
" result.append([user_code_id[user], item_code_id[item], \n",
" self.estimations[user,item] if not np.isnan(self.estimations[user,item]) else 1])\n",
" return result"
]
},
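{
"cell_type": "markdown",
"metadata": {},
"source": [
"A tiny worked example (added for illustration, not part of the original pipeline; the toy matrix and variable names are made up): the row normalization used for `Pui` turns each user's ratings into transition probabilities proportional to those ratings, which is exactly the weighting described in the docstring above."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Toy 2-user x 3-item rating matrix, normalized the same way as Pui in RP3Beta.fit\n",
"toy = sparse.csr_matrix(np.array([[5., 0., 1.], [0., 3., 3.]]))\n",
"Pui_toy = sparse.csr_matrix(toy / toy.sum(axis=1))\n",
"# Row 0: 5/6 and 1/6 - the walk follows the 5-star edge five times more often than the 1-star edge\n",
"# Row 1: 3/6 and 3/6 - equally rated items are equally likely\n",
"print(Pui_toy.todense())"
]
},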
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"model=RP3Beta()\n",
"model.fit(train_ui, alpha=1, beta=0)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"top_n=pd.DataFrame(model.recommend(user_code_id, item_code_id, topK=10))\n",
"\n",
"top_n.to_csv('Recommendations generated/ml-100k/Self_P3_reco.csv', index=False, header=False)\n",
"\n",
"estimations=pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))\n",
"estimations.to_csv('Recommendations generated/ml-100k/Self_P3_estimations.csv', index=False, header=False)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"943it [00:00, 7526.70it/s]\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>RMSE</th>\n",
" <th>MAE</th>\n",
" <th>precision</th>\n",
" <th>recall</th>\n",
" <th>F_1</th>\n",
" <th>F_05</th>\n",
" <th>precision_super</th>\n",
" <th>recall_super</th>\n",
" <th>NDCG</th>\n",
" <th>mAP</th>\n",
" <th>MRR</th>\n",
" <th>LAUC</th>\n",
" <th>HR</th>\n",
" <th>HR2</th>\n",
" <th>Reco in test</th>\n",
" <th>Test coverage</th>\n",
" <th>Shannon</th>\n",
" <th>Gini</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>3.702446</td>\n",
" <td>3.527273</td>\n",
" <td>0.282185</td>\n",
" <td>0.192092</td>\n",
" <td>0.186749</td>\n",
" <td>0.21698</td>\n",
" <td>0.204185</td>\n",
" <td>0.240096</td>\n",
" <td>0.339114</td>\n",
" <td>0.204905</td>\n",
" <td>0.572157</td>\n",
" <td>0.593544</td>\n",
" <td>0.875928</td>\n",
" <td>0.685048</td>\n",
" <td>1.0</td>\n",
" <td>0.077201</td>\n",
" <td>3.875892</td>\n",
" <td>0.974947</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" RMSE MAE precision recall F_1 F_05 \\\n",
"0 3.702446 3.527273 0.282185 0.192092 0.186749 0.21698 \n",
"\n",
" precision_super recall_super NDCG mAP MRR LAUC \\\n",
"0 0.204185 0.240096 0.339114 0.204905 0.572157 0.593544 \n",
"\n",
" HR HR2 Reco in test Test coverage Shannon Gini \n",
"0 0.875928 0.685048 1.0 0.077201 3.875892 0.974947 "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import evaluation_measures as ev\n",
"estimations_df=pd.read_csv('Recommendations generated/ml-100k/Self_P3_estimations.csv', header=None)\n",
"reco=np.loadtxt('Recommendations generated/ml-100k/Self_P3_reco.csv', delimiter=',')\n",
"\n",
"ev.evaluate(test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None),\n",
" estimations_df=estimations_df, \n",
" reco=reco,\n",
" super_reactions=[4,5])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Let's check hiperparameters"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Alpha"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" 0%| | 0/8 [00:00<?, ?it/s]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 7444.80it/s]\u001b[A\n",
" 12%|█▎ | 1/8 [00:09<01:07, 9.64s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 7203.06it/s]\u001b[A\n",
" 25%|██▌ | 2/8 [00:19<00:58, 9.69s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 7374.19it/s]\u001b[A\n",
" 38%|███▊ | 3/8 [00:28<00:47, 9.44s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 7637.26it/s]\u001b[A\n",
" 50%|█████ | 4/8 [00:37<00:36, 9.23s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 7968.92it/s]\u001b[A\n",
" 62%|██████▎ | 5/8 [00:45<00:27, 9.14s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 6917.17it/s]\u001b[A\n",
" 75%|███████▌ | 6/8 [00:54<00:18, 9.06s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 7768.07it/s]\u001b[A\n",
" 88%|████████▊ | 7/8 [01:03<00:08, 8.96s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 7708.43it/s]\u001b[A\n",
"100%|██████████| 8/8 [01:12<00:00, 9.06s/it]\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Alpha</th>\n",
" <th>RMSE</th>\n",
" <th>MAE</th>\n",
" <th>precision</th>\n",
" <th>recall</th>\n",
" <th>F_1</th>\n",
" <th>F_05</th>\n",
" <th>precision_super</th>\n",
" <th>recall_super</th>\n",
" <th>NDCG</th>\n",
" <th>mAP</th>\n",
" <th>MRR</th>\n",
" <th>LAUC</th>\n",
" <th>HR</th>\n",
" <th>HR2</th>\n",
" <th>Reco in test</th>\n",
" <th>Test coverage</th>\n",
" <th>Shannon</th>\n",
" <th>Gini</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.2</td>\n",
" <td>268.177832</td>\n",
" <td>211.732649</td>\n",
" <td>0.262672</td>\n",
" <td>0.166858</td>\n",
" <td>0.166277</td>\n",
" <td>0.197184</td>\n",
" <td>0.187661</td>\n",
" <td>0.203252</td>\n",
" <td>0.320910</td>\n",
" <td>0.196132</td>\n",
" <td>0.563378</td>\n",
" <td>0.580866</td>\n",
" <td>0.850477</td>\n",
" <td>0.629905</td>\n",
" <td>1.000000</td>\n",
" <td>0.060606</td>\n",
" <td>3.669627</td>\n",
" <td>0.979636</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.4</td>\n",
" <td>10.546689</td>\n",
" <td>7.792373</td>\n",
" <td>0.268505</td>\n",
" <td>0.172669</td>\n",
" <td>0.171569</td>\n",
" <td>0.202643</td>\n",
" <td>0.192489</td>\n",
" <td>0.212653</td>\n",
" <td>0.326760</td>\n",
" <td>0.200172</td>\n",
" <td>0.565148</td>\n",
" <td>0.583801</td>\n",
" <td>0.854719</td>\n",
" <td>0.644751</td>\n",
" <td>1.000000</td>\n",
" <td>0.064214</td>\n",
" <td>3.726996</td>\n",
" <td>0.978426</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.6</td>\n",
" <td>3.143988</td>\n",
" <td>2.948790</td>\n",
" <td>0.274655</td>\n",
" <td>0.180502</td>\n",
" <td>0.177820</td>\n",
" <td>0.208730</td>\n",
" <td>0.198176</td>\n",
" <td>0.222746</td>\n",
" <td>0.332872</td>\n",
" <td>0.203290</td>\n",
" <td>0.568872</td>\n",
" <td>0.587738</td>\n",
" <td>0.870626</td>\n",
" <td>0.657476</td>\n",
" <td>1.000000</td>\n",
" <td>0.065657</td>\n",
" <td>3.785282</td>\n",
" <td>0.977090</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.8</td>\n",
" <td>3.670728</td>\n",
" <td>3.495735</td>\n",
" <td>0.281972</td>\n",
" <td>0.189868</td>\n",
" <td>0.185300</td>\n",
" <td>0.216071</td>\n",
" <td>0.203541</td>\n",
" <td>0.236751</td>\n",
" <td>0.339867</td>\n",
" <td>0.206688</td>\n",
" <td>0.573729</td>\n",
" <td>0.592432</td>\n",
" <td>0.874867</td>\n",
" <td>0.685048</td>\n",
" <td>1.000000</td>\n",
" <td>0.070707</td>\n",
" <td>3.832415</td>\n",
" <td>0.975998</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1.0</td>\n",
" <td>3.702446</td>\n",
" <td>3.527273</td>\n",
" <td>0.282185</td>\n",
" <td>0.192092</td>\n",
" <td>0.186749</td>\n",
" <td>0.216980</td>\n",
" <td>0.204185</td>\n",
" <td>0.240096</td>\n",
" <td>0.339114</td>\n",
" <td>0.204905</td>\n",
" <td>0.572157</td>\n",
" <td>0.593544</td>\n",
" <td>0.875928</td>\n",
" <td>0.685048</td>\n",
" <td>1.000000</td>\n",
" <td>0.077201</td>\n",
" <td>3.875892</td>\n",
" <td>0.974947</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1.2</td>\n",
" <td>3.704441</td>\n",
" <td>3.529251</td>\n",
" <td>0.280912</td>\n",
" <td>0.193633</td>\n",
" <td>0.187311</td>\n",
" <td>0.216872</td>\n",
" <td>0.203004</td>\n",
" <td>0.240588</td>\n",
" <td>0.338049</td>\n",
" <td>0.203453</td>\n",
" <td>0.571830</td>\n",
" <td>0.594313</td>\n",
" <td>0.883351</td>\n",
" <td>0.681866</td>\n",
" <td>1.000000</td>\n",
" <td>0.085859</td>\n",
" <td>3.910718</td>\n",
" <td>0.974073</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1.4</td>\n",
" <td>3.704580</td>\n",
" <td>3.529388</td>\n",
" <td>0.273595</td>\n",
" <td>0.190651</td>\n",
" <td>0.183874</td>\n",
" <td>0.212183</td>\n",
" <td>0.199464</td>\n",
" <td>0.239118</td>\n",
" <td>0.329550</td>\n",
" <td>0.195433</td>\n",
" <td>0.566171</td>\n",
" <td>0.592793</td>\n",
" <td>0.871686</td>\n",
" <td>0.675504</td>\n",
" <td>1.000000</td>\n",
" <td>0.107504</td>\n",
" <td>3.961915</td>\n",
" <td>0.972674</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1.6</td>\n",
" <td>3.704591</td>\n",
" <td>3.529399</td>\n",
" <td>0.263097</td>\n",
" <td>0.186255</td>\n",
" <td>0.178709</td>\n",
" <td>0.205170</td>\n",
" <td>0.191094</td>\n",
" <td>0.232920</td>\n",
" <td>0.317439</td>\n",
" <td>0.184917</td>\n",
" <td>0.552349</td>\n",
" <td>0.590545</td>\n",
" <td>0.868505</td>\n",
" <td>0.669141</td>\n",
" <td>0.999576</td>\n",
" <td>0.156566</td>\n",
" <td>4.060156</td>\n",
" <td>0.969203</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Alpha RMSE MAE precision recall F_1 F_05 \\\n",
"0 0.2 268.177832 211.732649 0.262672 0.166858 0.166277 0.197184 \n",
"0 0.4 10.546689 7.792373 0.268505 0.172669 0.171569 0.202643 \n",
"0 0.6 3.143988 2.948790 0.274655 0.180502 0.177820 0.208730 \n",
"0 0.8 3.670728 3.495735 0.281972 0.189868 0.185300 0.216071 \n",
"0 1.0 3.702446 3.527273 0.282185 0.192092 0.186749 0.216980 \n",
"0 1.2 3.704441 3.529251 0.280912 0.193633 0.187311 0.216872 \n",
"0 1.4 3.704580 3.529388 0.273595 0.190651 0.183874 0.212183 \n",
"0 1.6 3.704591 3.529399 0.263097 0.186255 0.178709 0.205170 \n",
"\n",
" precision_super recall_super NDCG mAP MRR LAUC \\\n",
"0 0.187661 0.203252 0.320910 0.196132 0.563378 0.580866 \n",
"0 0.192489 0.212653 0.326760 0.200172 0.565148 0.583801 \n",
"0 0.198176 0.222746 0.332872 0.203290 0.568872 0.587738 \n",
"0 0.203541 0.236751 0.339867 0.206688 0.573729 0.592432 \n",
"0 0.204185 0.240096 0.339114 0.204905 0.572157 0.593544 \n",
"0 0.203004 0.240588 0.338049 0.203453 0.571830 0.594313 \n",
"0 0.199464 0.239118 0.329550 0.195433 0.566171 0.592793 \n",
"0 0.191094 0.232920 0.317439 0.184917 0.552349 0.590545 \n",
"\n",
" HR HR2 Reco in test Test coverage Shannon Gini \n",
"0 0.850477 0.629905 1.000000 0.060606 3.669627 0.979636 \n",
"0 0.854719 0.644751 1.000000 0.064214 3.726996 0.978426 \n",
"0 0.870626 0.657476 1.000000 0.065657 3.785282 0.977090 \n",
"0 0.874867 0.685048 1.000000 0.070707 3.832415 0.975998 \n",
"0 0.875928 0.685048 1.000000 0.077201 3.875892 0.974947 \n",
"0 0.883351 0.681866 1.000000 0.085859 3.910718 0.974073 \n",
"0 0.871686 0.675504 1.000000 0.107504 3.961915 0.972674 \n",
"0 0.868505 0.669141 0.999576 0.156566 4.060156 0.969203 "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from tqdm import tqdm\n",
"result=[]\n",
"for alpha in tqdm([round(i,1) for i in np.arange(0.2,1.6001,0.2)]):\n",
" model=RP3Beta()\n",
" model.fit(train_ui, alpha=alpha, beta=0)\n",
" reco=pd.DataFrame(model.recommend(user_code_id, item_code_id, topK=10))\n",
" estimations_df=pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))\n",
" to_append=ev.evaluate(test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None),\n",
" estimations_df=estimations_df, \n",
" reco=np.array(reco),\n",
" super_reactions=[4,5])\n",
" to_append.insert(0, \"Alpha\", alpha)\n",
" result.append(to_append)\n",
" \n",
"result=pd.concat(result)\n",
"result"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABCQAAAkoCAYAAACzg26yAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nOzde3xV1Z3//9cn9xxu4YQACSSAiso1oIBYpCqiFUvQttbq1LZTO+NMq+102vl9x077aztOfcxUextb2mk77dhOLd6mF2LxXmz1OyKiEq6iKPcECOEWIBeSfL5/nJ14iIkQyDk7Oef9fDzOg3PWXvvksxWXO5+91meZuyMiIiIiIiIikkwZYQcgIiIiIiIiIulHCQkRERERERERSTolJEREREREREQk6ZSQEBEREREREZGkU0JCRERERERERJJOCQkRERERERERSTolJERERERERKSDmf2Hmf3/p9BvvZldloSQJEUpISEpx8y2mlmDmR0xs91mdp+ZDQyO3WdmbmbXdjrnu0H7Xwafc8zs22a2M/ierWb2vW5+RvvrB0m9UBGRfiQYN5vNbFin9leD8XdsXNvXg7aLOvX9SzNr7TT2HjGzkuRchYhIenD3v3X3fzmFfpPc/dkkhCQpSgkJSVUV7j4QmAZMB74Ud+x14OPtH8wsC7gBeDOuz5eAGcAsYBBwGfBKVz8j7nV7r1+FiEhq2QLc1P7BzKYAkfgOZmbExuj9xI3VcV7oNPYOdPfqRAYtItIfBfe4In2aEhKS0tx9N/AEscREu0rgEjMbGny+GlgD7I7rMxP4rbtXe8xWd/9lUoIWEUld/82JSYZPAJ3H1rlAMfA54EYzy0lSbCIi/UIw4+xLZrbBzA6Y2X+ZWZ6ZXRbM7v1HM9sN/JeZZZjZHWb2ppnVmdlDZhaN+65LzOx/zeygme2Imy18n5l9I3g/zMweDfrsN7PnzCwjLpb5wftcM/uemVUHr++ZWW5wrD22L5rZXjOrMbNPJvufnfQ9SkhISjOz0cACYHNccyPwe+DG4PPHeecN8QrgC2b2GTObEjyxExGRM7MCGGxmE8wsk9g4/KtOfT5BLHH8UPC5IonxiYj0Fx8F3gecDZwLfCVoHwlEgTHArcBngeuAS4ES4ACwGMDMxgCPAd8Hiog9wFvdxc/6IrAz6DMC+CfAu+j3ZWB28D3lxGYafyXu+EhgCDAK+BSwOO4BoaQpJSQkVf3OzOqBHcBe4Gudjv8S+LiZFRAboH/X6fi/At8kNtivAnaZ2Se6+BkH415/3etXISKSetpnSVwJbAR2tR8wswjwYeDX7n4ceIR3LtuY3WnsfRMRkfTzA3ff4e77gbt4ezlcG/A1d29y9wbgb4Evu/tOd28Cvg5cHyzn+AvgaXdf4u7H3b3O3btKSBwnNnNtTNDvOXfvKiHxUeBOd9/r7rXAPwMf6/Q9dwbfsQw4Apx3pv8gpH9TQkJS1XXu3l774XzghCJq7v48sSzvl4FHgwE7/niruy929zlAAbGB/udmNqHTzyiIe/00gdcjIpIq/pvYTfBf8s7ZaR8AWoBlwef7gQVmVhTXZ0WnsffsRAcsItIH7Yh7v43Y7AeAWndvjDs2BvhtexKXWCK4ldhMh1JOrKHWnXuIzTZ+0szeMrM7uulXEsTSVVwAde7eEvf5GDDwFH6+pDAlJCSlufufgPuAb3Vx+FfEpqC9a20Id29w98XEprhN7O0YRUTSibtvI1bc8hrgN50Of4LYzen2YP3zw0A2sQSGiIi8rTTufRnQXty388yFHcCCToncPHffFRw7aVLX3evd/YvufhawiNiy5iu66FpNLAHSVVwiXVJCQtLB94Arzay8U/u9xKYM/7nzCWb2+aD4Tr6ZZQXLNQYBryY+XBGRlPcpYJ67H41rGwVcASwktv64fQ3yN+l6tw0RkXR2m5mNDgpUfhl4sJt+/wHcFdSLwMyKzOza4Nj9wHwzuyG43y00s2mdv8DMFprZOUFNtUPEZli0dfGzlgBfCX7GMOCrvLNOkMgJlJCQlBesYfslsUExvn2/uz/TzRq4Y8C3ie28sQ+4DfiQu78V16fSzI7EvX6boEsQEUkp7v6mu6/q1DwXWO3uT7r77vYXseTxVDObHPS7uNPYe8TMZib1AkREwvdr4EngLWLLLr7RTb9/B5YSW25RT6y48EUA7r6d2Gy1LxLbank1sURwZ+OBp4nVfHgB+KG7L++i3zeI1V5bA6wFXnmXuEQAsK5/FxMREREREZG+xsy2An/l7k+HHYvImdIMCRERERERERFJOiUkRERERERERCTptGRDRERERERERJJOMyREREREREREJOmUkBARERERERGRpMsKO4DeMGzYMB87dmzYYYiIvMPLL7+8z92Lwo4jGTQWi0hfpHFYRCR83Y3FKZGQGDt2LKtWdd7OXEQkfGa2LewYkkVjsYj0RRqHRUTC191YrCUbIiIiIiIiIpJ0SkiIiIiIiIiISNIpISEiIiIiIiIiSZcSNSREpG84fvw4O3fupLGxMexQki4vL4/Ro0eTnZ0ddigiIu+QDuOzxmEROVPpMFYmWk/HYiUkRKTX7Ny5k0GDBjF27FjMLOxwksbdqaurY+fOnYwbNy7scERE3iHVx+dkjcNmdjXw70Am8J/u/m+djn8B+CugBagFbnH3bcGxu4H3E5uh/BTwd0A+8DBwNtAKVLr7HUH/vwTuAXYFX/8Dd//PhF2ciKT8WJlopzMWa8mGiPSaxsZGCgsL024ANzMKCwuVTReRPivVx+dkjMNmlgksBhYAE4GbzGxip26vAjPcfSrwCHB3cO57gDnAVGAyMBO4NDjnW+5+PjAdmGNmC+K+70F3nxa8lIwQSbBUHysT7XTGYiUkRKRXpesAnq7XLSL9R6qPU0m4vlnAZnd/y92bgQeAa+M7uPtydz8WfFwBjG4/BOQBOUAukA3scfdj7r48OLcZeCXuHBEJQaqPlYnW039+SkiISErJzMxk2rRpTJ48mYqKCg4ePAjA1q1bMTO+8pWvdPTdt28f2dnZ3H777QBs2rSJyy67jGnTpjFhwgRuvfVWAJ599lmGDBnCtGnTOl5PP/108i9ORKQfMzNuvvnmjs8tLS0UFRWxcOHCE/pdd911zJ49+4S2r3/964waNeqEcbh9fE+iUcCOuM87g7bufAp4DMDdXwCWAzXB6wl33xjf2cwKgArgmbjmD5nZGjN7xMxKz/wSRCQdrVq1is997nPdHq+urub6669PYkRvU0JCRFJKfn4+q1evZt26dUSjURYvXtxxbNy4cfzhD3/o+Pzwww8zadKkjs+f+9zn+Pu//3tWr17Nxo0b+exnP9txbO7cuaxevbrjNX/+/ORckIhIihgwYADr1q2joaEBgKeeeopRo078ff7gwYO8/PLLHDp0iLfeeuuEY+3jc/uroKAgabH3lJndDMwgVgMCMzsHmEBs9sMoYJ6ZzY3rnwUsAe519/YLrwTGBss/ngJ+0c3PutXMVpnZqtra2kRdk
oj0Ia2trT3qP2PGDO69995uj5eUlPDII4+caVinRQkJEUlZF198Mbt27er4HIlEmDBhAqtWrQLgwQcf5IYbbug4XlNTw+jRb8+UnTJlSvKCFRFJA9dcc01HYnjJkiXcdNNNJxz/zW9+Q0VFBTfeeCMPPPBAGCG+m11A/CyF0bxdcLKDmc0HvgwscvemoPkDwAp3P+LuR4jNnLg47rSfAG+4+/faG9y9Lu78/wQu7Cood/+Ju89w9xlFRUWneWki0lds3bqV888/n49+9KNMmDCB66+/nmPHjjF27Fj+8R//kQsuuICHH36YJ598kosvvpgLLriAD3/4wxw5cgSAl156ife85z2Ul5cza9Ys6uvrefbZZztmo/3pT3/qmGk2ffp06uvr2bp1K5MnTwZidTQ++clPMmXKFKZPn87y5csBuO+++/jgBz/I1Vdfzfjx4/k//+f/9Mr1apcNEUmIf65cz4bqw736nRNLBvO1ikkn70gsc/zMM8/wqU996oT29pvcESNGkJmZSUlJCdXV1UD
"text/plain": [
"<Figure size 1296x3024 with 18 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"metrics=list(result.columns[[i not in ['Alpha'] for i in result.columns]])\n",
"\n",
"charts_per_row=6\n",
"charts_per_column=3\n",
"\n",
"fig, axes = plt.subplots(nrows=charts_per_row, ncols=charts_per_column,figsize=(18, 7*charts_per_row ))\n",
"import itertools\n",
"to_iter=[i for i in itertools.product(range(charts_per_row), range(charts_per_column))]\n",
"\n",
"for i in range(len(metrics)):\n",
" df=result[['Alpha', metrics[i]]]\n",
" df.plot(ax=axes[to_iter[i]], title=metrics[i], x=0, y=1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Beta"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" 0%| | 0/10 [00:00<?, ?it/s]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 8050.34it/s]\u001b[A\n",
" 10%|█ | 1/10 [00:08<01:20, 8.92s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 7932.86it/s]\u001b[A\n",
" 20%|██ | 2/10 [00:17<01:10, 8.87s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 7669.08it/s]\u001b[A\n",
" 30%|███ | 3/10 [00:26<01:02, 8.88s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 7369.22it/s]\u001b[A\n",
" 40%|████ | 4/10 [00:35<00:53, 8.89s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 7830.41it/s]\u001b[A\n",
" 50%|█████ | 5/10 [00:44<00:44, 8.88s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 7417.00it/s]\u001b[A\n",
" 60%|██████ | 6/10 [00:53<00:35, 8.88s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 7286.71it/s]\u001b[A\n",
" 70%|███████ | 7/10 [01:02<00:26, 8.88s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 7803.32it/s]\u001b[A\n",
" 80%|████████ | 8/10 [01:10<00:17, 8.84s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 7918.15it/s]\u001b[A\n",
" 90%|█████████ | 9/10 [01:19<00:08, 8.86s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 8223.42it/s]\u001b[A\n",
"100%|██████████| 10/10 [01:28<00:00, 8.86s/it]\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Beta</th>\n",
" <th>RMSE</th>\n",
" <th>MAE</th>\n",
" <th>precision</th>\n",
" <th>recall</th>\n",
" <th>F_1</th>\n",
" <th>F_05</th>\n",
" <th>precision_super</th>\n",
" <th>recall_super</th>\n",
" <th>NDCG</th>\n",
" <th>mAP</th>\n",
" <th>MRR</th>\n",
" <th>LAUC</th>\n",
" <th>HR</th>\n",
" <th>HR2</th>\n",
" <th>Reco in test</th>\n",
" <th>Test coverage</th>\n",
" <th>Shannon</th>\n",
" <th>Gini</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.0</td>\n",
" <td>3.702446</td>\n",
" <td>3.527273</td>\n",
" <td>0.282185</td>\n",
" <td>0.192092</td>\n",
" <td>0.186749</td>\n",
" <td>0.216980</td>\n",
" <td>0.204185</td>\n",
" <td>0.240096</td>\n",
" <td>0.339114</td>\n",
" <td>0.204905</td>\n",
" <td>0.572157</td>\n",
" <td>0.593544</td>\n",
" <td>0.875928</td>\n",
" <td>0.685048</td>\n",
" <td>1.000000</td>\n",
" <td>0.077201</td>\n",
" <td>3.875892</td>\n",
" <td>0.974947</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.1</td>\n",
" <td>3.703312</td>\n",
" <td>3.528128</td>\n",
" <td>0.290138</td>\n",
" <td>0.197597</td>\n",
" <td>0.192259</td>\n",
" <td>0.223336</td>\n",
" <td>0.210944</td>\n",
" <td>0.246153</td>\n",
" <td>0.347768</td>\n",
" <td>0.212034</td>\n",
" <td>0.581038</td>\n",
" <td>0.596328</td>\n",
" <td>0.884411</td>\n",
" <td>0.695652</td>\n",
" <td>1.000000</td>\n",
" <td>0.085137</td>\n",
" <td>3.957416</td>\n",
" <td>0.972784</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.2</td>\n",
" <td>3.703825</td>\n",
" <td>3.528636</td>\n",
" <td>0.297137</td>\n",
" <td>0.201202</td>\n",
" <td>0.196067</td>\n",
" <td>0.228169</td>\n",
" <td>0.218026</td>\n",
" <td>0.252767</td>\n",
" <td>0.355655</td>\n",
" <td>0.219909</td>\n",
" <td>0.588904</td>\n",
" <td>0.598160</td>\n",
" <td>0.886532</td>\n",
" <td>0.697773</td>\n",
" <td>1.000000</td>\n",
" <td>0.094517</td>\n",
" <td>4.053212</td>\n",
" <td>0.969980</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.3</td>\n",
" <td>3.704130</td>\n",
" <td>3.528939</td>\n",
" <td>0.303499</td>\n",
" <td>0.204749</td>\n",
" <td>0.199901</td>\n",
" <td>0.232829</td>\n",
" <td>0.225107</td>\n",
" <td>0.260797</td>\n",
" <td>0.363757</td>\n",
" <td>0.226825</td>\n",
" <td>0.599969</td>\n",
" <td>0.599964</td>\n",
" <td>0.888653</td>\n",
" <td>0.707317</td>\n",
" <td>1.000000</td>\n",
" <td>0.105339</td>\n",
" <td>4.147779</td>\n",
" <td>0.966948</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.4</td>\n",
" <td>3.704313</td>\n",
" <td>3.529120</td>\n",
" <td>0.308908</td>\n",
" <td>0.208811</td>\n",
" <td>0.203854</td>\n",
" <td>0.237241</td>\n",
" <td>0.229614</td>\n",
" <td>0.266918</td>\n",
" <td>0.370758</td>\n",
" <td>0.232673</td>\n",
" <td>0.609385</td>\n",
" <td>0.602014</td>\n",
" <td>0.895016</td>\n",
" <td>0.718982</td>\n",
" <td>0.999894</td>\n",
" <td>0.132035</td>\n",
" <td>4.259682</td>\n",
" <td>0.962989</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.5</td>\n",
" <td>3.704422</td>\n",
" <td>3.529229</td>\n",
" <td>0.314316</td>\n",
" <td>0.211411</td>\n",
" <td>0.206768</td>\n",
" <td>0.240986</td>\n",
" <td>0.237124</td>\n",
" <td>0.273416</td>\n",
" <td>0.378307</td>\n",
" <td>0.239297</td>\n",
" <td>0.622792</td>\n",
" <td>0.603327</td>\n",
" <td>0.903499</td>\n",
" <td>0.724284</td>\n",
" <td>0.999046</td>\n",
" <td>0.168831</td>\n",
" <td>4.411281</td>\n",
" <td>0.956648</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.6</td>\n",
" <td>3.704488</td>\n",
" <td>3.529295</td>\n",
" <td>0.314634</td>\n",
" <td>0.206209</td>\n",
" <td>0.204818</td>\n",
" <td>0.240159</td>\n",
" <td>0.242489</td>\n",
" <td>0.273850</td>\n",
" <td>0.376438</td>\n",
" <td>0.238428</td>\n",
" <td>0.622042</td>\n",
" <td>0.600721</td>\n",
" <td>0.897137</td>\n",
" <td>0.720042</td>\n",
" <td>0.996394</td>\n",
" <td>0.212843</td>\n",
" <td>4.621938</td>\n",
" <td>0.945932</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.7</td>\n",
" <td>3.704528</td>\n",
" <td>3.529335</td>\n",
" <td>0.304136</td>\n",
" <td>0.187298</td>\n",
" <td>0.191990</td>\n",
" <td>0.228749</td>\n",
" <td>0.238305</td>\n",
" <td>0.256201</td>\n",
" <td>0.358807</td>\n",
" <td>0.226808</td>\n",
" <td>0.593897</td>\n",
" <td>0.591207</td>\n",
" <td>0.868505</td>\n",
" <td>0.693531</td>\n",
" <td>0.983033</td>\n",
" <td>0.256854</td>\n",
" <td>4.898568</td>\n",
" <td>0.928065</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.8</td>\n",
" <td>3.704552</td>\n",
" <td>3.529360</td>\n",
" <td>0.266384</td>\n",
" <td>0.147571</td>\n",
" <td>0.158660</td>\n",
" <td>0.194838</td>\n",
" <td>0.214485</td>\n",
" <td>0.209336</td>\n",
" <td>0.299850</td>\n",
" <td>0.184356</td>\n",
" <td>0.492852</td>\n",
" <td>0.571152</td>\n",
" <td>0.803818</td>\n",
" <td>0.604454</td>\n",
" <td>0.936373</td>\n",
" <td>0.341270</td>\n",
" <td>5.257397</td>\n",
" <td>0.895882</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.9</td>\n",
" <td>3.704567</td>\n",
" <td>3.529375</td>\n",
" <td>0.162354</td>\n",
" <td>0.076967</td>\n",
" <td>0.089233</td>\n",
" <td>0.114583</td>\n",
" <td>0.134657</td>\n",
" <td>0.113253</td>\n",
" <td>0.160868</td>\n",
" <td>0.085486</td>\n",
" <td>0.243590</td>\n",
" <td>0.535405</td>\n",
" <td>0.580064</td>\n",
" <td>0.400848</td>\n",
" <td>0.800106</td>\n",
" <td>0.415584</td>\n",
" <td>5.563910</td>\n",
" <td>0.857396</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Beta RMSE MAE precision recall F_1 F_05 \\\n",
"0 0.0 3.702446 3.527273 0.282185 0.192092 0.186749 0.216980 \n",
"0 0.1 3.703312 3.528128 0.290138 0.197597 0.192259 0.223336 \n",
"0 0.2 3.703825 3.528636 0.297137 0.201202 0.196067 0.228169 \n",
"0 0.3 3.704130 3.528939 0.303499 0.204749 0.199901 0.232829 \n",
"0 0.4 3.704313 3.529120 0.308908 0.208811 0.203854 0.237241 \n",
"0 0.5 3.704422 3.529229 0.314316 0.211411 0.206768 0.240986 \n",
"0 0.6 3.704488 3.529295 0.314634 0.206209 0.204818 0.240159 \n",
"0 0.7 3.704528 3.529335 0.304136 0.187298 0.191990 0.228749 \n",
"0 0.8 3.704552 3.529360 0.266384 0.147571 0.158660 0.194838 \n",
"0 0.9 3.704567 3.529375 0.162354 0.076967 0.089233 0.114583 \n",
"\n",
" precision_super recall_super NDCG mAP MRR LAUC \\\n",
"0 0.204185 0.240096 0.339114 0.204905 0.572157 0.593544 \n",
"0 0.210944 0.246153 0.347768 0.212034 0.581038 0.596328 \n",
"0 0.218026 0.252767 0.355655 0.219909 0.588904 0.598160 \n",
"0 0.225107 0.260797 0.363757 0.226825 0.599969 0.599964 \n",
"0 0.229614 0.266918 0.370758 0.232673 0.609385 0.602014 \n",
"0 0.237124 0.273416 0.378307 0.239297 0.622792 0.603327 \n",
"0 0.242489 0.273850 0.376438 0.238428 0.622042 0.600721 \n",
"0 0.238305 0.256201 0.358807 0.226808 0.593897 0.591207 \n",
"0 0.214485 0.209336 0.299850 0.184356 0.492852 0.571152 \n",
"0 0.134657 0.113253 0.160868 0.085486 0.243590 0.535405 \n",
"\n",
" HR HR2 Reco in test Test coverage Shannon Gini \n",
"0 0.875928 0.685048 1.000000 0.077201 3.875892 0.974947 \n",
"0 0.884411 0.695652 1.000000 0.085137 3.957416 0.972784 \n",
"0 0.886532 0.697773 1.000000 0.094517 4.053212 0.969980 \n",
"0 0.888653 0.707317 1.000000 0.105339 4.147779 0.966948 \n",
"0 0.895016 0.718982 0.999894 0.132035 4.259682 0.962989 \n",
"0 0.903499 0.724284 0.999046 0.168831 4.411281 0.956648 \n",
"0 0.897137 0.720042 0.996394 0.212843 4.621938 0.945932 \n",
"0 0.868505 0.693531 0.983033 0.256854 4.898568 0.928065 \n",
"0 0.803818 0.604454 0.936373 0.341270 5.257397 0.895882 \n",
"0 0.580064 0.400848 0.800106 0.415584 5.563910 0.857396 "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from tqdm import tqdm\n",
"result=[]\n",
"for beta in tqdm([round(i,1) for i in np.arange(0,1,0.1)]):\n",
" model=RP3Beta()\n",
" model.fit(train_ui, alpha=1, beta=beta)\n",
" reco=pd.DataFrame(model.recommend(user_code_id, item_code_id, topK=10))\n",
" estimations_df=pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))\n",
" to_append=ev.evaluate(test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None),\n",
" estimations_df=estimations_df, \n",
" reco=np.array(reco),\n",
" super_reactions=[4,5])\n",
" to_append.insert(0, \"Beta\", beta)\n",
" result.append(to_append)\n",
" \n",
"result=pd.concat(result)\n",
"result"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABCQAAAkoCAYAAACzg26yAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nOzdd3xX1f3H8dfJXiQhQAJJCHsFAgHDElFkiYpicYBiHbSlbuus29ba/mod2Fpqa1trrYigVXEzVByVLWFDWCEhCQkEQhKyk/P74/tNDDHISr434/18PL6PfO+5537zuYyTm88993OMtRYREREREREREU/ycjoAEREREREREWl9lJAQEREREREREY9TQkJEREREREREPE4JCRERERERERHxOCUkRERERERERMTjlJAQEREREREREY9TQkJERERERERqGGP+aox59CT6bTbGjPFASNJCKSEhLY4xJtUYU2yMKTTG7DfGvGKMCXHve8UYY40xU+ocM9vdfoN7288Y86wxZp/7c1KNMc8f53tUv/7s0RMVEWkm3GNmmTGmfZ32de6xt2uttl+524bX6XuDMaayzrhbaIyJ9sxZiIi0Htbam6y1vzmJfv2ttcs8EJK0UEpISEt1ibU2BEgEBgMP1tqXAlxXvWGM8QGuAnbV6vMgkAQMA9oAY4Bv6/setV63NfhZiIi0HHuAq6s3jDEJQFDtDsYYg2t8PkStcbqW5XXG3RBrbWZjBi0i0ly5r3FFmjQlJKRFs9buBxbhSkxUex84xxjT1r09CdgA7K/VZyjwjrU207qkWmtf9UjQIiIt0384NslwPVB3XB0NdALuAKYbY/w8FJuISLPhnnX2oDFmizHmsDHmX8aYAGPMGPfs3l8aY/YD/zLGeBljHjDG7DLG5BpjFhhjImp91jnGmG+MMXnGmPRas4VfMcY86X7f3hjzgbvPIWPMV8YYr1qxjHe/9zfGPG+MyXS/njfG+Lv3Vcd2jzEmxxiTZYy50dN/dtL0KCEhLZoxJha4ENhZq7kEWAhMd29fx/cvilcAdxtjbjHGJLjv2omIyOlbAYQaY/oZY7xxjcGv1elzPa6k8QL39iUejE9EpDmZAVwA9AB6A4+42zsCEUAXYBZwO3AZcB4QDRwG5gAYY7oAHwMvAB1w3cBLrud73QPsc/eJAh4CbD39HgZGuD9nEK6Zxo/U2t8RCANigJ8Ac2rdIJRWSgkJaaneNcYUAOlADvB4nf2vAtcZY8JxDdDv1tn/f8BTuAb7NUCGMeb6er5HXq3Xzxr8LEREWpbqWRITgK1ARvUOY0wQcCXwurW2HHiL7z+2MaLOuLsLEZHW6c/W2nRr7SHgt3z3SFwV8Li1ttRaWwzcBDxsrd1nrS0FfgVc4X6c4xpgqbV2nrW23Fqba62tLyFRjmv2Whd3v6+stfUlJGYAT1hrc6y1B4BfAz+u8zlPuD/jI6AQ6HOmfxDSvCkhIS3VZdba6toPfYFjCqlZa7/GleV9GPjAPWDX3l9prZ1jrR0FhOMa6F82xvSr8z3Ca73+3ojnIyLSEvwH1wXwDXx/ZtqPgArgI/f2XOBCY0yHWn1W1Bl3ezR2wCIiTVR6rfd7cc1+ADhgrS2pta8L8E51IhdXMrgS10yHzhxbQ+14nsY123ixMWa3MeaB4/SLdsdSX1wAudbailrbRUDISXx/acGUkJAWzVr7BfAK8Ew9u1/DNQXtB2tDWGuLrbVzcE1xi2/oGEVEWgtr7V5cxS0vAt6us/t6XBemae5nn98EfHElMERE5Fida72PA6oL/NaduZAOXFgnmRtgrc1w7zthYtdaW2Ctvcda2x24FNdjzePq6ZqJKwFSX1wi9VJCQlqD54EJxphBddr/hGva8Jd1DzDG/MJdfCfQGOPjflyjDbCu8cMVEWnRfgKMtdYerdUWA4wDJuN69rj6+eOnqH+1DRGR1u5WY0ysu0Dlw8D84/T7K/Bbd70IjDEdjDFT3PvmAuONMVe5r3fbGWMS636AMWayMaanu6baEVwzLKrq+V7zgEfc36M98BjfrxUkcgwlJKTFcz/D9iquQbF2+yFr7afHeQauCHgW18obB4Fbgcuttbtr9XnfGFNY6/VOI52CiEiLYa3dZa1dU6d5NJBsrV1srd1f/cKVOB5ojBng7jeyzrhbaIwZ6tETEBFpGl4HFgO7cT128eRx+v0ReA/X4xYFuAoMDwew1qbhmrF2D67llpNxJYPr6gUsxVXzYTnwF2vt5/X0exJX7bUNwEbg2x+ISwQAU//vYiIiIiIiItLUGGNSgZ9aa5c6HYvImdIMCRERERERERHxOCUkRERERERERMTj9MiGiIiIiIiIiHicZkiIiIiIiIiIiMcpISEiIiIiIiIiHufjdAANoX379rZr165OhyEicoy1a9cetNZ2cDoOT9FYLCJNUWsaizUOi0hT9EPjcItISHTt2pU1a+ouaS4i4ixjzF6nY/AkjcUi0hQ5PRYbYyYBfwS8gX9Ya39fZ/9NwK1AJVAIzLLWbjHGTAB+D/gBZcB91trPfuh7aRwWkaboh8ZhPbIhIiIiItIIjDHewBzgQiAeuNoYE1+n2+vW2gRrbSLwB+A5d/tB4BJrbQJwPfAfD4UtIuIxSkiIiIiIiDSOYcBOa+1ua20Z8AYwpXYHa21+rc1gwLrb11lrM93tm4FAY4y/B2IWEfGYFvHIhoiIiIhIExQDpNfa3gcMr9vJGHMrcDeuxzPG1vM5lwPfWmtL6zl2FjALIC4urgFCFhHxnBabkCgvL2ffvn2UlJQ4HYrHBQQEEBsbi6+vr9OhiEgr19LHYo23ItIQrLVzgDnGmGuAR3A9ogGAMaY/8BQw8TjHvgS8BJCUlGQbP1qRlqulX7c0ttO5LmqxCYl9+/bRpk0bunbtijHG6XA8xlpLbm4u+/bto1u3bk6HIyKtXEseizXeishJyAA619qOdbcdzxvAi9UbxphY4B3gOmvtrkaJUERqtOTrlsZ2utdFLbaGRElJCe3atWt1/5CMMbRr105ZPRFpElryWKzxVkROwmqglzGmmzHGD5gOvFe7gzGmV63Ni4Ed7vZw4EPgAWvt/zwUr0ir1pKvWxrb6V4XtdiEBNBq/yG11vMWkaapJY9JLfncROTMWWsrgNuARcBWYIG1drMx5gljzKXubrcZYzYbY5Jx1ZGoflzjNqAn8JgxJtn9ivT0OYi0NvrZfvpO58+uRScknObt7U1iYiIDBgzgkksuIS8vD4DU1FSMMTzyyCM1fQ8ePIivry+33XYbANu3b2fMmDEkJibSr18/Zs2aBcCyZcsICwsjMTGx5rV06VLPn5yISDNhjOHaa6+t2a6oqKBDhw5Mnjz5mH6XXXYZI0aMOKbtV7/6FTExMceMudVjuYjIybDWfmSt7W2t7WGt/a277TFr7Xvu93daa/tbaxOttedbaze725+01ga726tfOU6ei4g0T2vWrOGOO+447v7MzEyuuOIKD0b0HSUkGlFgYCDJycls2rSJiIgI5
syZU7OvW7dufPjhhzXbb775Jv3796/ZvuOOO7jrrrtITk5m69at3H777TX7Ro8eTXJycs1r/PjxnjkhEZFmKDg4mE2bNlFcXAzAkiVLiImJOaZPXl4ea9eu5ciRI+zevfuYfdVjcfUrPDzcY7GLiIiI1FVZWXlK/ZOSkvjTn/503P3R0dG89dZbZxrWaVFCwkNGjhxJRsZ3NYyCgoLo168fa9asAWD+/PlcddVVNfuzsrKIjY2t2U5ISPBcsCIiLcxFF11UkwSeN28eV1999TH73377bS655BKmT5/OG2+84USIIiIiIqSmptK3b19mzJhBv379uOKKKygqKqJr16788pe/ZMiQIbz55pssXryYkSNHMmTIEK688koKCwsBWL16NWeffTaDBg1i2LBhFBQUsGzZspqZoV988UXNrM/BgwdTUFBAamoqAwYMAFx1NG688UYSEhIYPHgwn3/+OQCvvPIKU6dOZdKkSfTq1Yv777+/Qc63xa6yUduv39/Mlsz8Bv3M+OhQHr+k/4k74sp
"text/plain": [
"<Figure size 1296x3024 with 18 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"### import matplotlib.pyplot as plt\n",
"\n",
"metrics=list(result.columns[[i not in ['Beta'] for i in result.columns]])\n",
"\n",
"charts_per_row=6\n",
"charts_per_column=3\n",
"\n",
"fig, axes = plt.subplots(nrows=charts_per_row, ncols=charts_per_column,figsize=(18, 7*charts_per_row ))\n",
"import itertools\n",
"to_iter=[i for i in itertools.product(range(charts_per_row), range(charts_per_column))]\n",
"\n",
"for i in range(len(metrics)):\n",
" df=result[['Beta', metrics[i]]]\n",
" df.plot(ax=axes[to_iter[i]], title=metrics[i], x=0, y=1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Check sample recommendations"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>user</th>\n",
" <th>rating</th>\n",
" <th>title</th>\n",
" <th>genres</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>522</th>\n",
" <td>817</td>\n",
" <td>5</td>\n",
" <td>Heat (1995)</td>\n",
" <td>Action, Crime, Thriller</td>\n",
" </tr>\n",
" <tr>\n",
" <th>85</th>\n",
" <td>817</td>\n",
" <td>4</td>\n",
" <td>Toy Story (1995)</td>\n",
" <td>Animation, Children's, Comedy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28224</th>\n",
" <td>817</td>\n",
" <td>4</td>\n",
" <td>Conspiracy Theory (1997)</td>\n",
" <td>Action, Mystery, Romance, Thriller</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69163</th>\n",
" <td>817</td>\n",
" <td>4</td>\n",
" <td>Desperate Measures (1998)</td>\n",
" <td>Crime, Drama, Thriller</td>\n",
" </tr>\n",
" <tr>\n",
" <th>62281</th>\n",
" <td>817</td>\n",
" <td>4</td>\n",
" <td>Broken Arrow (1996)</td>\n",
" <td>Action, Thriller</td>\n",
" </tr>\n",
" <tr>\n",
" <th>46995</th>\n",
" <td>817</td>\n",
" <td>4</td>\n",
" <td>Cop Land (1997)</td>\n",
" <td>Crime, Drama, Mystery</td>\n",
" </tr>\n",
" <tr>\n",
" <th>44432</th>\n",
" <td>817</td>\n",
" <td>4</td>\n",
" <td>Bound (1996)</td>\n",
" <td>Crime, Drama, Romance, Thriller</td>\n",
" </tr>\n",
" <tr>\n",
" <th>36735</th>\n",
" <td>817</td>\n",
" <td>4</td>\n",
" <td>Lone Star (1996)</td>\n",
" <td>Drama, Mystery</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32392</th>\n",
" <td>817</td>\n",
" <td>4</td>\n",
" <td>Spawn (1997)</td>\n",
" <td>Action, Adventure, Sci-Fi, Thriller</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30211</th>\n",
" <td>817</td>\n",
" <td>4</td>\n",
" <td>Star Trek: First Contact (1996)</td>\n",
" <td>Action, Adventure, Sci-Fi</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25251</th>\n",
" <td>817</td>\n",
" <td>4</td>\n",
" <td>Twelve Monkeys (1995)</td>\n",
" <td>Drama, Sci-Fi</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7384</th>\n",
" <td>817</td>\n",
" <td>4</td>\n",
" <td>Saint, The (1997)</td>\n",
" <td>Action, Romance, Thriller</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1394</th>\n",
" <td>817</td>\n",
" <td>4</td>\n",
" <td>River Wild, The (1994)</td>\n",
" <td>Action, Thriller</td>\n",
" </tr>\n",
" <tr>\n",
" <th>922</th>\n",
" <td>817</td>\n",
" <td>4</td>\n",
" <td>Rumble in the Bronx (1995)</td>\n",
" <td>Action, Adventure, Crime</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25959</th>\n",
" <td>817</td>\n",
" <td>3</td>\n",
" <td>Dead Man Walking (1995)</td>\n",
" <td>Drama</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" user rating title \\\n",
"522 817 5 Heat (1995) \n",
"85 817 4 Toy Story (1995) \n",
"28224 817 4 Conspiracy Theory (1997) \n",
"69163 817 4 Desperate Measures (1998) \n",
"62281 817 4 Broken Arrow (1996) \n",
"46995 817 4 Cop Land (1997) \n",
"44432 817 4 Bound (1996) \n",
"36735 817 4 Lone Star (1996) \n",
"32392 817 4 Spawn (1997) \n",
"30211 817 4 Star Trek: First Contact (1996) \n",
"25251 817 4 Twelve Monkeys (1995) \n",
"7384 817 4 Saint, The (1997) \n",
"1394 817 4 River Wild, The (1994) \n",
"922 817 4 Rumble in the Bronx (1995) \n",
"25959 817 3 Dead Man Walking (1995) \n",
"\n",
" genres \n",
"522 Action, Crime, Thriller \n",
"85 Animation, Children's, Comedy \n",
"28224 Action, Mystery, Romance, Thriller \n",
"69163 Crime, Drama, Thriller \n",
"62281 Action, Thriller \n",
"46995 Crime, Drama, Mystery \n",
"44432 Crime, Drama, Romance, Thriller \n",
"36735 Drama, Mystery \n",
"32392 Action, Adventure, Sci-Fi, Thriller \n",
"30211 Action, Adventure, Sci-Fi \n",
"25251 Drama, Sci-Fi \n",
"7384 Action, Romance, Thriller \n",
"1394 Action, Thriller \n",
"922 Action, Adventure, Crime \n",
"25959 Drama "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>user</th>\n",
" <th>rec_nb</th>\n",
" <th>title</th>\n",
" <th>genres</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>356</th>\n",
" <td>817.0</td>\n",
" <td>1</td>\n",
" <td>Star Wars (1977)</td>\n",
" <td>Action, Adventure, Romance, Sci-Fi, War</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4699</th>\n",
" <td>817.0</td>\n",
" <td>2</td>\n",
" <td>Air Force One (1997)</td>\n",
" <td>Action, Thriller</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7275</th>\n",
" <td>817.0</td>\n",
" <td>3</td>\n",
" <td>Fargo (1996)</td>\n",
" <td>Crime, Drama, Thriller</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2969</th>\n",
" <td>817.0</td>\n",
" <td>4</td>\n",
" <td>Return of the Jedi (1983)</td>\n",
" <td>Action, Adventure, Romance, Sci-Fi, War</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1954</th>\n",
" <td>817.0</td>\n",
" <td>5</td>\n",
" <td>Scream (1996)</td>\n",
" <td>Horror, Thriller</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1284</th>\n",
" <td>817.0</td>\n",
" <td>6</td>\n",
" <td>English Patient, The (1996)</td>\n",
" <td>Drama, Romance, War</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4996</th>\n",
" <td>817.0</td>\n",
" <td>7</td>\n",
" <td>Titanic (1997)</td>\n",
" <td>Action, Drama, Romance</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7667</th>\n",
" <td>817.0</td>\n",
" <td>8</td>\n",
" <td>Rock, The (1996)</td>\n",
" <td>Action, Adventure, Thriller</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5453</th>\n",
" <td>817.0</td>\n",
" <td>9</td>\n",
" <td>Liar Liar (1997)</td>\n",
" <td>Comedy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2554</th>\n",
" <td>817.0</td>\n",
" <td>10</td>\n",
" <td>Godfather, The (1972)</td>\n",
" <td>Action, Crime, Drama</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" user rec_nb title \\\n",
"356 817.0 1 Star Wars (1977) \n",
"4699 817.0 2 Air Force One (1997) \n",
"7275 817.0 3 Fargo (1996) \n",
"2969 817.0 4 Return of the Jedi (1983) \n",
"1954 817.0 5 Scream (1996) \n",
"1284 817.0 6 English Patient, The (1996) \n",
"4996 817.0 7 Titanic (1997) \n",
"7667 817.0 8 Rock, The (1996) \n",
"5453 817.0 9 Liar Liar (1997) \n",
"2554 817.0 10 Godfather, The (1972) \n",
"\n",
" genres \n",
"356 Action, Adventure, Romance, Sci-Fi, War \n",
"4699 Action, Thriller \n",
"7275 Crime, Drama, Thriller \n",
"2969 Action, Adventure, Romance, Sci-Fi, War \n",
"1954 Horror, Thriller \n",
"1284 Drama, Romance, War \n",
"4996 Action, Drama, Romance \n",
"7667 Action, Adventure, Thriller \n",
"5453 Comedy \n",
"2554 Action, Crime, Drama "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train=pd.read_csv('./Datasets/ml-100k/train.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n",
"items=pd.read_csv('./Datasets/ml-100k/movies.csv')\n",
"\n",
"user=random.choice(list(set(train['user'])))\n",
"\n",
"train_content=pd.merge(train, items, left_on='item', right_on='id')\n",
"display(train_content[train_content['user']==user][['user', 'rating', 'title', 'genres']]\\\n",
" .sort_values(by='rating', ascending=False)[:15])\n",
"\n",
"reco = np.loadtxt('Recommendations generated/ml-100k/Self_P3_reco.csv', delimiter=',')\n",
"items=pd.read_csv('./Datasets/ml-100k/movies.csv')\n",
"\n",
"# Let's ignore scores - they are not used in evaluation: \n",
"reco_users=reco[:,:1]\n",
"reco_items=reco[:,1::2]\n",
"# Let's put them into one array\n",
"reco=np.concatenate((reco_users, reco_items), axis=1)\n",
"\n",
"# Let's rebuild it user-item dataframe\n",
"recommended=[]\n",
"for row in reco:\n",
" for rec_nb, entry in enumerate(row[1:]):\n",
" recommended.append((row[0], rec_nb+1, entry))\n",
"recommended=pd.DataFrame(recommended, columns=['user','rec_nb', 'item'])\n",
"\n",
"recommended_content=pd.merge(recommended, items, left_on='item', right_on='id')\n",
"recommended_content[recommended_content['user']==user][['user', 'rec_nb', 'title', 'genres']].sort_values(by='rec_nb')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# project task 6: generate recommendations of RP3Beta for hiperparameters found to optimize recall"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# use better values than (1,0) for alpha and beta\n",
"# if you want you can also modify the model to consider different weights (we took as weights user ratings, maybe take ones or squares of ratings instead)\n",
"# save the outptut in 'Recommendations generated/ml-100k/Self_RP3Beta_estimations.csv'\n",
"# and 'Recommendations generated/ml-100k/Self_RP3Beta_reco.csv'"
]
},
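{
"cell_type": "markdown",
"metadata": {},
"source": [
"One way to follow the suggestion above about different edge weights (a minimal sketch under the assumption that the rest of the pipeline stays unchanged; the names `train_ui_ones` and `model_ones` are ours, not part of the task): binarize the interaction matrix so every rated item contributes the same weight to the random walk instead of being weighted by its rating."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: RP3Beta on 0/1 edge weights instead of explicit ratings\n",
"train_ui_ones = (train_ui > 0).astype(np.float64)  # still a sparse CSR matrix\n",
"\n",
"model_ones = RP3Beta()\n",
"model_ones.fit(train_ui_ones, alpha=0.8, beta=0.6)\n",
"\n",
"# The same recommend/estimate calls work unchanged, e.g.:\n",
"# pd.DataFrame(model_ones.recommend(user_code_id, item_code_id, topK=10))"
]
},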
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"943it [00:00, 7573.18it/s]\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>RMSE</th>\n",
" <th>MAE</th>\n",
" <th>precision</th>\n",
" <th>recall</th>\n",
" <th>F_1</th>\n",
" <th>F_05</th>\n",
" <th>precision_super</th>\n",
" <th>recall_super</th>\n",
" <th>NDCG</th>\n",
" <th>mAP</th>\n",
" <th>MRR</th>\n",
" <th>LAUC</th>\n",
" <th>HR</th>\n",
" <th>HR2</th>\n",
" <th>Reco in test</th>\n",
" <th>Test coverage</th>\n",
" <th>Shannon</th>\n",
" <th>Gini</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>3.702928</td>\n",
" <td>3.527713</td>\n",
" <td>0.322694</td>\n",
" <td>0.216069</td>\n",
" <td>0.212152</td>\n",
" <td>0.247538</td>\n",
" <td>0.245279</td>\n",
" <td>0.284983</td>\n",
" <td>0.388271</td>\n",
" <td>0.248239</td>\n",
" <td>0.636318</td>\n",
" <td>0.605683</td>\n",
" <td>0.910923</td>\n",
" <td>0.731707</td>\n",
" <td>0.999788</td>\n",
" <td>0.178932</td>\n",
" <td>4.549663</td>\n",
" <td>0.950182</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" RMSE MAE precision recall F_1 F_05 \\\n",
"0 3.702928 3.527713 0.322694 0.216069 0.212152 0.247538 \n",
"\n",
" precision_super recall_super NDCG mAP MRR LAUC \\\n",
"0 0.245279 0.284983 0.388271 0.248239 0.636318 0.605683 \n",
"\n",
" HR HR2 Reco in test Test coverage Shannon Gini \n",
"0 0.910923 0.731707 0.999788 0.178932 4.549663 0.950182 "
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import evaluation_measures as ev\n",
"\n",
"model = RP3Beta()\n",
"model.fit(train_ui, alpha = 0.8, beta = 0.6)\n",
"\n",
"top_n = pd.DataFrame(model.recommend(user_code_id, item_code_id, topK = 10))\n",
"top_n.to_csv('Recommendations generated/ml-100k/Self_RP3Beta_reco.csv', index = False, header = False)\n",
"\n",
"estimations = pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))\n",
"estimations.to_csv('Recommendations generated/ml-100k/Self_RP3Beta_estimations.csv', index = False, header = False)\n",
"estimations_df = pd.read_csv('Recommendations generated/ml-100k/Self_RP3Beta_estimations.csv', header = None)\n",
"\n",
"reco = np.loadtxt('Recommendations generated/ml-100k/Self_RP3Beta_reco.csv', delimiter = ',')\n",
"\n",
"ev.evaluate(test = pd.read_csv('./Datasets/ml-100k/test.csv', sep = '\\t', header = None),\n",
" estimations_df = estimations_df, \n",
" reco = reco,\n",
" super_reactions = [4, 5])"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Beta</th>\n",
" <th>RMSE</th>\n",
" <th>MAE</th>\n",
" <th>precision</th>\n",
" <th>recall</th>\n",
" <th>F_1</th>\n",
" <th>F_05</th>\n",
" <th>precision_super</th>\n",
" <th>recall_super</th>\n",
" <th>NDCG</th>\n",
" <th>mAP</th>\n",
" <th>MRR</th>\n",
" <th>LAUC</th>\n",
" <th>HR</th>\n",
" <th>HR2</th>\n",
" <th>Reco in test</th>\n",
" <th>Test coverage</th>\n",
" <th>Shannon</th>\n",
" <th>Gini</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.9</td>\n",
" <td>3.704567</td>\n",
" <td>3.529375</td>\n",
" <td>0.162354</td>\n",
" <td>0.076967</td>\n",
" <td>0.089233</td>\n",
" <td>0.114583</td>\n",
" <td>0.134657</td>\n",
" <td>0.113253</td>\n",
" <td>0.160868</td>\n",
" <td>0.085486</td>\n",
" <td>0.243590</td>\n",
" <td>0.535405</td>\n",
" <td>0.580064</td>\n",
" <td>0.400848</td>\n",
" <td>0.800106</td>\n",
" <td>0.415584</td>\n",
" <td>5.563910</td>\n",
" <td>0.857396</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.8</td>\n",
" <td>3.704552</td>\n",
" <td>3.529360</td>\n",
" <td>0.266384</td>\n",
" <td>0.147571</td>\n",
" <td>0.158660</td>\n",
" <td>0.194838</td>\n",
" <td>0.214485</td>\n",
" <td>0.209336</td>\n",
" <td>0.299850</td>\n",
" <td>0.184356</td>\n",
" <td>0.492852</td>\n",
" <td>0.571152</td>\n",
" <td>0.803818</td>\n",
" <td>0.604454</td>\n",
" <td>0.936373</td>\n",
" <td>0.341270</td>\n",
" <td>5.257397</td>\n",
" <td>0.895882</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.7</td>\n",
" <td>3.704528</td>\n",
" <td>3.529335</td>\n",
" <td>0.304136</td>\n",
" <td>0.187298</td>\n",
" <td>0.191990</td>\n",
" <td>0.228749</td>\n",
" <td>0.238305</td>\n",
" <td>0.256201</td>\n",
" <td>0.358807</td>\n",
" <td>0.226808</td>\n",
" <td>0.593897</td>\n",
" <td>0.591207</td>\n",
" <td>0.868505</td>\n",
" <td>0.693531</td>\n",
" <td>0.983033</td>\n",
" <td>0.256854</td>\n",
" <td>4.898568</td>\n",
" <td>0.928065</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.0</td>\n",
" <td>3.702446</td>\n",
" <td>3.527273</td>\n",
" <td>0.282185</td>\n",
" <td>0.192092</td>\n",
" <td>0.186749</td>\n",
" <td>0.216980</td>\n",
" <td>0.204185</td>\n",
" <td>0.240096</td>\n",
" <td>0.339114</td>\n",
" <td>0.204905</td>\n",
" <td>0.572157</td>\n",
" <td>0.593544</td>\n",
" <td>0.875928</td>\n",
" <td>0.685048</td>\n",
" <td>1.000000</td>\n",
" <td>0.077201</td>\n",
" <td>3.875892</td>\n",
" <td>0.974947</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.1</td>\n",
" <td>3.703312</td>\n",
" <td>3.528128</td>\n",
" <td>0.290138</td>\n",
" <td>0.197597</td>\n",
" <td>0.192259</td>\n",
" <td>0.223336</td>\n",
" <td>0.210944</td>\n",
" <td>0.246153</td>\n",
" <td>0.347768</td>\n",
" <td>0.212034</td>\n",
" <td>0.581038</td>\n",
" <td>0.596328</td>\n",
" <td>0.884411</td>\n",
" <td>0.695652</td>\n",
" <td>1.000000</td>\n",
" <td>0.085137</td>\n",
" <td>3.957416</td>\n",
" <td>0.972784</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.2</td>\n",
" <td>3.703825</td>\n",
" <td>3.528636</td>\n",
" <td>0.297137</td>\n",
" <td>0.201202</td>\n",
" <td>0.196067</td>\n",
" <td>0.228169</td>\n",
" <td>0.218026</td>\n",
" <td>0.252767</td>\n",
" <td>0.355655</td>\n",
" <td>0.219909</td>\n",
" <td>0.588904</td>\n",
" <td>0.598160</td>\n",
" <td>0.886532</td>\n",
" <td>0.697773</td>\n",
" <td>1.000000</td>\n",
" <td>0.094517</td>\n",
" <td>4.053212</td>\n",
" <td>0.969980</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.3</td>\n",
" <td>3.704130</td>\n",
" <td>3.528939</td>\n",
" <td>0.303499</td>\n",
" <td>0.204749</td>\n",
" <td>0.199901</td>\n",
" <td>0.232829</td>\n",
" <td>0.225107</td>\n",
" <td>0.260797</td>\n",
" <td>0.363757</td>\n",
" <td>0.226825</td>\n",
" <td>0.599969</td>\n",
" <td>0.599964</td>\n",
" <td>0.888653</td>\n",
" <td>0.707317</td>\n",
" <td>1.000000</td>\n",
" <td>0.105339</td>\n",
" <td>4.147779</td>\n",
" <td>0.966948</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.6</td>\n",
" <td>3.704488</td>\n",
" <td>3.529295</td>\n",
" <td>0.314634</td>\n",
" <td>0.206209</td>\n",
" <td>0.204818</td>\n",
" <td>0.240159</td>\n",
" <td>0.242489</td>\n",
" <td>0.273850</td>\n",
" <td>0.376438</td>\n",
" <td>0.238428</td>\n",
" <td>0.622042</td>\n",
" <td>0.600721</td>\n",
" <td>0.897137</td>\n",
" <td>0.720042</td>\n",
" <td>0.996394</td>\n",
" <td>0.212843</td>\n",
" <td>4.621938</td>\n",
" <td>0.945932</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.4</td>\n",
" <td>3.704313</td>\n",
" <td>3.529120</td>\n",
" <td>0.308908</td>\n",
" <td>0.208811</td>\n",
" <td>0.203854</td>\n",
" <td>0.237241</td>\n",
" <td>0.229614</td>\n",
" <td>0.266918</td>\n",
" <td>0.370758</td>\n",
" <td>0.232673</td>\n",
" <td>0.609385</td>\n",
" <td>0.602014</td>\n",
" <td>0.895016</td>\n",
" <td>0.718982</td>\n",
" <td>0.999894</td>\n",
" <td>0.132035</td>\n",
" <td>4.259682</td>\n",
" <td>0.962989</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.5</td>\n",
" <td>3.704422</td>\n",
" <td>3.529229</td>\n",
" <td>0.314316</td>\n",
" <td>0.211411</td>\n",
" <td>0.206768</td>\n",
" <td>0.240986</td>\n",
" <td>0.237124</td>\n",
" <td>0.273416</td>\n",
" <td>0.378307</td>\n",
" <td>0.239297</td>\n",
" <td>0.622792</td>\n",
" <td>0.603327</td>\n",
" <td>0.903499</td>\n",
" <td>0.724284</td>\n",
" <td>0.999046</td>\n",
" <td>0.168831</td>\n",
" <td>4.411281</td>\n",
" <td>0.956648</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Beta RMSE MAE precision recall F_1 F_05 \\\n",
"0 0.9 3.704567 3.529375 0.162354 0.076967 0.089233 0.114583 \n",
"0 0.8 3.704552 3.529360 0.266384 0.147571 0.158660 0.194838 \n",
"0 0.7 3.704528 3.529335 0.304136 0.187298 0.191990 0.228749 \n",
"0 0.0 3.702446 3.527273 0.282185 0.192092 0.186749 0.216980 \n",
"0 0.1 3.703312 3.528128 0.290138 0.197597 0.192259 0.223336 \n",
"0 0.2 3.703825 3.528636 0.297137 0.201202 0.196067 0.228169 \n",
"0 0.3 3.704130 3.528939 0.303499 0.204749 0.199901 0.232829 \n",
"0 0.6 3.704488 3.529295 0.314634 0.206209 0.204818 0.240159 \n",
"0 0.4 3.704313 3.529120 0.308908 0.208811 0.203854 0.237241 \n",
"0 0.5 3.704422 3.529229 0.314316 0.211411 0.206768 0.240986 \n",
"\n",
" precision_super recall_super NDCG mAP MRR LAUC \\\n",
"0 0.134657 0.113253 0.160868 0.085486 0.243590 0.535405 \n",
"0 0.214485 0.209336 0.299850 0.184356 0.492852 0.571152 \n",
"0 0.238305 0.256201 0.358807 0.226808 0.593897 0.591207 \n",
"0 0.204185 0.240096 0.339114 0.204905 0.572157 0.593544 \n",
"0 0.210944 0.246153 0.347768 0.212034 0.581038 0.596328 \n",
"0 0.218026 0.252767 0.355655 0.219909 0.588904 0.598160 \n",
"0 0.225107 0.260797 0.363757 0.226825 0.599969 0.599964 \n",
"0 0.242489 0.273850 0.376438 0.238428 0.622042 0.600721 \n",
"0 0.229614 0.266918 0.370758 0.232673 0.609385 0.602014 \n",
"0 0.237124 0.273416 0.378307 0.239297 0.622792 0.603327 \n",
"\n",
" HR HR2 Reco in test Test coverage Shannon Gini \n",
"0 0.580064 0.400848 0.800106 0.415584 5.563910 0.857396 \n",
"0 0.803818 0.604454 0.936373 0.341270 5.257397 0.895882 \n",
"0 0.868505 0.693531 0.983033 0.256854 4.898568 0.928065 \n",
"0 0.875928 0.685048 1.000000 0.077201 3.875892 0.974947 \n",
"0 0.884411 0.695652 1.000000 0.085137 3.957416 0.972784 \n",
"0 0.886532 0.697773 1.000000 0.094517 4.053212 0.969980 \n",
"0 0.888653 0.707317 1.000000 0.105339 4.147779 0.966948 \n",
"0 0.897137 0.720042 0.996394 0.212843 4.621938 0.945932 \n",
"0 0.895016 0.718982 0.999894 0.132035 4.259682 0.962989 \n",
"0 0.903499 0.724284 0.999046 0.168831 4.411281 0.956648 "
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result.sort_values([\"recall\"])"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"model=RP3Beta()\n",
"model.fit(train_ui, alpha=0.8, beta=0.6)\n",
"reco=pd.DataFrame(model.recommend(user_code_id, item_code_id, topK=10))\n",
"estimations_df=pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))\n",
"reco.to_csv('Recommendations generated/ml-100k/Self_RP3Beta_reco.csv', index=False, header=False)\n",
"estimations_df.to_csv('Recommendations generated/ml-100k/Self_RP3Beta_estimations.csv', index=False, header=False)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"943it [00:00, 8158.14it/s]\n",
"943it [00:00, 9333.50it/s]\n",
"943it [00:00, 9476.19it/s]\n",
"943it [00:00, 9525.85it/s]\n",
"943it [00:00, 8598.75it/s]\n",
"943it [00:00, 8068.36it/s]\n",
"943it [00:00, 8083.56it/s]\n",
"943it [00:00, 8668.60it/s]\n",
"943it [00:00, 9727.66it/s]\n",
"943it [00:00, 9319.03it/s]\n",
"943it [00:00, 9498.31it/s]\n",
"943it [00:00, 8207.35it/s]\n",
"943it [00:00, 9366.05it/s]\n",
"943it [00:00, 9705.08it/s]\n",
"943it [00:00, 9080.16it/s]\n",
"943it [00:00, 9075.35it/s]\n",
"943it [00:00, 9200.95it/s]\n",
"943it [00:00, 9218.29it/s]\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Model</th>\n",
" <th>RMSE</th>\n",
" <th>MAE</th>\n",
" <th>precision</th>\n",
" <th>recall</th>\n",
" <th>F_1</th>\n",
" <th>F_05</th>\n",
" <th>precision_super</th>\n",
" <th>recall_super</th>\n",
" <th>NDCG</th>\n",
" <th>mAP</th>\n",
" <th>MRR</th>\n",
" <th>LAUC</th>\n",
" <th>HR</th>\n",
" <th>HR2</th>\n",
" <th>Reco in test</th>\n",
" <th>Test coverage</th>\n",
" <th>Shannon</th>\n",
" <th>Gini</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Self_RP3Beta</td>\n",
" <td>3.702928</td>\n",
" <td>3.527713</td>\n",
" <td>0.322694</td>\n",
" <td>0.216069</td>\n",
" <td>0.212152</td>\n",
" <td>0.247538</td>\n",
" <td>0.245279</td>\n",
" <td>0.284983</td>\n",
" <td>0.388271</td>\n",
" <td>0.248239</td>\n",
" <td>0.636318</td>\n",
" <td>0.605683</td>\n",
" <td>0.910923</td>\n",
" <td>0.731707</td>\n",
" <td>0.999788</td>\n",
" <td>0.178932</td>\n",
" <td>4.549663</td>\n",
" <td>0.950182</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Self_P3</td>\n",
" <td>3.702446</td>\n",
" <td>3.527273</td>\n",
" <td>0.282185</td>\n",
" <td>0.192092</td>\n",
" <td>0.186749</td>\n",
" <td>0.216980</td>\n",
" <td>0.204185</td>\n",
" <td>0.240096</td>\n",
" <td>0.339114</td>\n",
" <td>0.204905</td>\n",
" <td>0.572157</td>\n",
" <td>0.593544</td>\n",
" <td>0.875928</td>\n",
" <td>0.685048</td>\n",
" <td>1.000000</td>\n",
" <td>0.077201</td>\n",
" <td>3.875892</td>\n",
" <td>0.974947</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Self_TopPop</td>\n",
" <td>2.508258</td>\n",
" <td>2.217909</td>\n",
" <td>0.188865</td>\n",
" <td>0.116919</td>\n",
" <td>0.118732</td>\n",
" <td>0.141584</td>\n",
" <td>0.130472</td>\n",
" <td>0.137473</td>\n",
" <td>0.214651</td>\n",
" <td>0.111707</td>\n",
" <td>0.400939</td>\n",
" <td>0.555546</td>\n",
" <td>0.765642</td>\n",
" <td>0.492047</td>\n",
" <td>1.000000</td>\n",
" <td>0.038961</td>\n",
" <td>3.159079</td>\n",
" <td>0.987317</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Self_SVDBaseline</td>\n",
" <td>3.644790</td>\n",
" <td>3.479397</td>\n",
" <td>0.137010</td>\n",
" <td>0.082007</td>\n",
" <td>0.083942</td>\n",
" <td>0.100776</td>\n",
" <td>0.106974</td>\n",
" <td>0.105605</td>\n",
" <td>0.160418</td>\n",
" <td>0.080222</td>\n",
" <td>0.322261</td>\n",
" <td>0.537895</td>\n",
" <td>0.626723</td>\n",
" <td>0.360551</td>\n",
" <td>0.999894</td>\n",
" <td>0.276335</td>\n",
" <td>5.123235</td>\n",
" <td>0.910511</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ready_SVD</td>\n",
" <td>0.950945</td>\n",
" <td>0.749680</td>\n",
" <td>0.098834</td>\n",
" <td>0.049106</td>\n",
" <td>0.054037</td>\n",
" <td>0.068741</td>\n",
" <td>0.087768</td>\n",
" <td>0.073987</td>\n",
" <td>0.113242</td>\n",
" <td>0.054201</td>\n",
" <td>0.243492</td>\n",
" <td>0.521280</td>\n",
" <td>0.493107</td>\n",
" <td>0.248144</td>\n",
" <td>0.998515</td>\n",
" <td>0.214286</td>\n",
" <td>4.413166</td>\n",
" <td>0.953488</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Self_SVD</td>\n",
" <td>0.915079</td>\n",
" <td>0.718240</td>\n",
" <td>0.104772</td>\n",
" <td>0.045496</td>\n",
" <td>0.054393</td>\n",
" <td>0.071374</td>\n",
" <td>0.094421</td>\n",
" <td>0.076826</td>\n",
" <td>0.109517</td>\n",
" <td>0.052005</td>\n",
" <td>0.206646</td>\n",
" <td>0.519484</td>\n",
" <td>0.487805</td>\n",
" <td>0.264051</td>\n",
" <td>0.874549</td>\n",
" <td>0.142136</td>\n",
" <td>3.890472</td>\n",
" <td>0.972126</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ready_Baseline</td>\n",
" <td>0.949459</td>\n",
" <td>0.752487</td>\n",
" <td>0.091410</td>\n",
" <td>0.037652</td>\n",
" <td>0.046030</td>\n",
" <td>0.061286</td>\n",
" <td>0.079614</td>\n",
" <td>0.056463</td>\n",
" <td>0.095957</td>\n",
" <td>0.043178</td>\n",
" <td>0.198193</td>\n",
" <td>0.515501</td>\n",
" <td>0.437964</td>\n",
" <td>0.239661</td>\n",
" <td>1.000000</td>\n",
" <td>0.033911</td>\n",
" <td>2.836513</td>\n",
" <td>0.991139</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ready_SVDBiased</td>\n",
" <td>0.938535</td>\n",
" <td>0.738678</td>\n",
" <td>0.085366</td>\n",
" <td>0.036921</td>\n",
" <td>0.044151</td>\n",
" <td>0.057832</td>\n",
" <td>0.074893</td>\n",
" <td>0.056396</td>\n",
" <td>0.095960</td>\n",
" <td>0.044204</td>\n",
" <td>0.212483</td>\n",
" <td>0.515132</td>\n",
" <td>0.446448</td>\n",
" <td>0.217391</td>\n",
" <td>0.997561</td>\n",
" <td>0.168110</td>\n",
" <td>4.191946</td>\n",
" <td>0.963341</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Self_KNNSurprisetask</td>\n",
" <td>0.946255</td>\n",
" <td>0.745209</td>\n",
" <td>0.083457</td>\n",
" <td>0.032848</td>\n",
" <td>0.041227</td>\n",
" <td>0.055493</td>\n",
" <td>0.074785</td>\n",
" <td>0.048890</td>\n",
" <td>0.089577</td>\n",
" <td>0.040902</td>\n",
" <td>0.189057</td>\n",
" <td>0.513076</td>\n",
" <td>0.417815</td>\n",
" <td>0.217391</td>\n",
" <td>0.888547</td>\n",
" <td>0.130592</td>\n",
" <td>3.611806</td>\n",
" <td>0.978659</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Self_GlobalAvg</td>\n",
" <td>1.125760</td>\n",
" <td>0.943534</td>\n",
" <td>0.061188</td>\n",
" <td>0.025968</td>\n",
" <td>0.031383</td>\n",
" <td>0.041343</td>\n",
" <td>0.040558</td>\n",
" <td>0.032107</td>\n",
" <td>0.067695</td>\n",
" <td>0.027470</td>\n",
" <td>0.171187</td>\n",
" <td>0.509546</td>\n",
" <td>0.384942</td>\n",
" <td>0.142100</td>\n",
" <td>1.000000</td>\n",
" <td>0.025974</td>\n",
" <td>2.711772</td>\n",
" <td>0.992003</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ready_Random</td>\n",
" <td>1.522798</td>\n",
" <td>1.222501</td>\n",
" <td>0.049841</td>\n",
" <td>0.020656</td>\n",
" <td>0.025232</td>\n",
" <td>0.033446</td>\n",
" <td>0.030579</td>\n",
" <td>0.022927</td>\n",
" <td>0.051680</td>\n",
" <td>0.019110</td>\n",
" <td>0.123085</td>\n",
" <td>0.506849</td>\n",
" <td>0.331919</td>\n",
" <td>0.119830</td>\n",
" <td>0.985048</td>\n",
" <td>0.183983</td>\n",
" <td>5.097973</td>\n",
" <td>0.907483</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ready_I-KNN</td>\n",
" <td>1.030386</td>\n",
" <td>0.813067</td>\n",
" <td>0.026087</td>\n",
" <td>0.006908</td>\n",
" <td>0.010593</td>\n",
" <td>0.016046</td>\n",
" <td>0.021137</td>\n",
" <td>0.009522</td>\n",
" <td>0.024214</td>\n",
" <td>0.008958</td>\n",
" <td>0.048068</td>\n",
" <td>0.499885</td>\n",
" <td>0.154825</td>\n",
" <td>0.072110</td>\n",
" <td>0.402333</td>\n",
" <td>0.434343</td>\n",
" <td>5.133650</td>\n",
" <td>0.877999</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ready_I-KNNBaseline</td>\n",
" <td>0.935327</td>\n",
" <td>0.737424</td>\n",
" <td>0.002545</td>\n",
" <td>0.000755</td>\n",
" <td>0.001105</td>\n",
" <td>0.001602</td>\n",
" <td>0.002253</td>\n",
" <td>0.000930</td>\n",
" <td>0.003444</td>\n",
" <td>0.001362</td>\n",
" <td>0.011760</td>\n",
" <td>0.496724</td>\n",
" <td>0.021209</td>\n",
" <td>0.004242</td>\n",
" <td>0.482821</td>\n",
" <td>0.059885</td>\n",
" <td>2.232578</td>\n",
" <td>0.994487</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ready_U-KNN</td>\n",
" <td>1.023495</td>\n",
" <td>0.807913</td>\n",
" <td>0.000742</td>\n",
" <td>0.000205</td>\n",
" <td>0.000305</td>\n",
" <td>0.000449</td>\n",
" <td>0.000536</td>\n",
" <td>0.000198</td>\n",
" <td>0.000845</td>\n",
" <td>0.000274</td>\n",
" <td>0.002744</td>\n",
" <td>0.496441</td>\n",
" <td>0.007423</td>\n",
" <td>0.000000</td>\n",
" <td>0.602121</td>\n",
" <td>0.010823</td>\n",
" <td>2.089186</td>\n",
" <td>0.995706</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Self_TopRated</td>\n",
" <td>2.508258</td>\n",
" <td>2.217909</td>\n",
" <td>0.000954</td>\n",
" <td>0.000188</td>\n",
" <td>0.000298</td>\n",
" <td>0.000481</td>\n",
" <td>0.000644</td>\n",
" <td>0.000223</td>\n",
" <td>0.001043</td>\n",
" <td>0.000335</td>\n",
" <td>0.003348</td>\n",
" <td>0.496433</td>\n",
" <td>0.009544</td>\n",
" <td>0.000000</td>\n",
" <td>0.699046</td>\n",
" <td>0.005051</td>\n",
" <td>1.945910</td>\n",
" <td>0.995669</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Self_BaselineIU</td>\n",
" <td>0.958136</td>\n",
" <td>0.754051</td>\n",
" <td>0.000954</td>\n",
" <td>0.000188</td>\n",
" <td>0.000298</td>\n",
" <td>0.000481</td>\n",
" <td>0.000644</td>\n",
" <td>0.000223</td>\n",
" <td>0.001043</td>\n",
" <td>0.000335</td>\n",
" <td>0.003348</td>\n",
" <td>0.496433</td>\n",
" <td>0.009544</td>\n",
" <td>0.000000</td>\n",
" <td>0.699046</td>\n",
" <td>0.005051</td>\n",
" <td>1.945910</td>\n",
" <td>0.995669</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Self_BaselineUI</td>\n",
" <td>0.967585</td>\n",
" <td>0.762740</td>\n",
" <td>0.000954</td>\n",
" <td>0.000170</td>\n",
" <td>0.000278</td>\n",
" <td>0.000463</td>\n",
" <td>0.000644</td>\n",
" <td>0.000189</td>\n",
" <td>0.000752</td>\n",
" <td>0.000168</td>\n",
" <td>0.001677</td>\n",
" <td>0.496424</td>\n",
" <td>0.009544</td>\n",
" <td>0.000000</td>\n",
" <td>0.600530</td>\n",
" <td>0.005051</td>\n",
" <td>1.803126</td>\n",
" <td>0.996380</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Self_IKNN</td>\n",
" <td>1.018363</td>\n",
" <td>0.808793</td>\n",
" <td>0.000318</td>\n",
" <td>0.000108</td>\n",
" <td>0.000140</td>\n",
" <td>0.000189</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000214</td>\n",
" <td>0.000037</td>\n",
" <td>0.000368</td>\n",
" <td>0.496391</td>\n",
" <td>0.003181</td>\n",
" <td>0.000000</td>\n",
" <td>0.392153</td>\n",
" <td>0.115440</td>\n",
" <td>4.174741</td>\n",
" <td>0.965327</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Model RMSE MAE precision recall F_1 \\\n",
"0 Self_RP3Beta 3.702928 3.527713 0.322694 0.216069 0.212152 \n",
"0 Self_P3 3.702446 3.527273 0.282185 0.192092 0.186749 \n",
"0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 0.118732 \n",
"0 Self_SVDBaseline 3.644790 3.479397 0.137010 0.082007 0.083942 \n",
"0 Ready_SVD 0.950945 0.749680 0.098834 0.049106 0.054037 \n",
"0 Self_SVD 0.915079 0.718240 0.104772 0.045496 0.054393 \n",
"0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 0.046030 \n",
"0 Ready_SVDBiased 0.938535 0.738678 0.085366 0.036921 0.044151 \n",
"0 Self_KNNSurprisetask 0.946255 0.745209 0.083457 0.032848 0.041227 \n",
"0 Self_GlobalAvg 1.125760 0.943534 0.061188 0.025968 0.031383 \n",
"0 Ready_Random 1.522798 1.222501 0.049841 0.020656 0.025232 \n",
"0 Ready_I-KNN 1.030386 0.813067 0.026087 0.006908 0.010593 \n",
"0 Ready_I-KNNBaseline 0.935327 0.737424 0.002545 0.000755 0.001105 \n",
"0 Ready_U-KNN 1.023495 0.807913 0.000742 0.000205 0.000305 \n",
"0 Self_TopRated 2.508258 2.217909 0.000954 0.000188 0.000298 \n",
"0 Self_BaselineIU 0.958136 0.754051 0.000954 0.000188 0.000298 \n",
"0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 0.000278 \n",
"0 Self_IKNN 1.018363 0.808793 0.000318 0.000108 0.000140 \n",
"\n",
" F_05 precision_super recall_super NDCG mAP MRR \\\n",
"0 0.247538 0.245279 0.284983 0.388271 0.248239 0.636318 \n",
"0 0.216980 0.204185 0.240096 0.339114 0.204905 0.572157 \n",
"0 0.141584 0.130472 0.137473 0.214651 0.111707 0.400939 \n",
"0 0.100776 0.106974 0.105605 0.160418 0.080222 0.322261 \n",
"0 0.068741 0.087768 0.073987 0.113242 0.054201 0.243492 \n",
"0 0.071374 0.094421 0.076826 0.109517 0.052005 0.206646 \n",
"0 0.061286 0.079614 0.056463 0.095957 0.043178 0.198193 \n",
"0 0.057832 0.074893 0.056396 0.095960 0.044204 0.212483 \n",
"0 0.055493 0.074785 0.048890 0.089577 0.040902 0.189057 \n",
"0 0.041343 0.040558 0.032107 0.067695 0.027470 0.171187 \n",
"0 0.033446 0.030579 0.022927 0.051680 0.019110 0.123085 \n",
"0 0.016046 0.021137 0.009522 0.024214 0.008958 0.048068 \n",
"0 0.001602 0.002253 0.000930 0.003444 0.001362 0.011760 \n",
"0 0.000449 0.000536 0.000198 0.000845 0.000274 0.002744 \n",
"0 0.000481 0.000644 0.000223 0.001043 0.000335 0.003348 \n",
"0 0.000481 0.000644 0.000223 0.001043 0.000335 0.003348 \n",
"0 0.000463 0.000644 0.000189 0.000752 0.000168 0.001677 \n",
"0 0.000189 0.000000 0.000000 0.000214 0.000037 0.000368 \n",
"\n",
" LAUC HR HR2 Reco in test Test coverage Shannon \\\n",
"0 0.605683 0.910923 0.731707 0.999788 0.178932 4.549663 \n",
"0 0.593544 0.875928 0.685048 1.000000 0.077201 3.875892 \n",
"0 0.555546 0.765642 0.492047 1.000000 0.038961 3.159079 \n",
"0 0.537895 0.626723 0.360551 0.999894 0.276335 5.123235 \n",
"0 0.521280 0.493107 0.248144 0.998515 0.214286 4.413166 \n",
"0 0.519484 0.487805 0.264051 0.874549 0.142136 3.890472 \n",
"0 0.515501 0.437964 0.239661 1.000000 0.033911 2.836513 \n",
"0 0.515132 0.446448 0.217391 0.997561 0.168110 4.191946 \n",
"0 0.513076 0.417815 0.217391 0.888547 0.130592 3.611806 \n",
"0 0.509546 0.384942 0.142100 1.000000 0.025974 2.711772 \n",
"0 0.506849 0.331919 0.119830 0.985048 0.183983 5.097973 \n",
"0 0.499885 0.154825 0.072110 0.402333 0.434343 5.133650 \n",
"0 0.496724 0.021209 0.004242 0.482821 0.059885 2.232578 \n",
"0 0.496441 0.007423 0.000000 0.602121 0.010823 2.089186 \n",
"0 0.496433 0.009544 0.000000 0.699046 0.005051 1.945910 \n",
"0 0.496433 0.009544 0.000000 0.699046 0.005051 1.945910 \n",
"0 0.496424 0.009544 0.000000 0.600530 0.005051 1.803126 \n",
"0 0.496391 0.003181 0.000000 0.392153 0.115440 4.174741 \n",
"\n",
" Gini \n",
"0 0.950182 \n",
"0 0.974947 \n",
"0 0.987317 \n",
"0 0.910511 \n",
"0 0.953488 \n",
"0 0.972126 \n",
"0 0.991139 \n",
"0 0.963341 \n",
"0 0.978659 \n",
"0 0.992003 \n",
"0 0.907483 \n",
"0 0.877999 \n",
"0 0.994487 \n",
"0 0.995706 \n",
"0 0.995669 \n",
"0 0.995669 \n",
"0 0.996380 \n",
"0 0.965327 "
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import imp\n",
"imp.reload(ev)\n",
"\n",
"import evaluation_measures as ev\n",
"dir_path=\"Recommendations generated/ml-100k/\"\n",
"super_reactions=[4,5]\n",
"test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n",
"\n",
"ev.evaluate_all(test, dir_path, super_reactions)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# project task 7 (optional): implement graph-based model of your choice "
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# for example change length of paths in RP3beta\n",
"# save the outptut in 'Recommendations generated/ml-100k/Self_GraphTask_estimations.csv'\n",
"# and 'Recommendations generated/ml-100k/Self_GraphTask_reco.csv'"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}