workshops_recommender_systems/P5. Graph-based.ipynb

1385 lines
558 KiB
Plaintext
Raw Normal View History

2020-05-21 13:42:50 +02:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Self-made RP3-beta"
]
},
{
"cell_type": "code",
2020-05-21 16:20:12 +02:00
"execution_count": 1,
2020-05-21 13:42:50 +02:00
"metadata": {},
"outputs": [],
"source": [
"import helpers\n",
"import pandas as pd\n",
"import numpy as np\n",
"import scipy.sparse as sparse\n",
"from collections import defaultdict\n",
"from itertools import chain\n",
"import random\n",
"import time\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# ml-100k train/test splits: tab-separated, no header row\n",
"train_read = pd.read_csv('./Datasets/ml-100k/train.csv', sep='\\t', header=None)\n",
"test_read = pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n",
"\n",
"# presumably sparse user x item matrices plus code<->id lookups -- see helpers.data_to_csr\n",
"(train_ui, test_ui,\n",
" user_code_id, user_id_code,\n",
" item_code_id, item_id_code) = helpers.data_to_csr(train_read, test_read)"
]
},
{
"cell_type": "code",
2020-05-21 16:20:12 +02:00
"execution_count": 2,
2020-05-21 13:42:50 +02:00
"metadata": {},
"outputs": [],
"source": [
"class RP3Beta():\n",
"    \"\"\"Graph-based recommender scoring items by three-step walks (user -> item -> user -> item).\n",
"\n",
"    Transition probabilities are raised to the power alpha; the final scores are divided by\n",
"    item popularity raised to the power beta.\n",
"    \"\"\"\n",
"\n",
"    def fit(self, train_ui, alpha, beta):\n",
"        \"\"\"Compute the dense user x item score matrix and store it in self.estimations.\n",
"\n",
"        We weight our edges by user's explicit ratings so if user rated movie high we'll\n",
"        follow that path with higher probability.\n",
"\n",
"        train_ui -- sparse user x item rating matrix (recommend() reads its CSR\n",
"                    indices/indptr arrays)\n",
"        alpha    -- exponent applied element-wise to both transition matrices\n",
"        beta     -- exponent of the item-popularity divisor; 0 disables it\n",
"        \"\"\"\n",
"        self.train_ui = train_ui\n",
"        self.train_iu = train_ui.transpose()\n",
"\n",
"        self.alpha = alpha\n",
"        self.beta = beta\n",
"\n",
"        # Pui: each user's ratings divided by the user's rating sum.\n",
"        # A user without ratings divides by zero here, which can produce NaN scores;\n",
"        # recommend() and estimate() check for NaN.\n",
"        Pui = sparse.csr_matrix(self.train_ui / self.train_ui.sum(axis=1))\n",
"\n",
"        # Piu: each item's ratings divided by the item's rating sum.\n",
"        # Non-positive sums are replaced by 1 to avoid dividing by zero; np.where keeps\n",
"        # the dtype of the sums regardless of which element comes first.\n",
"        item_sums = np.asarray(self.train_iu.sum(axis=1))\n",
"        item_sums = np.where(item_sums > 0, item_sums, 1)\n",
"        Piu = sparse.csr_matrix(self.train_iu / item_sums)\n",
"\n",
"        # Item popularity: number of users with a positive rating, at least 1\n",
"        item_orders = np.asarray((self.train_ui > 0).sum(axis=0))\n",
"        item_orders = np.where(item_orders > 0, item_orders, 1)\n",
"\n",
"        Pui = Pui.power(self.alpha)\n",
"        Piu = Piu.power(self.alpha)\n",
"\n",
"        P3 = Pui.dot(Piu).dot(Pui)\n",
"        P3 = P3 / np.power(item_orders, self.beta)\n",
"\n",
"        # Store a plain 2-D ndarray whether the division returned a dense or a sparse matrix\n",
"        self.estimations = P3.toarray() if sparse.issparse(P3) else np.array(P3)\n",
"\n",
"    def recommend(self, user_code_id, item_code_id, topK=10):\n",
"        \"\"\"Return one row per user in the format [user, item1, score1, item2, score2, ...].\n",
"\n",
"        Items stored in the user's train_ui row and items with a NaN score are skipped;\n",
"        the remaining items are ranked by descending score and cut to topK. Users left\n",
"        without any candidate item get no row.\n",
"        \"\"\"\n",
"        result = []\n",
"        for nb_user, scores in enumerate(self.estimations):\n",
"            user_rated = self.train_ui.indices[self.train_ui.indptr[nb_user]:self.train_ui.indptr[nb_user + 1]]\n",
"\n",
"            # Boolean mask instead of testing every item against user_rated one by one\n",
"            candidate = ~np.isnan(scores)\n",
"            candidate[user_rated] = False\n",
"            items = np.flatnonzero(candidate)\n",
"            if items.size == 0:\n",
"                continue\n",
"\n",
"            # Stable sort on the negated scores: descending order, ties keep item order\n",
"            best = items[np.argsort(-scores[items], kind='mergesort')[:topK]]\n",
"\n",
"            row = [user_code_id[nb_user]]\n",
"            for item in best:\n",
"                row.extend((item_code_id[int(item)], scores[item]))\n",
"            result.append(row)\n",
"        return result\n",
"\n",
"    def estimate(self, user_code_id, item_code_id, test_ui):\n",
"        \"\"\"Return [user, item, score] for every non-zero entry of test_ui.\n",
"\n",
"        The score is the walk score from self.estimations; a NaN score is replaced by 1.\n",
"        \"\"\"\n",
"        result = []\n",
"        for user, item in zip(*test_ui.nonzero()):\n",
"            score = self.estimations[user, item]\n",
"            result.append([user_code_id[user], item_code_id[item],\n",
"                           1 if np.isnan(score) else score])\n",
"        return result"
]
},
{
"cell_type": "code",
2020-05-21 16:20:12 +02:00
"execution_count": 3,
2020-05-21 13:42:50 +02:00
"metadata": {},
"outputs": [],
"source": [
"model=RP3Beta()\n",
"model.fit(train_ui, alpha=1, beta=0)"
]
},
{
"cell_type": "code",
2020-05-21 16:20:12 +02:00
"execution_count": 4,
2020-05-21 13:42:50 +02:00
"metadata": {},
"outputs": [],
"source": [
"# Top-10 recommendations per user: one row per user\n",
"top_n = pd.DataFrame(model.recommend(user_code_id, item_code_id, topK=10))\n",
"# Model score for every user-item pair present in the test set\n",
"estimations = pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))\n",
"\n",
"top_n.to_csv('Recommendations generated/ml-100k/Self_P3_reco.csv', index=False, header=False)\n",
"estimations.to_csv('Recommendations generated/ml-100k/Self_P3_estimations.csv', index=False, header=False)"
]
},
{
"cell_type": "code",
2020-05-21 16:20:12 +02:00
"execution_count": 5,
2020-05-21 13:42:50 +02:00
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
2020-05-21 16:20:12 +02:00
"943it [00:00, 8810.70it/s]\n"
2020-05-21 13:42:50 +02:00
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>RMSE</th>\n",
" <th>MAE</th>\n",
" <th>precision</th>\n",
" <th>recall</th>\n",
" <th>F_1</th>\n",
" <th>F_05</th>\n",
" <th>precision_super</th>\n",
" <th>recall_super</th>\n",
" <th>NDCG</th>\n",
" <th>mAP</th>\n",
" <th>MRR</th>\n",
" <th>LAUC</th>\n",
" <th>HR</th>\n",
" <th>Reco in test</th>\n",
" <th>Test coverage</th>\n",
" <th>Shannon</th>\n",
" <th>Gini</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>3.702446</td>\n",
" <td>3.527273</td>\n",
" <td>0.282185</td>\n",
" <td>0.192092</td>\n",
" <td>0.186749</td>\n",
" <td>0.21698</td>\n",
" <td>0.204185</td>\n",
" <td>0.240096</td>\n",
" <td>0.339114</td>\n",
" <td>0.204905</td>\n",
" <td>0.572157</td>\n",
" <td>0.593544</td>\n",
" <td>0.875928</td>\n",
" <td>1.0</td>\n",
" <td>0.077201</td>\n",
" <td>3.875892</td>\n",
" <td>0.974947</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" RMSE MAE precision recall F_1 F_05 \\\n",
"0 3.702446 3.527273 0.282185 0.192092 0.186749 0.21698 \n",
"\n",
" precision_super recall_super NDCG mAP MRR LAUC \\\n",
"0 0.204185 0.240096 0.339114 0.204905 0.572157 0.593544 \n",
"\n",
" HR Reco in test Test coverage Shannon Gini \n",
"0 0.875928 1.0 0.077201 3.875892 0.974947 "
]
},
2020-05-21 16:20:12 +02:00
"execution_count": 5,
2020-05-21 13:42:50 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import evaluation_measures as ev\n",
"\n",
"# Evaluate from the files on disk rather than from the in-memory model output\n",
"reco = np.loadtxt('Recommendations generated/ml-100k/Self_P3_reco.csv', delimiter=',')\n",
"estimations_df = pd.read_csv('Recommendations generated/ml-100k/Self_P3_estimations.csv', header=None)\n",
"\n",
"ev.evaluate(\n",
"    test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None),\n",
"    estimations_df=estimations_df,\n",
"    reco=reco,\n",
"    super_reactions=[4,5],\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Let's check hyperparameters"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Alpha"
]
},
{
"cell_type": "code",
2020-05-21 16:20:12 +02:00
"execution_count": 6,
2020-05-21 13:42:50 +02:00
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" 0%| | 0/8 [00:00<?, ?it/s]\n",
"0it [00:00, ?it/s]\u001b[A\n",
2020-05-21 16:20:12 +02:00
"943it [00:00, 7616.24it/s]\u001b[A\n",
" 12%|█▎ | 1/8 [00:09<01:09, 9.93s/it]\n",
2020-05-21 13:42:50 +02:00
"0it [00:00, ?it/s]\u001b[A\n",
2020-05-21 16:20:12 +02:00
"943it [00:00, 7819.53it/s]\u001b[A\n",
" 25%|██▌ | 2/8 [00:20<01:00, 10.03s/it]\n",
2020-05-21 13:42:50 +02:00
"0it [00:00, ?it/s]\u001b[A\n",
2020-05-21 16:20:12 +02:00
"943it [00:00, 8839.47it/s]\u001b[A\n",
" 38%|███▊ | 3/8 [00:30<00:50, 10.01s/it]\n",
2020-05-21 13:42:50 +02:00
"0it [00:00, ?it/s]\u001b[A\n",
2020-05-21 16:20:12 +02:00
"943it [00:00, 9107.07it/s]\u001b[A\n",
" 50%|█████ | 4/8 [00:39<00:39, 9.86s/it]\n",
2020-05-21 13:42:50 +02:00
"0it [00:00, ?it/s]\u001b[A\n",
2020-05-21 16:20:12 +02:00
"943it [00:00, 9022.23it/s]\u001b[A\n",
" 62%|██████▎ | 5/8 [00:49<00:29, 9.83s/it]\n",
2020-05-21 13:42:50 +02:00
"0it [00:00, ?it/s]\u001b[A\n",
2020-05-21 16:20:12 +02:00
"943it [00:00, 8906.39it/s]\u001b[A\n",
" 75%|███████▌ | 6/8 [00:59<00:19, 9.77s/it]\n",
2020-05-21 13:42:50 +02:00
"0it [00:00, ?it/s]\u001b[A\n",
2020-05-21 16:20:12 +02:00
"943it [00:00, 9033.03it/s]\u001b[A\n",
" 88%|████████▊ | 7/8 [01:08<00:09, 9.75s/it]\n",
2020-05-21 13:42:50 +02:00
"0it [00:00, ?it/s]\u001b[A\n",
2020-05-21 16:20:12 +02:00
"943it [00:00, 8158.47it/s]\u001b[A\n",
"100%|██████████| 8/8 [01:18<00:00, 9.80s/it]\n"
2020-05-21 13:42:50 +02:00
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Alpha</th>\n",
" <th>RMSE</th>\n",
" <th>MAE</th>\n",
" <th>precision</th>\n",
" <th>recall</th>\n",
" <th>F_1</th>\n",
" <th>F_05</th>\n",
" <th>precision_super</th>\n",
" <th>recall_super</th>\n",
" <th>NDCG</th>\n",
" <th>mAP</th>\n",
" <th>MRR</th>\n",
" <th>LAUC</th>\n",
" <th>HR</th>\n",
" <th>Reco in test</th>\n",
" <th>Test coverage</th>\n",
" <th>Shannon</th>\n",
" <th>Gini</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.2</td>\n",
" <td>268.177832</td>\n",
" <td>211.732649</td>\n",
" <td>0.262672</td>\n",
" <td>0.166858</td>\n",
" <td>0.166277</td>\n",
" <td>0.197184</td>\n",
" <td>0.187661</td>\n",
" <td>0.203252</td>\n",
" <td>0.320910</td>\n",
" <td>0.196132</td>\n",
" <td>0.563378</td>\n",
" <td>0.580866</td>\n",
" <td>0.850477</td>\n",
" <td>1.000000</td>\n",
" <td>0.060606</td>\n",
" <td>3.669627</td>\n",
" <td>0.979636</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.4</td>\n",
" <td>10.546689</td>\n",
" <td>7.792373</td>\n",
" <td>0.268505</td>\n",
" <td>0.172669</td>\n",
" <td>0.171569</td>\n",
" <td>0.202643</td>\n",
" <td>0.192489</td>\n",
" <td>0.212653</td>\n",
" <td>0.326760</td>\n",
" <td>0.200172</td>\n",
" <td>0.565148</td>\n",
" <td>0.583801</td>\n",
" <td>0.854719</td>\n",
" <td>1.000000</td>\n",
" <td>0.064214</td>\n",
" <td>3.726996</td>\n",
" <td>0.978426</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.6</td>\n",
" <td>3.143988</td>\n",
" <td>2.948790</td>\n",
" <td>0.274655</td>\n",
" <td>0.180502</td>\n",
" <td>0.177820</td>\n",
" <td>0.208730</td>\n",
" <td>0.198176</td>\n",
" <td>0.222746</td>\n",
" <td>0.332872</td>\n",
" <td>0.203290</td>\n",
" <td>0.568872</td>\n",
" <td>0.587738</td>\n",
" <td>0.870626</td>\n",
" <td>1.000000</td>\n",
" <td>0.065657</td>\n",
" <td>3.785282</td>\n",
" <td>0.977090</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.8</td>\n",
" <td>3.670728</td>\n",
" <td>3.495735</td>\n",
" <td>0.281972</td>\n",
" <td>0.189868</td>\n",
" <td>0.185300</td>\n",
" <td>0.216071</td>\n",
" <td>0.203541</td>\n",
" <td>0.236751</td>\n",
" <td>0.339867</td>\n",
" <td>0.206688</td>\n",
" <td>0.573729</td>\n",
" <td>0.592432</td>\n",
" <td>0.874867</td>\n",
" <td>1.000000</td>\n",
" <td>0.070707</td>\n",
" <td>3.832415</td>\n",
" <td>0.975998</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1.0</td>\n",
" <td>3.702446</td>\n",
" <td>3.527273</td>\n",
" <td>0.282185</td>\n",
" <td>0.192092</td>\n",
" <td>0.186749</td>\n",
" <td>0.216980</td>\n",
" <td>0.204185</td>\n",
" <td>0.240096</td>\n",
" <td>0.339114</td>\n",
" <td>0.204905</td>\n",
" <td>0.572157</td>\n",
" <td>0.593544</td>\n",
" <td>0.875928</td>\n",
" <td>1.000000</td>\n",
" <td>0.077201</td>\n",
" <td>3.875892</td>\n",
" <td>0.974947</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1.2</td>\n",
" <td>3.704441</td>\n",
" <td>3.529251</td>\n",
" <td>0.280912</td>\n",
" <td>0.193633</td>\n",
" <td>0.187311</td>\n",
" <td>0.216872</td>\n",
" <td>0.203004</td>\n",
" <td>0.240588</td>\n",
" <td>0.338049</td>\n",
" <td>0.203453</td>\n",
" <td>0.571830</td>\n",
" <td>0.594313</td>\n",
" <td>0.883351</td>\n",
" <td>1.000000</td>\n",
" <td>0.085859</td>\n",
" <td>3.910718</td>\n",
" <td>0.974073</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1.4</td>\n",
" <td>3.704580</td>\n",
" <td>3.529388</td>\n",
" <td>0.273595</td>\n",
" <td>0.190651</td>\n",
" <td>0.183874</td>\n",
" <td>0.212183</td>\n",
" <td>0.199464</td>\n",
" <td>0.239118</td>\n",
" <td>0.329550</td>\n",
" <td>0.195433</td>\n",
" <td>0.566171</td>\n",
" <td>0.592793</td>\n",
" <td>0.871686</td>\n",
" <td>1.000000</td>\n",
" <td>0.107504</td>\n",
" <td>3.961915</td>\n",
" <td>0.972674</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1.6</td>\n",
" <td>3.704591</td>\n",
" <td>3.529399</td>\n",
" <td>0.263097</td>\n",
" <td>0.186255</td>\n",
" <td>0.178709</td>\n",
" <td>0.205170</td>\n",
" <td>0.191094</td>\n",
" <td>0.232920</td>\n",
" <td>0.317439</td>\n",
" <td>0.184917</td>\n",
" <td>0.552349</td>\n",
" <td>0.590545</td>\n",
" <td>0.868505</td>\n",
" <td>0.999576</td>\n",
" <td>0.156566</td>\n",
" <td>4.060156</td>\n",
" <td>0.969203</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Alpha RMSE MAE precision recall F_1 F_05 \\\n",
"0 0.2 268.177832 211.732649 0.262672 0.166858 0.166277 0.197184 \n",
"0 0.4 10.546689 7.792373 0.268505 0.172669 0.171569 0.202643 \n",
"0 0.6 3.143988 2.948790 0.274655 0.180502 0.177820 0.208730 \n",
"0 0.8 3.670728 3.495735 0.281972 0.189868 0.185300 0.216071 \n",
"0 1.0 3.702446 3.527273 0.282185 0.192092 0.186749 0.216980 \n",
"0 1.2 3.704441 3.529251 0.280912 0.193633 0.187311 0.216872 \n",
"0 1.4 3.704580 3.529388 0.273595 0.190651 0.183874 0.212183 \n",
"0 1.6 3.704591 3.529399 0.263097 0.186255 0.178709 0.205170 \n",
"\n",
" precision_super recall_super NDCG mAP MRR LAUC \\\n",
"0 0.187661 0.203252 0.320910 0.196132 0.563378 0.580866 \n",
"0 0.192489 0.212653 0.326760 0.200172 0.565148 0.583801 \n",
"0 0.198176 0.222746 0.332872 0.203290 0.568872 0.587738 \n",
"0 0.203541 0.236751 0.339867 0.206688 0.573729 0.592432 \n",
"0 0.204185 0.240096 0.339114 0.204905 0.572157 0.593544 \n",
"0 0.203004 0.240588 0.338049 0.203453 0.571830 0.594313 \n",
"0 0.199464 0.239118 0.329550 0.195433 0.566171 0.592793 \n",
"0 0.191094 0.232920 0.317439 0.184917 0.552349 0.590545 \n",
"\n",
" HR Reco in test Test coverage Shannon Gini \n",
"0 0.850477 1.000000 0.060606 3.669627 0.979636 \n",
"0 0.854719 1.000000 0.064214 3.726996 0.978426 \n",
"0 0.870626 1.000000 0.065657 3.785282 0.977090 \n",
"0 0.874867 1.000000 0.070707 3.832415 0.975998 \n",
"0 0.875928 1.000000 0.077201 3.875892 0.974947 \n",
"0 0.883351 1.000000 0.085859 3.910718 0.974073 \n",
"0 0.871686 1.000000 0.107504 3.961915 0.972674 \n",
"0 0.868505 0.999576 0.156566 4.060156 0.969203 "
]
},
2020-05-21 16:20:12 +02:00
"execution_count": 6,
2020-05-21 13:42:50 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from tqdm import tqdm\n",
"\n",
"# The test set is the same for every alpha, so read it once\n",
"test_df = pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n",
"\n",
"alpha_rows = []\n",
"for alpha in tqdm([round(i,1) for i in np.arange(0.2,1.6001,0.2)]):\n",
"    model = RP3Beta()\n",
"    model.fit(train_ui, alpha=alpha, beta=0)\n",
"    reco = pd.DataFrame(model.recommend(user_code_id, item_code_id, topK=10))\n",
"    estimations_df = pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))\n",
"    # A fresh copy per run, in case evaluate() modifies the frame it is given\n",
"    to_append = ev.evaluate(test=test_df.copy(),\n",
"                            estimations_df=estimations_df,\n",
"                            reco=np.array(reco),\n",
"                            super_reactions=[4,5])\n",
"    to_append.insert(0, \"Alpha\", alpha)\n",
"    alpha_rows.append(to_append)\n",
"\n",
"# ignore_index gives every alpha its own row label instead of repeating 0\n",
"result = pd.concat(alpha_rows, ignore_index=True)\n",
"result"
]
},
{
"cell_type": "code",
2020-05-21 16:20:12 +02:00
"execution_count": 7,
2020-05-21 13:42:50 +02:00
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
2020-05-21 16:20:12 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAABCwAAAkoCAYAAACgVC5GAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvAOZPmwAAIABJREFUeJzs3Xt8VeWZ9//PlXM2ISQ7BEhCIqi0cpCgImoVz+cS1Km1OLXV2tZOq2OP83rap9NpH5/2Nx2dnmytY512erLY2taWKJ7FVp8KikoQRCrKKSRACMcAOV+/P/ZK3ITNOTtrJ/v7fr32i+x73Wvva6ncrlzrvq/b3B0RERERERERkVSSEXYAIiIiIiIiIiJ9KWEhIiIiIiIiIilHCQsRERERERERSTlKWIiIiIiIiIhIylHCQkRERERERERSjhIWIiIiIiIiIpJylLAQERERERGRXmb2YTN78jD6/ZeZfW0gYpL0pISFDDlmtsbM9ppZi5ltNLOfm1lBcOznZuZmNrvPOd8P2m8K3ueY2XfMrD74nNVm9r0DfEfP60cDeqEiIoNIMG62m9nIPu1LgvF3XFzbN4K2GX363mRmXX3G3hYzKx+YqxARSQ/u/oC7X3oY/f7J3f/vQMQk6UkJCxmqaty9AJgGnAJ8Je7Y34Ebe96YWRbwQeDtuD5fAaYDM4DhwAXAa4m+I+51W/9fhojIkLIauL7njZmdDOTHdzAzAz4CbCVurI7zYp+xt8DdG5IZtIjIYBTc44oMakpYyJDm7huBJ4glLnrUAmebWXHw/nJgKbAxrs/pwMPu3uAxa9z9lwMStIjI0PUr4KNx728E+o6tM4Fy4LPAHDPLGaDYREQGhWDG2lfM7A0z22Zm/2NmeWZ2fjA7+H+Z2Ubgf4L+s4LZbNvN7G9mNjXusyrN7I9m1mRmzT0zhoMZbS8EP5uZfc/MNpvZDjNbamZTgmM/N7Nvxn3eJ81slZltNbN58TPggplz/2RmbwVx3xMkqUUOSAkLGdLMbCxwBbAqrrkVmAfMCd5/lP1vmBcCXzCzz5jZyRpMRUT6xUKg0Mwmmlkm8CHg13363Egssfzb4P2sAYxPRGSw+DBwGXAC8B7gX4P2MUAUOA64xcxOBX4GfAooAe4D5plZbjAOPwKsBcYBFcCDCb7rUuDc4HuKiI3dzX07mdmFwL8D1wFlwef2/bxZxB4MVgf9LjviK5e0ooSFDFV/MrNdwHpgM/D1Psd/CXzUzEYA5wF/6nP834H/IPY/g8XABjPrOzX5T0Gmuuf1yX6/ChGRoadnlsUlwJvAhp4DZhYhtkTvN+7eAfye/ZeFnNln7H0bEZH08yN3X+/uW4Fv8e5yu27g6+7e5u57gU8C97n7InfvcvdfAG3AmcSWPpcD/+Luu9291d1fSPBdHcSWSJ8EmLuvcPfGBP0+DPzM3V919zZiS6zPiq9RBHzb3be7+zpgAfvOghbZjxIWMlRd7e7DgfOJDa77FHkLBuNSYtnoR4IBPf54l7vf4+5nE8skfwv4mZlN7PMdRXGv+5N4PSIiQ8WvgH8EbmL/2W3XAJ3A/OD9A8AVZlYa12dhn7H3hGQHLCKSgtbH/byWWOIBoMndW+OOHQd8MT7RC1QG/SuBte7eebAvcvdngR8B9wCbzOwnZlaYoGt5EEvPeS3EZmJUxPWJX4K9Byg42HeLKGEhQ5q7/wX4OfCfCQ7/Gvgi+98w9/2Mve5+D7ANmNTfMYqIpBN3X0us+OaVwB/7HL6R2M3rumD99UNANnGFOkVEBIglG3pUAT3Fh71Pv/XAt/okeiPuPjc4VnU4xTnd/W53Pw2YTGxpyL8k6NZALEECgJkNI7YMZUOCviKHRQkLSQffBy4xs75Tzu4mNiX5r31PMLPPBYWL8s0sK1gOMpz9dwoREZEj93HgQnffHddWAVxEbH3ztOBVTWx5XqLdQkRE0tmtZjbWzKLA/+
bduj993Q/8k5mdERTPHGZm7zez4cBLQCPw7aA9z8zO7vsBZnZ6cH42sJtYPbiuBN/1G+BjZjbNzHKB/w9Y5O5rjvlqJW0pYSFDnrs3EZtF8bU+7Vvd/Rl375uJBtgLfIfYtLUtwK3AB9z9nbg+tWbWEvd6OEmXICIypLj72+6+uE/zTGCJuz/p7ht7XsSSy1N7KtITWw/d0ud1+oBegIhI+H4DPAm8E7y+mahTMNZ+ktiSjm3ECtHfFBzrAmqAE4F1QD2xgpp9FRJLfGwjtuSjmQSzl939GWL3238glgg5gXeL3IscFUv8u5qIiIiIiIikGjNbA3zC3Z8OOxaRZNMMCxERERERERFJOUpYiIiIiIiIiEjK0ZIQEREREREREUk5mmEhIiIiIiIiIilHCQsRERERERERSTlZYQfQH0aOHOnjxo0LOwwRkf288sorW9y9NOw4BoLGYhFJRRqHRUTCd7Rj8ZBIWIwbN47Fi/tu5y4iEj4zWxt2DANFY7GIpCKNwyIi4TvasVhLQkREREREREQk5ShhISIiIiIiIiIpRwkLEREREREREUk5Q6KGhYikho6ODurr62ltbQ07lAGXl5fH2LFjyc7ODjsUEZH9pMP4rHFYRI5VOoyVydbfY7ESFiLSb+rr6xk+fDjjxo3DzMIOZ8C4O83NzdTX1zN+/PiwwxER2c9QH58Hahw2s8uBHwCZwH+7+7f7HP8C8AmgE2gCbnb3tcGxO4H3E5vh/BTwWSAfeAg4AegCat39y0H/m4C7gA3Bx//I3f87aRcnIkN+rEy2ZIzFWhIiIv2mtbWVkpKStBvgzYySkhJl40UkZQ318XkgxmEzywTuAa4AJgHXm9mkPt1eA6a7+1Tg98CdwbnvA84GpgJTgNOB84Jz/tPdTwJOAc42syviPu+37j4teClZIZJkQ32sTLZkjMVKWIhIv0rXAT5dr1tEBo+hPk4NwPXNAFa5+zvu3g48CFwV38HdF7j7nuDtQmBszyEgD8gBcoFsYJO773H3BcG57cCrceeISAiG+liZbP39z08JCxEZUjIzM5k2bRpTpkyhpqaG7du3A7BmzRrMjK997Wu9fbds2UJ2dja33XYbACtXruT8889n2rRpTJw4kVtuuQWA5557jhEjRjBt2rTe19NPPz3wFyciMoiZGR/5yEd633d2dlJaWsqsWbP26XfVVVdx1lln7dP2jW98g4qKin3G4Z7xfQBVAOvj3tcHbQfyceAxAHd/EVgANAavJ9x9RXxnMysCaoBn4po/YGZLzez3ZlZ57JcgIunqfe9730GPX3nllWGMq4ekhIWIDCn5+fksWbKEZcuWEY1Gueeee3qPHX/88TzyyCO97x966CEmT57c+/7222/n85//PEuWLGHFihX88z//c++xmTNnsmTJkt7XxRdfPDAXJCIyRAwbNoxly5axd+9eAJ566ikqKvb9fX/79u28+uqrbN++ndWrV+9zrGd87nkVFRUNWOyBRI8NPWFHsxuA6cRqUGBmJwITic2eqAAuNLNz4/pnAXOBu939naC5FhgXLC95GvjFAb7rFjNbbGaLm5qajurCRGRw6erqOuJz/va3vx30+Pz588MYVw9JCQsRGbLOOussNmzY0Ps+Pz+fiRMnsnjxYgB++9vfct111/Ueb2xsZOzYd2finnzyyQMXrIhIGrjiiit49NFHAZg7dy7XX3/9Psf/8Ic/UFNTw5w5c3jwwQfDCPFg6oH4WQ5jgYa+nczsYuCrwGx3bwuarwEWunuLu7cQm3lxZtxpPwHecvfv9zS4e3Pc+fcDpyUKyt1/4u7T3X16aWnpUV6aiKSKNWvWcNJJJ3HjjTcydepUrr32Wvbs2cO4ceO44447OOecc3jooYd4++23ufzyyznttNOYOXMmb775JgCbNm3immuuobq6murq6t5ERUFBARC73z333HN7ZyQ///zzAIwbN44tW7YA8N3vfpcpU6YwZcoUvv/97/fGNXHiRD
75yU8yefJkLr300t4EdDJplxARSYr/U7ucNxp29utnTiov5Os1kw/dkVjm+ZlnnuHjH//4Pu09N8FjxowhMzOT8vJ
2020-05-21 13:42:50 +02:00
"text/plain": [
2020-05-21 16:20:12 +02:00
"<matplotlib.figure.Figure at 0x7fd89561d7f0>"
2020-05-21 13:42:50 +02:00
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Every column except the swept parameter is a metric to plot\n",
"metrics = [col for col in result.columns if col != 'Alpha']\n",
"\n",
"# NOTE: despite the names, charts_per_row is passed as the number of grid rows\n",
"# and charts_per_column as the number of grid columns\n",
"charts_per_row = 6\n",
"charts_per_column = 3\n",
"\n",
"fig, axes = plt.subplots(nrows=charts_per_row, ncols=charts_per_column,\n",
"                         figsize=(18, 7 * charts_per_row))\n",
"import itertools\n",
"# (row, column) grid positions in row-major order\n",
"to_iter = list(itertools.product(range(charts_per_row), range(charts_per_column)))\n",
"\n",
"for i, metric in enumerate(metrics):\n",
"    df = result[['Alpha', metric]]\n",
"    df.plot(ax=axes[to_iter[i]], title=metric, x=0, y=1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Beta"
]
},
{
"cell_type": "code",
2020-05-21 16:20:12 +02:00
"execution_count": 8,
2020-05-21 13:42:50 +02:00
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" 0%| | 0/10 [00:00<?, ?it/s]\n",
"0it [00:00, ?it/s]\u001b[A\n",
2020-05-21 16:20:12 +02:00
"943it [00:00, 8868.94it/s]\u001b[A\n",
" 10%|█ | 1/10 [00:09<01:28, 9.79s/it]\n",
2020-05-21 13:42:50 +02:00
"0it [00:00, ?it/s]\u001b[A\n",
2020-05-21 16:20:12 +02:00
"943it [00:00, 8891.53it/s]\u001b[A\n",
" 20%|██ | 2/10 [00:19<01:18, 9.78s/it]\n",
2020-05-21 13:42:50 +02:00
"0it [00:00, ?it/s]\u001b[A\n",
2020-05-21 16:20:12 +02:00
"943it [00:00, 8088.73it/s]\u001b[A\n",
" 30%|███ | 3/10 [00:29<01:08, 9.79s/it]\n",
2020-05-21 13:42:50 +02:00
"0it [00:00, ?it/s]\u001b[A\n",
2020-05-21 16:20:12 +02:00
"943it [00:00, 8451.02it/s]\u001b[A\n",
" 40%|████ | 4/10 [00:39<00:58, 9.80s/it]\n",
2020-05-21 13:42:50 +02:00
"0it [00:00, ?it/s]\u001b[A\n",
2020-05-21 16:20:12 +02:00
"943it [00:00, 8600.04it/s]\u001b[A\n",
" 50%|█████ | 5/10 [00:49<00:49, 9.88s/it]\n",
2020-05-21 13:42:50 +02:00
"0it [00:00, ?it/s]\u001b[A\n",
2020-05-21 16:20:12 +02:00
"943it [00:00, 8875.99it/s]\u001b[A\n",
" 60%|██████ | 6/10 [00:58<00:39, 9.82s/it]\n",
2020-05-21 13:42:50 +02:00
"0it [00:00, ?it/s]\u001b[A\n",
2020-05-21 16:20:12 +02:00
"943it [00:00, 8967.39it/s]\u001b[A\n",
" 70%|███████ | 7/10 [01:08<00:29, 9.78s/it]\n",
2020-05-21 13:42:50 +02:00
"0it [00:00, ?it/s]\u001b[A\n",
2020-05-21 16:20:12 +02:00
"943it [00:00, 8853.26it/s]\u001b[A\n",
" 80%|████████ | 8/10 [01:18<00:19, 9.78s/it]\n",
2020-05-21 13:42:50 +02:00
"0it [00:00, ?it/s]\u001b[A\n",
2020-05-21 16:20:12 +02:00
"943it [00:00, 9016.27it/s]\u001b[A\n",
" 90%|█████████ | 9/10 [01:28<00:09, 9.92s/it]\n",
"943it [00:00, 9544.19it/s]\n",
"100%|██████████| 10/10 [01:38<00:00, 9.86s/it]\n"
2020-05-21 13:42:50 +02:00
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Beta</th>\n",
" <th>RMSE</th>\n",
" <th>MAE</th>\n",
" <th>precision</th>\n",
" <th>recall</th>\n",
" <th>F_1</th>\n",
" <th>F_05</th>\n",
" <th>precision_super</th>\n",
" <th>recall_super</th>\n",
" <th>NDCG</th>\n",
" <th>mAP</th>\n",
" <th>MRR</th>\n",
" <th>LAUC</th>\n",
" <th>HR</th>\n",
" <th>Reco in test</th>\n",
" <th>Test coverage</th>\n",
" <th>Shannon</th>\n",
" <th>Gini</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.0</td>\n",
" <td>3.702446</td>\n",
" <td>3.527273</td>\n",
" <td>0.282185</td>\n",
" <td>0.192092</td>\n",
" <td>0.186749</td>\n",
" <td>0.216980</td>\n",
" <td>0.204185</td>\n",
" <td>0.240096</td>\n",
" <td>0.339114</td>\n",
" <td>0.204905</td>\n",
" <td>0.572157</td>\n",
" <td>0.593544</td>\n",
" <td>0.875928</td>\n",
" <td>1.000000</td>\n",
" <td>0.077201</td>\n",
" <td>3.875892</td>\n",
" <td>0.974947</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.1</td>\n",
" <td>3.703312</td>\n",
" <td>3.528128</td>\n",
" <td>0.290138</td>\n",
" <td>0.197597</td>\n",
" <td>0.192259</td>\n",
" <td>0.223336</td>\n",
" <td>0.210944</td>\n",
" <td>0.246153</td>\n",
" <td>0.347768</td>\n",
" <td>0.212034</td>\n",
" <td>0.581038</td>\n",
" <td>0.596328</td>\n",
" <td>0.884411</td>\n",
" <td>1.000000</td>\n",
" <td>0.085137</td>\n",
" <td>3.957416</td>\n",
" <td>0.972784</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.2</td>\n",
" <td>3.703825</td>\n",
" <td>3.528636</td>\n",
" <td>0.297137</td>\n",
" <td>0.201202</td>\n",
" <td>0.196067</td>\n",
" <td>0.228169</td>\n",
" <td>0.218026</td>\n",
" <td>0.252767</td>\n",
" <td>0.355655</td>\n",
" <td>0.219909</td>\n",
" <td>0.588904</td>\n",
" <td>0.598160</td>\n",
" <td>0.886532</td>\n",
" <td>1.000000</td>\n",
" <td>0.094517</td>\n",
" <td>4.053212</td>\n",
" <td>0.969980</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.3</td>\n",
" <td>3.704130</td>\n",
" <td>3.528939</td>\n",
" <td>0.303499</td>\n",
" <td>0.204749</td>\n",
" <td>0.199901</td>\n",
" <td>0.232829</td>\n",
" <td>0.225107</td>\n",
" <td>0.260797</td>\n",
" <td>0.363757</td>\n",
" <td>0.226825</td>\n",
" <td>0.599969</td>\n",
" <td>0.599964</td>\n",
" <td>0.888653</td>\n",
" <td>1.000000</td>\n",
" <td>0.105339</td>\n",
" <td>4.147779</td>\n",
" <td>0.966948</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.4</td>\n",
" <td>3.704313</td>\n",
" <td>3.529120</td>\n",
" <td>0.308908</td>\n",
" <td>0.208811</td>\n",
" <td>0.203854</td>\n",
" <td>0.237241</td>\n",
" <td>0.229614</td>\n",
" <td>0.266918</td>\n",
" <td>0.370758</td>\n",
" <td>0.232673</td>\n",
" <td>0.609385</td>\n",
" <td>0.602014</td>\n",
" <td>0.895016</td>\n",
" <td>0.999894</td>\n",
" <td>0.132035</td>\n",
" <td>4.259682</td>\n",
" <td>0.962989</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.5</td>\n",
" <td>3.704422</td>\n",
" <td>3.529229</td>\n",
" <td>0.314316</td>\n",
" <td>0.211411</td>\n",
" <td>0.206768</td>\n",
" <td>0.240986</td>\n",
" <td>0.237124</td>\n",
" <td>0.273416</td>\n",
" <td>0.378307</td>\n",
" <td>0.239297</td>\n",
" <td>0.622792</td>\n",
" <td>0.603327</td>\n",
" <td>0.903499</td>\n",
" <td>0.999046</td>\n",
" <td>0.168831</td>\n",
" <td>4.411281</td>\n",
" <td>0.956648</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.6</td>\n",
" <td>3.704488</td>\n",
" <td>3.529295</td>\n",
" <td>0.314634</td>\n",
" <td>0.206209</td>\n",
" <td>0.204818</td>\n",
" <td>0.240159</td>\n",
" <td>0.242489</td>\n",
" <td>0.273850</td>\n",
" <td>0.376438</td>\n",
" <td>0.238428</td>\n",
" <td>0.622042</td>\n",
" <td>0.600721</td>\n",
" <td>0.897137</td>\n",
" <td>0.996394</td>\n",
" <td>0.212843</td>\n",
" <td>4.621938</td>\n",
" <td>0.945932</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.7</td>\n",
" <td>3.704528</td>\n",
" <td>3.529335</td>\n",
" <td>0.304136</td>\n",
" <td>0.187298</td>\n",
" <td>0.191990</td>\n",
" <td>0.228749</td>\n",
" <td>0.238305</td>\n",
" <td>0.256201</td>\n",
" <td>0.358807</td>\n",
" <td>0.226808</td>\n",
" <td>0.593897</td>\n",
" <td>0.591207</td>\n",
" <td>0.868505</td>\n",
" <td>0.983033</td>\n",
" <td>0.256854</td>\n",
" <td>4.898568</td>\n",
" <td>0.928065</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.8</td>\n",
" <td>3.704552</td>\n",
" <td>3.529360</td>\n",
" <td>0.266384</td>\n",
" <td>0.147571</td>\n",
" <td>0.158660</td>\n",
" <td>0.194838</td>\n",
" <td>0.214485</td>\n",
" <td>0.209336</td>\n",
" <td>0.299850</td>\n",
" <td>0.184356</td>\n",
" <td>0.492852</td>\n",
" <td>0.571152</td>\n",
" <td>0.803818</td>\n",
" <td>0.936373</td>\n",
" <td>0.341270</td>\n",
" <td>5.257397</td>\n",
" <td>0.895882</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.9</td>\n",
" <td>3.704567</td>\n",
" <td>3.529375</td>\n",
" <td>0.162354</td>\n",
" <td>0.076967</td>\n",
" <td>0.089233</td>\n",
" <td>0.114583</td>\n",
" <td>0.134657</td>\n",
" <td>0.113253</td>\n",
" <td>0.160868</td>\n",
" <td>0.085486</td>\n",
" <td>0.243590</td>\n",
" <td>0.535405</td>\n",
" <td>0.580064</td>\n",
" <td>0.800106</td>\n",
" <td>0.415584</td>\n",
" <td>5.563910</td>\n",
" <td>0.857396</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Beta RMSE MAE precision recall F_1 F_05 \\\n",
"0 0.0 3.702446 3.527273 0.282185 0.192092 0.186749 0.216980 \n",
"0 0.1 3.703312 3.528128 0.290138 0.197597 0.192259 0.223336 \n",
"0 0.2 3.703825 3.528636 0.297137 0.201202 0.196067 0.228169 \n",
"0 0.3 3.704130 3.528939 0.303499 0.204749 0.199901 0.232829 \n",
"0 0.4 3.704313 3.529120 0.308908 0.208811 0.203854 0.237241 \n",
"0 0.5 3.704422 3.529229 0.314316 0.211411 0.206768 0.240986 \n",
"0 0.6 3.704488 3.529295 0.314634 0.206209 0.204818 0.240159 \n",
"0 0.7 3.704528 3.529335 0.304136 0.187298 0.191990 0.228749 \n",
"0 0.8 3.704552 3.529360 0.266384 0.147571 0.158660 0.194838 \n",
"0 0.9 3.704567 3.529375 0.162354 0.076967 0.089233 0.114583 \n",
"\n",
" precision_super recall_super NDCG mAP MRR LAUC \\\n",
"0 0.204185 0.240096 0.339114 0.204905 0.572157 0.593544 \n",
"0 0.210944 0.246153 0.347768 0.212034 0.581038 0.596328 \n",
"0 0.218026 0.252767 0.355655 0.219909 0.588904 0.598160 \n",
"0 0.225107 0.260797 0.363757 0.226825 0.599969 0.599964 \n",
"0 0.229614 0.266918 0.370758 0.232673 0.609385 0.602014 \n",
"0 0.237124 0.273416 0.378307 0.239297 0.622792 0.603327 \n",
"0 0.242489 0.273850 0.376438 0.238428 0.622042 0.600721 \n",
"0 0.238305 0.256201 0.358807 0.226808 0.593897 0.591207 \n",
"0 0.214485 0.209336 0.299850 0.184356 0.492852 0.571152 \n",
"0 0.134657 0.113253 0.160868 0.085486 0.243590 0.535405 \n",
"\n",
" HR Reco in test Test coverage Shannon Gini \n",
"0 0.875928 1.000000 0.077201 3.875892 0.974947 \n",
"0 0.884411 1.000000 0.085137 3.957416 0.972784 \n",
"0 0.886532 1.000000 0.094517 4.053212 0.969980 \n",
"0 0.888653 1.000000 0.105339 4.147779 0.966948 \n",
"0 0.895016 0.999894 0.132035 4.259682 0.962989 \n",
"0 0.903499 0.999046 0.168831 4.411281 0.956648 \n",
"0 0.897137 0.996394 0.212843 4.621938 0.945932 \n",
"0 0.868505 0.983033 0.256854 4.898568 0.928065 \n",
"0 0.803818 0.936373 0.341270 5.257397 0.895882 \n",
"0 0.580064 0.800106 0.415584 5.563910 0.857396 "
]
},
2020-05-21 16:20:12 +02:00
"execution_count": 8,
2020-05-21 13:42:50 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from tqdm import tqdm\n",
"\n",
"# The test set is the same for every beta, so read it once\n",
"test_df = pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n",
"\n",
"beta_rows = []\n",
"for beta in tqdm([round(i,1) for i in np.arange(0,1,0.1)]):\n",
"    model = RP3Beta()\n",
"    model.fit(train_ui, alpha=1, beta=beta)\n",
"    reco = pd.DataFrame(model.recommend(user_code_id, item_code_id, topK=10))\n",
"    estimations_df = pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))\n",
"    # A fresh copy per run, in case evaluate() modifies the frame it is given\n",
"    to_append = ev.evaluate(test=test_df.copy(),\n",
"                            estimations_df=estimations_df,\n",
"                            reco=np.array(reco),\n",
"                            super_reactions=[4,5])\n",
"    to_append.insert(0, \"Beta\", beta)\n",
"    beta_rows.append(to_append)\n",
"\n",
"# ignore_index gives every beta its own row label instead of repeating 0\n",
"result = pd.concat(beta_rows, ignore_index=True)\n",
"result"
]
},
{
"cell_type": "code",
2020-05-21 16:20:12 +02:00
"execution_count": 9,
2020-05-21 13:42:50 +02:00
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
2020-05-21 16:20:12 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAABCwAAAkoCAYAAACgVC5GAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvAOZPmwAAIABJREFUeJzs3XlcVXX+x/HXlx1BwA0UEHEXUMFdM5c0l8q0PcsWc8qpqV97M1NT00zTzDTTYtMytkw1LaZmm+1qmZWTe+G+KwKioCgCIvv398e9EBKWKdzD8n4+HvfBPed8z72fQ/nl3M/9fj9fY61FRERERERERKQ+8XI6ABERERERERGR6pSwEBEREREREZF6RwkLEREREREREal3lLAQERERERERkXpHCQsRERERERERqXeUsBARERERERGRekcJCxEREREREalkjJlijFl4Eu2eM8Y84ImYpGlSwkIaHWNMijHmmDEm3xiz3xjzX2NMsPvYf40x1hgzsdo5T7r3T3Vv+xljHjfGpLtfZ7cxZsYJ3qPi8YxHL1REpIFw95nFxpjW1fYnu/ve2Cr7/uTeN7Ba26nGmLJq/W6+MSbSM1chItJ0WGtnWWvHnkS7G621f/FETNI0KWEhjdX51tpgIAnoA9xb5dg24NqKDWOMD3ApsLNKm3uB/sBAoDlwFvB9Te9R5XFL7V+GiEijsRu4omLDGNMLCKzawBhjgKuBQ1Tpp6tYVq3fDbbWZtRl0CIiDZX7HlekQVPCQho1a+1+YAGuxEWFD4GhxpgW7u3xwDpgf5U2A4D3rLUZ1iXFWvuaR4IWEWmcXgeuqbJ9LVC9Xx0GRAK3AZONMX4eik1EpMFwj1q71xizyRhz2BjzijEmwBgz0j06+HfGmP3AK+72E9wj2nKMMd8aY3pXea32xph3jTEHjDHZFSOG3aPalrqfG2PMDGNMljHmiDFmnTGmp/vYf40xD1d5vRuMMTuMMYeMMR9UHQXnHj13ozFmuzvuZ92JapETUsJCGjVjTDRwDrCjyu5C4ANgsnv7Gn5807wcuNMY8xtjTC91piIip205EGKMiTPGeAOXA29Ua3MtrqTyXPf2BA/GJyLSkEwBxgGdgW7A/e79bYGWQAdgujGmL/Ay8GugFfA88IExxt/dF38E7AFigShgTg3vNRYY7n6fMFz9d3b1RsaYUcDfgcuAdu7Xrf56E3B9MZjobjfuF1+5NClKWEhj9b4xJg9IA7KAB6sdfw24xhgTCowA3q92/O/AP3D9MVgN7DXGVB+e/L47U13xuKHWr0JEpHGpGGUxBtgC7K04YIxphmt63pvW2hLgbX48LWRwtX53JyIiTdMz1to0a+0h4K/8MOWuHHjQWltkrT0G3AA8b61dYa0ts9a+ChQBg3FNfY4E7rHWHrXWFlprl9bwXiW4pkj3AIy1drO1dl8N7aYAL1trv7PWFuGaYj2kap0i4BFrbY61NhX4kuNHQYv8iBIW0lhdYK1tDozE1bkeV+jN3Rm3wZWN/sjdoVc9XmatfdZaOxRXJvmvwMvGmLhq7xFW5fFiHV6PiEhj8DpwJTCVH49suxAoBT5xb88CzjHGtKnSZnm1frdzXQcsIlJPpVV5vgdX4gHggLW2sMqxDsBdVZO9QHt3+/bAHmtt6U+9kbV2MfAM8CyQaYx5wRgTUkPTSHcsFefl4xqJEVWlTdUp2AVA8E+9t4gSFtKoWWu/Av4LPFbD4TeAu/jxTXP11zhmrX0WOAzE13aMIiJNhbV2D67im+cC71Y7fC2uG9dU99zreYAvVQp1iohIpfZVnscAFQWIbbV2acBfqyV7m1lrZ7uPxZxMcU5r7VPW2n5AAq6pIffU0CwDV4IEAGNMEK5pKHtraCtyUpSwkKbgSWCMMab6kLOncA1L/rr6CcaY292FiwKNMT7u6SDN+fFKISIi8sv8ChhlrT1aZV8UMBrX3OYk9yMR19
S8mlYLERFp6m42xkQbY1oC9/FD7Z/qXgRuNMYMchfPDDLGnGeMaQ6sBPYBj7j3BxhjhlZ/AWPMAPf5vsBRXPXgymp4rzeB64wxScYYf+BvwAprbcppX600WUpYSKNnrT2AaxTFA9X2H7LWfmGtrZ6JBjgGPI5r2NpB4GbgYmvtriptPjTG5Fd5vFdHlyAi0mhYa3daa1dX2z0MSLbWLrTW7q944Eos966oRo9rLnR+tccAj16AiEj98CawENjlfjxcUyN3f3sDrikdh3EVop/qPlYGnA90AVKBdFwFNasLwZX4OIxrykc2NYxettZ+get++x1ciZDO/FDkXuSUmJo/q4mIiIiIiEh9Y4xJAa631n7udCwidU0jLERERERERESk3lHCQkRERERERETqHU0JEREREREREZF6RyMsRERERERERKTeUcJCREREREREROodH6cDqA2tW7e2sbGxTochInKcNWvWHLTWtnE6Dk9RXywi9VFT6ovVD4tIfXQ6/XCjSFjExsayenX1Jd1FRJxljNnjdAyepL5YROojp/tiY8x44F+AN/Afa+0j1Y7fCNwMlAH5wHRr7SZjzBjgEcAPKAbusdYu/qn3Uj8sIvXR6fTDmhIiIiIiIlIHjDHewLPAOUA8cIUxJr5aszettb2stUnAP4En3PsPAudba3sB1wKveyhsEZF6QwkLEREREZG6MRDYYa3dZa0tBuYAk6o2sNbmVtkMAqx7//fW2gz3/o1AgDHG3wMxi4jUG41iSoiIiIiISD0UBaRV2U4HBlVvZIy5GbgT1/SPUTW8zsXA99baohrOnQ5MB4iJiamFkEVE6o9Gm7AoKSkhPT2dwsJCp0PxuICAAKKjo/H19XU6FBFp4hp7X6z+VkR+hqlhn/3RDmufBZ41xlwJ3I9rCojrBYxJAP4BjK3pDay1LwAvAPTv3/9Hry0iJ6+x37fUtbq4L2q0CYv09HSaN29ObGwsxtT0t6JxstaSnZ1Neno6HTt2dDocEWniGnNfrP5WRE5COtC+ynY0kHGCtuCaMjKzYsMYEw28B1xjrd1ZJxGKSKXGfN9S1+rqvqjR1rAoLCykVatWTe5/NGMMrVq1UlZQROqFxtwXq78VkZOwCuhqjOlojPEDJgMfVG1gjOlaZfM8YLt7fxjwMXCvtfZ/HopXpElrzPctda2u7osabcICaLL/ozXV6xaR+qkx90mN+dpE5PRZa0uBW4AFwGbgLWvtRmPMQ8aYie5mtxhjNhpjknHVsaiYDnIL0AV4wBiT7H6Ee/oaRJoa/W0/dXXxu2vUCQuneXt7k5SURM+ePTn//PPJyckBICUlBWMMDzzwQGXbgwcP4uvryy233ALA1q1bGTlyJElJScTFxTF9+nQAlixZQmhoKElJSZWPzz//3PMXJyLSQBhjuPrqqyu3S0tLadOmDRMmTDiu3aRJkxgyZMhx+/70pz8RFRV1XJ9b0ZeLiJwMa+0n1tpu1trO1tq/uvf90Vr7gfv5bdbaBGttkrX2LGvtRvf+h621Qe79FY8sJ69FRBquM8444yePn3vuufXyHkcJizoUGBhIcnIyGzZsoGXLljz77LOVxzp16sRHH31UuT1v3jwSEhIqt2+99VbuuOMOkpOT2bx5M//3f/9XeWzYsGEkJydXPs4++2zPXJCISAMUFBTEhg0bOHbsGACLFi0iKirquDY5OTl899135OTksHv37uOOVfTFFY+wsDCPxS4iIiJSXVlZ2S8+59tvv/3J45988km9vMdRwsJDhgwZwt69eyu3AwMDiYuLY/Xq1QDMnTuXyy67rPL4vn37iI6Ortzu1auX54IVEWlkzjnnHD7++GMAZs+ezRVXXHHc8XfeeYfzzz+fyZMnM2fOHCdCFBERESElJYUePXpw7bXX0rt3by655BIKCgqIjY3loYce4swzz2TevHns3LmT8ePH069fP4YNG8aWLVsAyMzM5MILLyQxMZHExMTKREVwcDDg+pw5fP
jwypkA33zzDQCxsbEcPHgQgCeeeIKePXvSs2dPnnzyycq44uLiuOGGG0hISGDs2LGVXwbVpUa7SkhVf/5wI5sycmv
2020-05-21 13:42:50 +02:00
"text/plain": [
2020-05-21 16:20:12 +02:00
"<matplotlib.figure.Figure at 0x7fd894586dd8>"
2020-05-21 13:42:50 +02:00
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Plot every evaluation metric as a function of Beta, one subplot per metric.\n",
"# `result` is the tuning table built in the cells above; every column except\n",
"# 'Beta' is treated as a metric.\n",
"metrics = [column for column in result.columns if column != 'Beta']\n",
"\n",
"# Size the grid from the number of metrics instead of hard-coding 6x3, so extra\n",
"# metrics cannot overflow the grid and missing ones do not leave empty rows.\n",
"n_cols = 3\n",
"n_rows = max(1, -(-len(metrics) // n_cols))  # ceiling division, at least one row\n",
"\n",
"# squeeze=False keeps `axes` two-dimensional even when there is a single row.\n",
"fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(18, 7 * n_rows), squeeze=False)\n",
"flat_axes = axes.ravel()  # row-major order: left to right, then top to bottom\n",
"\n",
"for ax, metric in zip(flat_axes, metrics):\n",
"    result.plot(x='Beta', y=metric, ax=ax, title=metric)\n",
"\n",
"# Hide the trailing grid cells that have no metric to show.\n",
"for ax in flat_axes[len(metrics):]:\n",
"    ax.set_visible(False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Check sample recommendations"
]
},
{
"cell_type": "code",
2020-05-21 16:20:12 +02:00
"execution_count": 11,
2020-05-21 13:42:50 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>user</th>\n",
" <th>rating</th>\n",
" <th>title</th>\n",
" <th>genres</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
2020-05-21 16:20:12 +02:00
" <th>66489</th>\n",
" <td>344</td>\n",
2020-05-21 13:42:50 +02:00
" <td>5</td>\n",
2020-05-21 16:20:12 +02:00
" <td>Postino, Il (1994)</td>\n",
" <td>Drama, Romance</td>\n",
2020-05-21 13:42:50 +02:00
" </tr>\n",
" <tr>\n",
2020-05-21 16:20:12 +02:00
" <th>15966</th>\n",
" <td>344</td>\n",
2020-05-21 13:42:50 +02:00
" <td>5</td>\n",
2020-05-21 16:20:12 +02:00
" <td>Godfather, The (1972)</td>\n",
" <td>Action, Crime, Drama</td>\n",
2020-05-21 13:42:50 +02:00
" </tr>\n",
" <tr>\n",
2020-05-21 16:20:12 +02:00
" <th>50067</th>\n",
" <td>344</td>\n",
2020-05-21 13:42:50 +02:00
" <td>5</td>\n",
2020-05-21 16:20:12 +02:00
" <td>Mrs. Brown (Her Majesty, Mrs. Brown) (1997)</td>\n",
" <td>Drama, Romance</td>\n",
2020-05-21 13:42:50 +02:00
" </tr>\n",
" <tr>\n",
2020-05-21 16:20:12 +02:00
" <th>65435</th>\n",
" <td>344</td>\n",
2020-05-21 13:42:50 +02:00
" <td>5</td>\n",
2020-05-21 16:20:12 +02:00
" <td>Speed (1994)</td>\n",
" <td>Action, Romance, Thriller</td>\n",
2020-05-21 13:42:50 +02:00
" </tr>\n",
" <tr>\n",
2020-05-21 16:20:12 +02:00
" <th>50396</th>\n",
" <td>344</td>\n",
2020-05-21 13:42:50 +02:00
" <td>5</td>\n",
2020-05-21 16:20:12 +02:00
" <td>Close Shave, A (1995)</td>\n",
" <td>Animation, Comedy, Thriller</td>\n",
2020-05-21 13:42:50 +02:00
" </tr>\n",
" <tr>\n",
2020-05-21 16:20:12 +02:00
" <th>13793</th>\n",
" <td>344</td>\n",
2020-05-21 13:42:50 +02:00
" <td>5</td>\n",
2020-05-21 16:20:12 +02:00
" <td>Local Hero (1983)</td>\n",
" <td>Comedy</td>\n",
2020-05-21 13:42:50 +02:00
" </tr>\n",
" <tr>\n",
2020-05-21 16:20:12 +02:00
" <th>25784</th>\n",
" <td>344</td>\n",
2020-05-21 13:42:50 +02:00
" <td>5</td>\n",
2020-05-21 16:20:12 +02:00
" <td>Dead Man Walking (1995)</td>\n",
" <td>Drama</td>\n",
2020-05-21 13:42:50 +02:00
" </tr>\n",
" <tr>\n",
2020-05-21 16:20:12 +02:00
" <th>15447</th>\n",
" <td>344</td>\n",
2020-05-21 13:42:50 +02:00
" <td>5</td>\n",
2020-05-21 16:20:12 +02:00
" <td>Blade Runner (1982)</td>\n",
" <td>Film-Noir, Sci-Fi</td>\n",
2020-05-21 13:42:50 +02:00
" </tr>\n",
" <tr>\n",
2020-05-21 16:20:12 +02:00
" <th>24693</th>\n",
" <td>344</td>\n",
2020-05-21 13:42:50 +02:00
" <td>5</td>\n",
2020-05-21 16:20:12 +02:00
" <td>One Flew Over the Cuckoo's Nest (1975)</td>\n",
" <td>Drama</td>\n",
2020-05-21 13:42:50 +02:00
" </tr>\n",
" <tr>\n",
2020-05-21 16:20:12 +02:00
" <th>67385</th>\n",
" <td>344</td>\n",
2020-05-21 13:42:50 +02:00
" <td>5</td>\n",
2020-05-21 16:20:12 +02:00
" <td>Good Will Hunting (1997)</td>\n",
2020-05-21 13:42:50 +02:00
" <td>Drama</td>\n",
" </tr>\n",
" <tr>\n",
2020-05-21 16:20:12 +02:00
" <th>55907</th>\n",
" <td>344</td>\n",
2020-05-21 13:42:50 +02:00
" <td>5</td>\n",
2020-05-21 16:20:12 +02:00
" <td>My Life as a Dog (Mitt liv som hund) (1985)</td>\n",
" <td>Drama</td>\n",
2020-05-21 13:42:50 +02:00
" </tr>\n",
" <tr>\n",
2020-05-21 16:20:12 +02:00
" <th>23403</th>\n",
" <td>344</td>\n",
2020-05-21 13:42:50 +02:00
" <td>5</td>\n",
2020-05-21 16:20:12 +02:00
" <td>Eat Drink Man Woman (1994)</td>\n",
" <td>Comedy, Drama</td>\n",
2020-05-21 13:42:50 +02:00
" </tr>\n",
" <tr>\n",
2020-05-21 16:20:12 +02:00
" <th>18500</th>\n",
" <td>344</td>\n",
2020-05-21 13:42:50 +02:00
" <td>5</td>\n",
2020-05-21 16:20:12 +02:00
" <td>Amadeus (1984)</td>\n",
" <td>Drama, Mystery</td>\n",
2020-05-21 13:42:50 +02:00
" </tr>\n",
" <tr>\n",
2020-05-21 16:20:12 +02:00
" <th>23038</th>\n",
" <td>344</td>\n",
2020-05-21 13:42:50 +02:00
" <td>5</td>\n",
2020-05-21 16:20:12 +02:00
" <td>Apt Pupil (1998)</td>\n",
" <td>Drama, Thriller</td>\n",
2020-05-21 13:42:50 +02:00
" </tr>\n",
" <tr>\n",
2020-05-21 16:20:12 +02:00
" <th>57898</th>\n",
" <td>344</td>\n",
2020-05-21 13:42:50 +02:00
" <td>5</td>\n",
2020-05-21 16:20:12 +02:00
" <td>Wrong Trousers, The (1993)</td>\n",
" <td>Animation, Comedy</td>\n",
2020-05-21 13:42:50 +02:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2020-05-21 16:20:12 +02:00
" user rating title \\\n",
"66489 344 5 Postino, Il (1994) \n",
"15966 344 5 Godfather, The (1972) \n",
"50067 344 5 Mrs. Brown (Her Majesty, Mrs. Brown) (1997) \n",
"65435 344 5 Speed (1994) \n",
"50396 344 5 Close Shave, A (1995) \n",
"13793 344 5 Local Hero (1983) \n",
"25784 344 5 Dead Man Walking (1995) \n",
"15447 344 5 Blade Runner (1982) \n",
"24693 344 5 One Flew Over the Cuckoo's Nest (1975) \n",
"67385 344 5 Good Will Hunting (1997) \n",
"55907 344 5 My Life as a Dog (Mitt liv som hund) (1985) \n",
"23403 344 5 Eat Drink Man Woman (1994) \n",
"18500 344 5 Amadeus (1984) \n",
"23038 344 5 Apt Pupil (1998) \n",
"57898 344 5 Wrong Trousers, The (1993) \n",
2020-05-21 13:42:50 +02:00
"\n",
2020-05-21 16:20:12 +02:00
" genres \n",
"66489 Drama, Romance \n",
"15966 Action, Crime, Drama \n",
"50067 Drama, Romance \n",
"65435 Action, Romance, Thriller \n",
"50396 Animation, Comedy, Thriller \n",
"13793 Comedy \n",
"25784 Drama \n",
"15447 Film-Noir, Sci-Fi \n",
"24693 Drama \n",
"67385 Drama \n",
"55907 Drama \n",
"23403 Comedy, Drama \n",
"18500 Drama, Mystery \n",
"23038 Drama, Thriller \n",
"57898 Animation, Comedy "
2020-05-21 13:42:50 +02:00
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>user</th>\n",
" <th>rec_nb</th>\n",
" <th>title</th>\n",
" <th>genres</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
2020-05-21 16:20:12 +02:00
" <th>158</th>\n",
" <td>344.0</td>\n",
2020-05-21 13:42:50 +02:00
" <td>1</td>\n",
" <td>Star Wars (1977)</td>\n",
" <td>Action, Adventure, Romance, Sci-Fi, War</td>\n",
" </tr>\n",
" <tr>\n",
2020-05-21 16:20:12 +02:00
" <th>7055</th>\n",
" <td>344.0</td>\n",
2020-05-21 13:42:50 +02:00
" <td>2</td>\n",
" <td>Fargo (1996)</td>\n",
" <td>Crime, Drama, Thriller</td>\n",
" </tr>\n",
" <tr>\n",
2020-05-21 16:20:12 +02:00
" <th>2769</th>\n",
" <td>344.0</td>\n",
2020-05-21 13:42:50 +02:00
" <td>3</td>\n",
2020-05-21 16:20:12 +02:00
" <td>Return of the Jedi (1983)</td>\n",
" <td>Action, Adventure, Romance, Sci-Fi, War</td>\n",
2020-05-21 13:42:50 +02:00
" </tr>\n",
" <tr>\n",
2020-05-21 16:20:12 +02:00
" <th>1078</th>\n",
" <td>344.0</td>\n",
2020-05-21 13:42:50 +02:00
" <td>4</td>\n",
2020-05-21 16:20:12 +02:00
" <td>English Patient, The (1996)</td>\n",
" <td>Drama, Romance, War</td>\n",
2020-05-21 13:42:50 +02:00
" </tr>\n",
" <tr>\n",
2020-05-21 16:20:12 +02:00
" <th>4535</th>\n",
" <td>344.0</td>\n",
2020-05-21 13:42:50 +02:00
" <td>5</td>\n",
2020-05-21 16:20:12 +02:00
" <td>Air Force One (1997)</td>\n",
" <td>Action, Thriller</td>\n",
2020-05-21 13:42:50 +02:00
" </tr>\n",
" <tr>\n",
2020-05-21 16:20:12 +02:00
" <th>6433</th>\n",
" <td>344.0</td>\n",
2020-05-21 13:42:50 +02:00
" <td>6</td>\n",
2020-05-21 16:20:12 +02:00
" <td>Pulp Fiction (1994)</td>\n",
" <td>Crime, Drama</td>\n",
2020-05-21 13:42:50 +02:00
" </tr>\n",
" <tr>\n",
2020-05-21 16:20:12 +02:00
" <th>4851</th>\n",
" <td>344.0</td>\n",
2020-05-21 13:42:50 +02:00
" <td>7</td>\n",
2020-05-21 16:20:12 +02:00
" <td>Titanic (1997)</td>\n",
" <td>Action, Drama, Romance</td>\n",
2020-05-21 13:42:50 +02:00
" </tr>\n",
" <tr>\n",
2020-05-21 16:20:12 +02:00
" <th>5110</th>\n",
" <td>344.0</td>\n",
2020-05-21 13:42:50 +02:00
" <td>8</td>\n",
2020-05-21 16:20:12 +02:00
" <td>Full Monty, The (1997)</td>\n",
" <td>Comedy</td>\n",
2020-05-21 13:42:50 +02:00
" </tr>\n",
" <tr>\n",
2020-05-21 16:20:12 +02:00
" <th>1553</th>\n",
" <td>344.0</td>\n",
2020-05-21 13:42:50 +02:00
" <td>9</td>\n",
2020-05-21 16:20:12 +02:00
" <td>Schindler's List (1993)</td>\n",
" <td>Drama, War</td>\n",
2020-05-21 13:42:50 +02:00
" </tr>\n",
" <tr>\n",
2020-05-21 16:20:12 +02:00
" <th>6646</th>\n",
" <td>344.0</td>\n",
2020-05-21 13:42:50 +02:00
" <td>10</td>\n",
2020-05-21 16:20:12 +02:00
" <td>Empire Strikes Back, The (1980)</td>\n",
" <td>Action, Adventure, Drama, Romance, Sci-Fi, War</td>\n",
2020-05-21 13:42:50 +02:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2020-05-21 16:20:12 +02:00
" user rec_nb title \\\n",
"158 344.0 1 Star Wars (1977) \n",
"7055 344.0 2 Fargo (1996) \n",
"2769 344.0 3 Return of the Jedi (1983) \n",
"1078 344.0 4 English Patient, The (1996) \n",
"4535 344.0 5 Air Force One (1997) \n",
"6433 344.0 6 Pulp Fiction (1994) \n",
"4851 344.0 7 Titanic (1997) \n",
"5110 344.0 8 Full Monty, The (1997) \n",
"1553 344.0 9 Schindler's List (1993) \n",
"6646 344.0 10 Empire Strikes Back, The (1980) \n",
2020-05-21 13:42:50 +02:00
"\n",
" genres \n",
2020-05-21 16:20:12 +02:00
"158 Action, Adventure, Romance, Sci-Fi, War \n",
"7055 Crime, Drama, Thriller \n",
"2769 Action, Adventure, Romance, Sci-Fi, War \n",
"1078 Drama, Romance, War \n",
"4535 Action, Thriller \n",
"6433 Crime, Drama \n",
"4851 Action, Drama, Romance \n",
"5110 Comedy \n",
"1553 Drama, War \n",
"6646 Action, Adventure, Drama, Romance, Sci-Fi, War "
2020-05-21 13:42:50 +02:00
]
},
2020-05-21 16:20:12 +02:00
"execution_count": 11,
2020-05-21 13:42:50 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train = pd.read_csv('./Datasets/ml-100k/train.csv', sep='\\t', header=None,\n",
"                    names=['user', 'item', 'rating', 'timestamp'])\n",
"items = pd.read_csv('./Datasets/ml-100k/movies.csv')\n",
"\n",
"# Pick one random user. No seed is set, so a different user is shown on every run.\n",
"user = random.choice(list(set(train['user'])))\n",
"\n",
"# Show up to 15 of that user's highest-rated movies from the training set.\n",
"train_content = pd.merge(train, items, left_on='item', right_on='id')\n",
"display(\n",
"    train_content[train_content['user'] == user][['user', 'rating', 'title', 'genres']]\n",
"    .sort_values(by='rating', ascending=False)[:15]\n",
")\n",
"\n",
"# ndmin=2 keeps the array two-dimensional even if the file holds a single row.\n",
"reco = np.loadtxt('Recommendations generated/ml-100k/Self_P3_reco.csv', delimiter=',', ndmin=2)\n",
"\n",
"# Column 0 holds the user and the odd columns hold the recommended items. The even\n",
"# columns (scores) are skipped - they are not used in evaluation.\n",
"# loadtxt returns floats; cast to int so ids are displayed as 344 rather than 344.0.\n",
"reco_users = reco[:, 0].astype(int)\n",
"reco_items = reco[:, 1::2].astype(int)\n",
"top_k = reco_items.shape[1]\n",
"\n",
"# Rebuild a long user-item dataframe: one row per (user, rank, item), in file order.\n",
"recommended = pd.DataFrame({\n",
"    'user': np.repeat(reco_users, top_k),\n",
"    'rec_nb': np.tile(np.arange(1, top_k + 1), len(reco_users)),\n",
"    'item': reco_items.ravel(),\n",
"})\n",
"\n",
"recommended_content = pd.merge(recommended, items, left_on='item', right_on='id')\n",
"recommended_content[recommended_content['user'] == user][['user', 'rec_nb', 'title', 'genres']].sort_values(by='rec_nb')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# project task 6: generate recommendations of RP3Beta for hyperparameters found to optimize recall"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"# use better values than (1,0) for alpha and beta\n",
"# if you want you can also modify the model to consider different weights (we took as weights user ratings, maybe take ones or squares of ratings instead)\n",
"# save the output in 'Recommendations generated/ml-100k/Self_RP3Beta_estimations.csv'\n",
"# and 'Recommendations generated/ml-100k/Self_RP3Beta_reco.csv'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# project task 7 (optional): implement graph-based model of your choice "
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"# for example change length of paths in RP3beta\n",
"# save the output in 'Recommendations generated/ml-100k/Self_GraphTask_estimations.csv'\n",
"# and 'Recommendations generated/ml-100k/Self_GraphTask_reco.csv'"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}