WSR-432813/.ipynb_checkpoints/P5. Graph-based-checkpoint.ipynb

1683 lines
580 KiB
Plaintext
Raw Normal View History

2021-06-11 01:28:24 +02:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Self-made RP3-beta"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import helpers\n",
"import pandas as pd\n",
"import numpy as np\n",
"import scipy.sparse as sparse\n",
"from collections import defaultdict\n",
"from itertools import chain\n",
"import random\n",
"import time\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Load the tab-separated, header-less ml-100k train and test splits.\n",
"train_read = pd.read_csv(\n",
"    \"./Datasets/ml-100k/train.csv\", sep=\"\\t\", header=None\n",
")\n",
"test_read = pd.read_csv(\n",
"    \"./Datasets/ml-100k/test.csv\", sep=\"\\t\", header=None\n",
")\n",
"\n",
"# helpers.data_to_csr is project code that is not visible here; presumably it returns\n",
"# the two rating matrices followed by the user and item code/id lookup tables.\n",
"csr_bundle = helpers.data_to_csr(train_read, test_read)\n",
"train_ui, test_ui, user_code_id, user_id_code, item_code_id, item_id_code = csr_bundle"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"class RP3Beta:\n",
"    \"\"\"RP3-beta recommender: scores items with a user -> item -> user -> item walk.\n",
"\n",
"    fit() stores a dense score matrix in self.estimations (one row per user,\n",
"    one column per item); recommend() and estimate() read it together with\n",
"    self.train_ui.\n",
"    \"\"\"\n",
"\n",
"    def fit(self, train_ui, alpha, beta):\n",
"        \"\"\"Build the score matrix from the training ratings.\n",
"\n",
"        Edges are weighted by the explicit ratings, so a highly rated movie is\n",
"        followed with higher probability.\n",
"\n",
"        Parameters\n",
"        ----------\n",
"        train_ui : scipy.sparse.csr_matrix\n",
"            Training ratings, users in rows and items in columns.\n",
"        alpha : float\n",
"            Exponent applied element-wise to both transition matrices.\n",
"        beta : float\n",
"            Exponent of the item-popularity divisor; 0 turns the penalty off.\n",
"        \"\"\"\n",
"        self.train_ui = train_ui\n",
"        self.train_iu = train_ui.transpose()\n",
"\n",
"        self.alpha = alpha\n",
"        self.beta = beta\n",
"\n",
"        # Pui: each user row divided by the sum of that user's ratings.\n",
"        # A user with no training rating produces NaN (0 / 0) in that row;\n",
"        # recommend() skips NaN scores and estimate() replaces them with 1.\n",
"        Pui = sparse.csr_matrix(self.train_ui / self.train_ui.sum(axis=1))\n",
"\n",
"        # Piu: the same normalisation per item, dividing by 1 instead of 0 for\n",
"        # items nobody rated. np.where keeps the dtype of the sums, whereas\n",
"        # np.vectorize would infer the output dtype from the first element only.\n",
"        item_sums = self.train_iu.sum(axis=1)\n",
"        Piu = sparse.csr_matrix(self.train_iu / np.where(item_sums > 0, item_sums, 1))\n",
"\n",
"        # Number of users with a positive rating for each item (popularity).\n",
"        item_orders = (self.train_ui > 0).sum(axis=0)\n",
"\n",
"        Pui = Pui.power(self.alpha)\n",
"        Piu = Piu.power(self.alpha)\n",
"\n",
"        # Three steps: user -> item -> user -> item.\n",
"        P3 = Pui * Piu * Pui\n",
"\n",
"        # Popularity penalty: divide every item column by popularity ** beta.\n",
"        P3 /= np.power(np.where(item_orders > 0, item_orders, 1), self.beta)\n",
"\n",
"        self.estimations = np.array(P3)\n",
"\n",
"    def recommend(self, user_code_id, item_code_id, topK=10):\n",
"        \"\"\"Return the topK best unseen items for every user.\n",
"\n",
"        Each row has the format [user, item1, score1, item2, score2, ...],\n",
"        sorted by decreasing score; equal scores keep ascending item-code order.\n",
"        Items stored in the user's train_ui row and items with a NaN score are\n",
"        never recommended, and a user left without candidates gets no row.\n",
"\n",
"        user_code_id / item_code_id translate row / column numbers to the ids\n",
"        written to the output.\n",
"        \"\"\"\n",
"        indptr = self.train_ui.indptr\n",
"        indices = self.train_ui.indices\n",
"\n",
"        result = []\n",
"        for nb_user, scores in enumerate(self.estimations):\n",
"            # Candidates: valid score and not already rated in the training data.\n",
"            candidate = ~np.isnan(scores)\n",
"            candidate[indices[indptr[nb_user] : indptr[nb_user + 1]]] = False\n",
"            items = np.flatnonzero(candidate)\n",
"            if items.size == 0:\n",
"                continue\n",
"\n",
"            # A stable sort of the negated scores gives descending order while\n",
"            # keeping ties in item order.\n",
"            best = items[np.argsort(-scores[items], kind=\"stable\")[:topK]]\n",
"\n",
"            row = [user_code_id[nb_user]]\n",
"            for item in best:\n",
"                row.extend((item_code_id[int(item)], scores[item]))\n",
"            result.append(row)\n",
"        return result\n",
"\n",
"    def estimate(self, user_code_id, item_code_id, test_ui):\n",
"        \"\"\"Return [user, item, score] for every non-zero entry of test_ui.\n",
"\n",
"        A NaN score is reported as 1.\n",
"        \"\"\"\n",
"        result = []\n",
"        for user, item in zip(*test_ui.nonzero()):\n",
"            score = self.estimations[user, item]\n",
"            result.append(\n",
"                [\n",
"                    user_code_id[user],\n",
"                    item_code_id[item],\n",
"                    1 if np.isnan(score) else score,\n",
"                ]\n",
"            )\n",
"        return result"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# alpha=1 leaves the transition probabilities unchanged and beta=0 divides by 1,\n",
"# so this is the walk without exponent or popularity penalty.\n",
"model = RP3Beta()\n",
"model.fit(train_ui=train_ui, alpha=1, beta=0)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Top-10 recommendations, one row per user: user, item1, score1, item2, score2, ...\n",
"reco_rows = model.recommend(user_code_id, item_code_id, topK=10)\n",
"top_n = pd.DataFrame(reco_rows)\n",
"top_n.to_csv(\n",
"    path_or_buf=\"Recommendations generated/ml-100k/Self_P3_reco.csv\",\n",
"    header=False,\n",
"    index=False,\n",
")\n",
"\n",
"# Scores for every (user, item) pair present in the test matrix.\n",
"estimation_rows = model.estimate(user_code_id, item_code_id, test_ui)\n",
"estimations = pd.DataFrame(estimation_rows)\n",
"estimations.to_csv(\n",
"    path_or_buf=\"Recommendations generated/ml-100k/Self_P3_estimations.csv\",\n",
"    header=False,\n",
"    index=False,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"943it [00:00, 7291.77it/s]\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>RMSE</th>\n",
" <th>MAE</th>\n",
" <th>precision</th>\n",
" <th>recall</th>\n",
" <th>F_1</th>\n",
" <th>F_05</th>\n",
" <th>precision_super</th>\n",
" <th>recall_super</th>\n",
" <th>NDCG</th>\n",
" <th>mAP</th>\n",
" <th>MRR</th>\n",
" <th>LAUC</th>\n",
" <th>HR</th>\n",
" <th>H2R</th>\n",
" <th>Reco in test</th>\n",
" <th>Test coverage</th>\n",
" <th>Shannon</th>\n",
" <th>Gini</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>3.702446</td>\n",
" <td>3.527273</td>\n",
" <td>0.282185</td>\n",
" <td>0.192092</td>\n",
" <td>0.186749</td>\n",
" <td>0.21698</td>\n",
" <td>0.204185</td>\n",
" <td>0.240096</td>\n",
" <td>0.339114</td>\n",
" <td>0.204905</td>\n",
" <td>0.572157</td>\n",
" <td>0.593544</td>\n",
" <td>0.875928</td>\n",
" <td>0.685048</td>\n",
" <td>1.0</td>\n",
" <td>0.077201</td>\n",
" <td>3.875892</td>\n",
" <td>0.974947</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" RMSE MAE precision recall F_1 F_05 \\\n",
"0 3.702446 3.527273 0.282185 0.192092 0.186749 0.21698 \n",
"\n",
" precision_super recall_super NDCG mAP MRR LAUC \\\n",
"0 0.204185 0.240096 0.339114 0.204905 0.572157 0.593544 \n",
"\n",
" HR H2R Reco in test Test coverage Shannon Gini \n",
"0 0.875928 0.685048 1.0 0.077201 3.875892 0.974947 "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import evaluation_measures as ev\n",
"\n",
"# Read back the two files written by the previous cell.\n",
"estimations_path = \"Recommendations generated/ml-100k/Self_P3_estimations.csv\"\n",
"estimations_df = pd.read_csv(estimations_path, header=None)\n",
"\n",
"reco_path = \"Recommendations generated/ml-100k/Self_P3_reco.csv\"\n",
"reco = np.loadtxt(reco_path, delimiter=\",\")\n",
"\n",
"# Ratings 4 and 5 are the ones passed as super reactions.\n",
"test_for_eval = pd.read_csv(\"./Datasets/ml-100k/test.csv\", sep=\"\\t\", header=None)\n",
"ev.evaluate(\n",
"    test=test_for_eval,\n",
"    estimations_df=estimations_df,\n",
"    reco=reco,\n",
"    super_reactions=[4, 5],\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Let's check hyperparameters"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Alpha"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" 0%| | 0/8 [00:00<?, ?it/s]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 7531.55it/s]\u001b[A\n",
" 12%|█▎ | 1/8 [00:12<01:29, 12.74s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 7345.73it/s]\u001b[A\n",
" 25%|██▌ | 2/8 [00:25<01:16, 12.72s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 7308.91it/s]\u001b[A\n",
" 38%|███▊ | 3/8 [00:39<01:05, 13.11s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 7282.27it/s]\u001b[A\n",
" 50%|█████ | 4/8 [00:51<00:52, 13.02s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 7288.78it/s]\u001b[A\n",
" 62%|██████▎ | 5/8 [01:04<00:38, 12.97s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 7404.95it/s]\u001b[A\n",
" 75%|███████▌ | 6/8 [01:17<00:25, 12.85s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 7263.83it/s]\u001b[A\n",
" 88%|████████▊ | 7/8 [01:30<00:12, 12.94s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 7220.02it/s]\u001b[A\n",
"100%|██████████| 8/8 [01:43<00:00, 12.96s/it]\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Alpha</th>\n",
" <th>RMSE</th>\n",
" <th>MAE</th>\n",
" <th>precision</th>\n",
" <th>recall</th>\n",
" <th>F_1</th>\n",
" <th>F_05</th>\n",
" <th>precision_super</th>\n",
" <th>recall_super</th>\n",
" <th>NDCG</th>\n",
" <th>mAP</th>\n",
" <th>MRR</th>\n",
" <th>LAUC</th>\n",
" <th>HR</th>\n",
" <th>H2R</th>\n",
" <th>Reco in test</th>\n",
" <th>Test coverage</th>\n",
" <th>Shannon</th>\n",
" <th>Gini</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.2</td>\n",
" <td>268.177832</td>\n",
" <td>211.732649</td>\n",
" <td>0.262672</td>\n",
" <td>0.166858</td>\n",
" <td>0.166277</td>\n",
" <td>0.197184</td>\n",
" <td>0.187661</td>\n",
" <td>0.203252</td>\n",
" <td>0.320910</td>\n",
" <td>0.196132</td>\n",
" <td>0.563378</td>\n",
" <td>0.580866</td>\n",
" <td>0.850477</td>\n",
" <td>0.629905</td>\n",
" <td>1.000000</td>\n",
" <td>0.060606</td>\n",
" <td>3.669627</td>\n",
" <td>0.979636</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.4</td>\n",
" <td>10.546689</td>\n",
" <td>7.792373</td>\n",
" <td>0.268505</td>\n",
" <td>0.172669</td>\n",
" <td>0.171569</td>\n",
" <td>0.202643</td>\n",
" <td>0.192489</td>\n",
" <td>0.212653</td>\n",
" <td>0.326760</td>\n",
" <td>0.200172</td>\n",
" <td>0.565148</td>\n",
" <td>0.583801</td>\n",
" <td>0.854719</td>\n",
" <td>0.644751</td>\n",
" <td>1.000000</td>\n",
" <td>0.064214</td>\n",
" <td>3.726996</td>\n",
" <td>0.978426</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.6</td>\n",
" <td>3.143988</td>\n",
" <td>2.948790</td>\n",
" <td>0.274655</td>\n",
" <td>0.180502</td>\n",
" <td>0.177820</td>\n",
" <td>0.208730</td>\n",
" <td>0.198176</td>\n",
" <td>0.222746</td>\n",
" <td>0.332872</td>\n",
" <td>0.203290</td>\n",
" <td>0.568872</td>\n",
" <td>0.587738</td>\n",
" <td>0.870626</td>\n",
" <td>0.657476</td>\n",
" <td>1.000000</td>\n",
" <td>0.065657</td>\n",
" <td>3.785282</td>\n",
" <td>0.977090</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.8</td>\n",
" <td>3.670728</td>\n",
" <td>3.495735</td>\n",
" <td>0.281972</td>\n",
" <td>0.189868</td>\n",
" <td>0.185300</td>\n",
" <td>0.216071</td>\n",
" <td>0.203541</td>\n",
" <td>0.236751</td>\n",
" <td>0.339867</td>\n",
" <td>0.206688</td>\n",
" <td>0.573729</td>\n",
" <td>0.592432</td>\n",
" <td>0.874867</td>\n",
" <td>0.685048</td>\n",
" <td>1.000000</td>\n",
" <td>0.070707</td>\n",
" <td>3.832415</td>\n",
" <td>0.975998</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1.0</td>\n",
" <td>3.702446</td>\n",
" <td>3.527273</td>\n",
" <td>0.282185</td>\n",
" <td>0.192092</td>\n",
" <td>0.186749</td>\n",
" <td>0.216980</td>\n",
" <td>0.204185</td>\n",
" <td>0.240096</td>\n",
" <td>0.339114</td>\n",
" <td>0.204905</td>\n",
" <td>0.572157</td>\n",
" <td>0.593544</td>\n",
" <td>0.875928</td>\n",
" <td>0.685048</td>\n",
" <td>1.000000</td>\n",
" <td>0.077201</td>\n",
" <td>3.875892</td>\n",
" <td>0.974947</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1.2</td>\n",
" <td>3.704441</td>\n",
" <td>3.529251</td>\n",
" <td>0.280912</td>\n",
" <td>0.193633</td>\n",
" <td>0.187311</td>\n",
" <td>0.216872</td>\n",
" <td>0.203004</td>\n",
" <td>0.240588</td>\n",
" <td>0.338049</td>\n",
" <td>0.203453</td>\n",
" <td>0.571830</td>\n",
" <td>0.594313</td>\n",
" <td>0.883351</td>\n",
" <td>0.681866</td>\n",
" <td>1.000000</td>\n",
" <td>0.085859</td>\n",
" <td>3.910718</td>\n",
" <td>0.974073</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1.4</td>\n",
" <td>3.704580</td>\n",
" <td>3.529388</td>\n",
" <td>0.273595</td>\n",
" <td>0.190651</td>\n",
" <td>0.183874</td>\n",
" <td>0.212183</td>\n",
" <td>0.199464</td>\n",
" <td>0.239118</td>\n",
" <td>0.329550</td>\n",
" <td>0.195433</td>\n",
" <td>0.566171</td>\n",
" <td>0.592793</td>\n",
" <td>0.871686</td>\n",
" <td>0.675504</td>\n",
" <td>1.000000</td>\n",
" <td>0.107504</td>\n",
" <td>3.961915</td>\n",
" <td>0.972674</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1.6</td>\n",
" <td>3.704591</td>\n",
" <td>3.529399</td>\n",
" <td>0.263097</td>\n",
" <td>0.186255</td>\n",
" <td>0.178709</td>\n",
" <td>0.205170</td>\n",
" <td>0.191094</td>\n",
" <td>0.232920</td>\n",
" <td>0.317439</td>\n",
" <td>0.184917</td>\n",
" <td>0.552349</td>\n",
" <td>0.590545</td>\n",
" <td>0.868505</td>\n",
" <td>0.669141</td>\n",
" <td>0.999576</td>\n",
" <td>0.156566</td>\n",
" <td>4.060156</td>\n",
" <td>0.969203</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Alpha RMSE MAE precision recall F_1 F_05 \\\n",
"0 0.2 268.177832 211.732649 0.262672 0.166858 0.166277 0.197184 \n",
"0 0.4 10.546689 7.792373 0.268505 0.172669 0.171569 0.202643 \n",
"0 0.6 3.143988 2.948790 0.274655 0.180502 0.177820 0.208730 \n",
"0 0.8 3.670728 3.495735 0.281972 0.189868 0.185300 0.216071 \n",
"0 1.0 3.702446 3.527273 0.282185 0.192092 0.186749 0.216980 \n",
"0 1.2 3.704441 3.529251 0.280912 0.193633 0.187311 0.216872 \n",
"0 1.4 3.704580 3.529388 0.273595 0.190651 0.183874 0.212183 \n",
"0 1.6 3.704591 3.529399 0.263097 0.186255 0.178709 0.205170 \n",
"\n",
" precision_super recall_super NDCG mAP MRR LAUC \\\n",
"0 0.187661 0.203252 0.320910 0.196132 0.563378 0.580866 \n",
"0 0.192489 0.212653 0.326760 0.200172 0.565148 0.583801 \n",
"0 0.198176 0.222746 0.332872 0.203290 0.568872 0.587738 \n",
"0 0.203541 0.236751 0.339867 0.206688 0.573729 0.592432 \n",
"0 0.204185 0.240096 0.339114 0.204905 0.572157 0.593544 \n",
"0 0.203004 0.240588 0.338049 0.203453 0.571830 0.594313 \n",
"0 0.199464 0.239118 0.329550 0.195433 0.566171 0.592793 \n",
"0 0.191094 0.232920 0.317439 0.184917 0.552349 0.590545 \n",
"\n",
" HR H2R Reco in test Test coverage Shannon Gini \n",
"0 0.850477 0.629905 1.000000 0.060606 3.669627 0.979636 \n",
"0 0.854719 0.644751 1.000000 0.064214 3.726996 0.978426 \n",
"0 0.870626 0.657476 1.000000 0.065657 3.785282 0.977090 \n",
"0 0.874867 0.685048 1.000000 0.070707 3.832415 0.975998 \n",
"0 0.875928 0.685048 1.000000 0.077201 3.875892 0.974947 \n",
"0 0.883351 0.681866 1.000000 0.085859 3.910718 0.974073 \n",
"0 0.871686 0.675504 1.000000 0.107504 3.961915 0.972674 \n",
"0 0.868505 0.669141 0.999576 0.156566 4.060156 0.969203 "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from tqdm import tqdm\n",
"\n",
"# The test split does not change between runs, so read it once instead of\n",
"# once per alpha value.\n",
"test_for_eval = pd.read_csv(\"./Datasets/ml-100k/test.csv\", sep=\"\\t\", header=None)\n",
"\n",
"# One single-row metrics frame per alpha; beta stays at 0 (no popularity penalty).\n",
"alpha_results = []\n",
"for alpha in tqdm([round(i, 1) for i in np.arange(0.2, 1.6001, 0.2)]):\n",
"    model = RP3Beta()\n",
"    model.fit(train_ui, alpha=alpha, beta=0)\n",
"    reco = pd.DataFrame(model.recommend(user_code_id, item_code_id, topK=10))\n",
"    estimations_df = pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))\n",
"    to_append = ev.evaluate(\n",
"        # ev.evaluate is not visible here and may modify its input, so pass a copy\n",
"        test=test_for_eval.copy(),\n",
"        estimations_df=estimations_df,\n",
"        reco=np.array(reco),\n",
"        super_reactions=[4, 5],\n",
"    )\n",
"    to_append.insert(0, \"Alpha\", alpha)\n",
"    alpha_results.append(to_append)\n",
"\n",
"# ignore_index gives the rows distinct labels instead of every row being labelled 0.\n",
"result = pd.concat(alpha_results, ignore_index=True)\n",
"result"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABCQAAAkoCAYAAACzg26yAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAEAAElEQVR4nOzde3ycZZ3//9cn50yTNpk0PaTJNAHKoediW85yKgpKAVdUWFFQdtlV0N3V/X6XXfenfl19rIKuLiu6nkFBUFCxRRAQQUEobYG2lJZDoWmbJm3TtOk558/vj7kTpiFtkzYz92Tm/Xw85pGZ677umc+04eLu576uz2XujoiIiIiIiIhIKuWEHYCIiIiIiIiIZB8lJEREREREREQk5ZSQEBEREREREZGUU0JCRERERERERFJOCQkRERERERERSTklJEREREREREQk5ZSQEBERERERkT5m9r9m9v8Not/LZnZe8iOSTKWEhGQcM6s3swNmttfMtpjZHWZWEhy7w8zczC7vd843g/brgtcFZvYNM2sI3qfezL51iM/ofXw7ld9TRGQkCcbNDjMb26/9xWD8rU1o+2LQdlq/vteZWXe/sXevmVWl6GuIiGQFd/97d/+PQfSb5u5PpiAkyVBKSEimWujuJcBsYA7wrwnHXgM+2vvCzPKADwJvJPT5V2AuMB8oBc4DXhjoMxIeNw33lxARyTDrgat7X5jZDCCS2MHMjPgYvYOEsTrBs/3G3hJ3b0xm0CIiI1FwjSuS1pSQkIzm7luAR4gnJnotBs42s/Lg9cXAKmBLQp95wG/cvdHj6t39p6mIWUQkg/2Mg5MM1wL9x9ZzgInAp4GrzKwgRbGJiIwIwYyzfzWzNWa208x+YmZFZnZeMLv3X8xsC/ATM8sxs5vN7A0zazGzX5pZNOG9zjazZ8ys1cw2JcwWvsPMvhw8H2tmDwZ9dpjZU2aWkxDLguB5oZl9y8wag8e3zKwwONYb22fNbJuZNZnZx1L9ZyfpRwkJyWhmVg1cAqxLaG4DfgtcFbz+KG+/IF4CfMbMPmlmM4I7diIicmyWAKPN7BQzyyU+Dt/Vr8+1xBPHvwxeL0xhfCIiI8WHgXcDxwMnAv8etE8AosBk4AbgU8AVwLlAFbATuB3AzCYDDwP/A1QSv4G3YoDP+izQEPQZD/wb4AP0+xxwevA+s4jPNP73hOMTgDHAJOB64PaEG4SSpZSQkEz1gJntATYB24Av9Dv+U+CjZlZGfIB+oN/x/wS+RnywXw5sNrNrB/iM1oTH3w7zdxARyUS9syQuAtYCm3sPmFkE+ADwc3fvBO7n7cs2Tu839r6BiEj2+ba7b3L3HcBXeGs5XA/wBXdvd/cDwN8Dn3P3BndvB74IXBks5/hr4A/ufo+7d7p7i7uvGOCzOonPXJsc9HvK3QdKSHwY+JK7b3P3ZuD/AR/p9z5fCt7jIWAvcNIx/jnICKeEhGSqK9y9t/bDycBBRdTc/WniWd7PAQ8GA3bi8W53v93dzwLKiA/0PzazU/p9RlnC4wfJ+zoiIhnjZ8Qvgq/j7bPT3gd0AQ8Fr+8GLjGzyoQ+S/qNvccnO2ARkTS0KeH5BuKzHwCa3b0t4dhk4De9SVziieBu4jMdaji4htqh3Ep8tvGjZvammd18iH5VQSwDxQXQ4u5dCa/3AyWD+HzJYEpISEZz9z8BdwBfH+DwXcSnoB22NoS7H3D324lPcZs63DGKiGQTd99AvLjle4Bf9zt8LfGL043B+uf7gHziCQwREXlLTcLzGNBb3Lf/zIVNwCX9ErlF7r45OHbEpK6773H3z7r7ccBlxJc1XzhA10biCZCB4hIZkBISkg2+BVxkZrP6td9GfMrwn/ufYGb/GBTfKTazvGC5RinwYtKjFRHJfNcDF7j7voS2ScCFwKXE1x/PJr4G+WsMvNuGiEg2u9HMqoMClZ8DfnGIfv8LfCWoF4GZVZrZ5cGxu4EFZvbB4Hq3ws
xm938DM7vUzE4IaqrtIj7DomeAz7oH+PfgM8YCn+ftdYJEDqKEhGS8YA3bT4kPiontO9z98UOsgdsPfIP4zhvbgRuB97v7mwl9FpvZ3oTHb5L0FUREMoq7v+Huy/s1nwOscPdH3X1L74N48nimmU0P+p3Rb+zda2bzUvoFRETC93PgUeBN4ssuvnyIfv8NLCK+3GIP8eLCpwG4+0bis9U+S3yr5RXEE8H9TQH+QLzmw7PAd9z9iQH6fZl47bVVwEvAC4eJSwQAG/jfYiIiIiIiIpJuzKwe+Bt3/0PYsYgcK82QEBEREREREZGUU0JCRERERERERFJOSzZEREREREREJOU0Q0JEREREREREUk4JCRERERERERFJubywAxgOY8eO9dra2rDDEBF5m+eff367u1eGHUcqaCwWkXSkcVhEJHyHGoszIiFRW1vL8uX9tzMXEQmfmW0IO4ZU0VgsIulI47CISPgONRZryYaIiIiIiIiIpJwSEiIiIiIiIiKSckpIiIiIiIiIiEjKZUQNCRFJD52dnTQ0NNDW1hZ2KClXVFREdXU1+fn5YYciIvI22TA+axwWkWOVDWNlsg11LFZCQkSGTUNDA6WlpdTW1mJmYYeTMu5OS0sLDQ0N1NXVhR2OiMjbZPr4nKpx2MwuBv4byAV+6O5f7Xf8M8DfAF1AM/Bxd98QHLsFeC/xGcqPAf8AFAP3AccD3cBid7856H8dcCuwOXj7b7v7D5P25UQk48fKZDuasVhLNkRk2LS1tVFRUZF1A7iZUVFRoWy6iKStTB+fUzEOm1kucDtwCTAVuNrMpvbr9iIw191nAvcDtwTnngmcBcwEpgPzgHODc77u7icDc4CzzOyShPf7hbvPDh5KRogkWaaPlcl2NGOxEhIiMqyydQDP1u8tIiNHpo9TKfh+84F17v6mu3cA9wKXJ3Zw9yfcfX/wcglQ3XsIKAIKgEIgH9jq7vvd/Yng3A7ghYRzRCQEmT5WJttQ//yUkBCRjJKbm8vs2bOZPn06CxcupLW1FYD6+nrMjH//93/v67t9+3by8/O56aabAHj11Vc577zzmD17Nqeccgo33HADAE8++SRjxoxh9uzZfY8//OEPKf9uIiIjmZlxzTXX9L3u6uqisrKSSy+99KB+V1xxBaeffvpBbV/84heZNGnSQeNw7/ieQpOATQmvG4K2Q7keeBjA3Z8FngCagscj7r42sbOZlQELgccTmt9vZqvM7H4zqznmbyAiWWn58uV8+tOfPuTxxsZGrrzyyhRG9BYlJEQkoxQXF7NixQpWr15NNBrl9ttv7ztWV1fH7373u77X9913H9OmTet7/elPf5p/+qd/YsWKFaxdu5ZPfepTfcfOOeccVqxY0fdYsGBBar6QiEiGGDVqFKtXr+bAgQMAPPbYY0yadPC/51tbW3n++efZtWsXb7755kHHesfn3kdZWVmqQh8yM7sGmEu8BgRmdgJwCvHZD5OAC8zsnIT+ecA9wG3u3vvFFwO1wfKPx4A7D/FZN5jZcjNb3tzcnKyvJCJppLu7e0j9586dy2233XbI41VVVdx///3HGtZRUUJCRDLWGWecwebNm/teRyIRTjnlFJYvXw7AL37xCz74wQ/2HW9qaqK6+q2ZsjNmzEhdsCIiWeA973lPX2L4nnvu4eqrrz7o+K9//WsWLlzIVVddxb333htGiIezGUicpVDNWwUn+5jZAuBzwGXu3h40vw9Y4u573X0v8ZkTZySc9n3gdXf/Vm+Du7cknP9D4B0DBeXu33f3ue4+t7Ky8ui+mYikjfr6ek4++WQ+/OEPc8opp3DllVeyf/9+amtr+Zd/+RdOPfVU7rvvPh599FHOOOMMTj31VD7wgQ+wd+9eAJYtW8aZZ57JrFmzmD9/Pnv27OHJJ5/sm432pz/9qW+m2Zw5c9izZw/19fVMnz4diNfR+NjHPsaMGTOYM2cOTzzxBAB33HEHf/VXf8XFF1/MlClT+L//9/8Oy/
fVLhsikhT/b/HLrGncPazvObVqNF9YOO3IHYlnjh9//HGuv/76g9p7L3LHjx9Pbm4uVVVVNDY2AvG7bxdccAFnnnk
"text/plain": [
"<Figure size 1296x3024 with 18 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"import itertools\n",
"\n",
"# Every column of the sweep table except the swept hyperparameter itself.\n",
"metrics = [column for column in result.columns if column != \"Alpha\"]\n",
"\n",
"# NOTE: despite the names, charts_per_row is passed as the number of grid rows\n",
"# and charts_per_column as the number of grid columns.\n",
"charts_per_row = 6\n",
"charts_per_column = 3\n",
"\n",
"fig, axes = plt.subplots(\n",
"    nrows=charts_per_row,\n",
"    ncols=charts_per_column,\n",
"    figsize=(18, 7 * charts_per_row),\n",
")\n",
"\n",
"# (row, column) positions of the grid in row-major order.\n",
"to_iter = list(itertools.product(range(charts_per_row), range(charts_per_column)))\n",
"\n",
"# One chart per metric: Alpha on the x axis, the metric on the y axis.\n",
"for position, metric in enumerate(metrics):\n",
"    df = result[[\"Alpha\", metric]]\n",
"    df.plot(ax=axes[to_iter[position]], title=metric, x=0, y=1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Beta"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" 0%| | 0/10 [00:00<?, ?it/s]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 7152.45it/s]\u001b[A\n",
" 10%|█ | 1/10 [00:12<01:51, 12.44s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 7140.39it/s]\u001b[A\n",
" 20%|██ | 2/10 [00:25<01:41, 12.64s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 6865.13it/s]\u001b[A\n",
" 30%|███ | 3/10 [00:38<01:29, 12.76s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 7305.02it/s]\u001b[A\n",
" 40%|████ | 4/10 [00:51<01:17, 12.89s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 7224.53it/s]\u001b[A\n",
" 50%|█████ | 5/10 [01:04<01:04, 12.88s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 6764.31it/s]\u001b[A\n",
" 60%|██████ | 6/10 [01:16<00:51, 12.78s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 6802.54it/s]\u001b[A\n",
" 70%|███████ | 7/10 [01:30<00:38, 12.98s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 7247.39it/s]\u001b[A\n",
" 80%|████████ | 8/10 [01:42<00:25, 12.95s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 6925.58it/s]\u001b[A\n",
" 90%|█████████ | 9/10 [01:55<00:12, 12.85s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 7723.16it/s]\u001b[A\n",
"100%|██████████| 10/10 [02:08<00:00, 12.84s/it]\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Beta</th>\n",
" <th>RMSE</th>\n",
" <th>MAE</th>\n",
" <th>precision</th>\n",
" <th>recall</th>\n",
" <th>F_1</th>\n",
" <th>F_05</th>\n",
" <th>precision_super</th>\n",
" <th>recall_super</th>\n",
" <th>NDCG</th>\n",
" <th>mAP</th>\n",
" <th>MRR</th>\n",
" <th>LAUC</th>\n",
" <th>HR</th>\n",
" <th>H2R</th>\n",
" <th>Reco in test</th>\n",
" <th>Test coverage</th>\n",
" <th>Shannon</th>\n",
" <th>Gini</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.0</td>\n",
" <td>3.702446</td>\n",
" <td>3.527273</td>\n",
" <td>0.282185</td>\n",
" <td>0.192092</td>\n",
" <td>0.186749</td>\n",
" <td>0.216980</td>\n",
" <td>0.204185</td>\n",
" <td>0.240096</td>\n",
" <td>0.339114</td>\n",
" <td>0.204905</td>\n",
" <td>0.572157</td>\n",
" <td>0.593544</td>\n",
" <td>0.875928</td>\n",
" <td>0.685048</td>\n",
" <td>1.000000</td>\n",
" <td>0.077201</td>\n",
" <td>3.875892</td>\n",
" <td>0.974947</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.1</td>\n",
" <td>3.703312</td>\n",
" <td>3.528128</td>\n",
" <td>0.290138</td>\n",
" <td>0.197597</td>\n",
" <td>0.192259</td>\n",
" <td>0.223336</td>\n",
" <td>0.210944</td>\n",
" <td>0.246153</td>\n",
" <td>0.347768</td>\n",
" <td>0.212034</td>\n",
" <td>0.581038</td>\n",
" <td>0.596328</td>\n",
" <td>0.884411</td>\n",
" <td>0.695652</td>\n",
" <td>1.000000</td>\n",
" <td>0.085137</td>\n",
" <td>3.957416</td>\n",
" <td>0.972784</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.2</td>\n",
" <td>3.703825</td>\n",
" <td>3.528636</td>\n",
" <td>0.297137</td>\n",
" <td>0.201202</td>\n",
" <td>0.196067</td>\n",
" <td>0.228169</td>\n",
" <td>0.218026</td>\n",
" <td>0.252767</td>\n",
" <td>0.355655</td>\n",
" <td>0.219909</td>\n",
" <td>0.588904</td>\n",
" <td>0.598160</td>\n",
" <td>0.886532</td>\n",
" <td>0.697773</td>\n",
" <td>1.000000</td>\n",
" <td>0.094517</td>\n",
" <td>4.053212</td>\n",
" <td>0.969980</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.3</td>\n",
" <td>3.704130</td>\n",
" <td>3.528939</td>\n",
" <td>0.303499</td>\n",
" <td>0.204749</td>\n",
" <td>0.199901</td>\n",
" <td>0.232829</td>\n",
" <td>0.225107</td>\n",
" <td>0.260797</td>\n",
" <td>0.363757</td>\n",
" <td>0.226825</td>\n",
" <td>0.599969</td>\n",
" <td>0.599964</td>\n",
" <td>0.888653</td>\n",
" <td>0.707317</td>\n",
" <td>1.000000</td>\n",
" <td>0.105339</td>\n",
" <td>4.147779</td>\n",
" <td>0.966948</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.4</td>\n",
" <td>3.704313</td>\n",
" <td>3.529120</td>\n",
" <td>0.308908</td>\n",
" <td>0.208811</td>\n",
" <td>0.203854</td>\n",
" <td>0.237241</td>\n",
" <td>0.229614</td>\n",
" <td>0.266918</td>\n",
" <td>0.370758</td>\n",
" <td>0.232673</td>\n",
" <td>0.609385</td>\n",
" <td>0.602014</td>\n",
" <td>0.895016</td>\n",
" <td>0.718982</td>\n",
" <td>0.999894</td>\n",
" <td>0.132035</td>\n",
" <td>4.259682</td>\n",
" <td>0.962989</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.5</td>\n",
" <td>3.704422</td>\n",
" <td>3.529229</td>\n",
" <td>0.314316</td>\n",
" <td>0.211411</td>\n",
" <td>0.206768</td>\n",
" <td>0.240986</td>\n",
" <td>0.237124</td>\n",
" <td>0.273416</td>\n",
" <td>0.378307</td>\n",
" <td>0.239297</td>\n",
" <td>0.622792</td>\n",
" <td>0.603327</td>\n",
" <td>0.903499</td>\n",
" <td>0.724284</td>\n",
" <td>0.999046</td>\n",
" <td>0.168831</td>\n",
" <td>4.411281</td>\n",
" <td>0.956648</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.6</td>\n",
" <td>3.704488</td>\n",
" <td>3.529295</td>\n",
" <td>0.314634</td>\n",
" <td>0.206209</td>\n",
" <td>0.204818</td>\n",
" <td>0.240159</td>\n",
" <td>0.242489</td>\n",
" <td>0.273850</td>\n",
" <td>0.376438</td>\n",
" <td>0.238428</td>\n",
" <td>0.622042</td>\n",
" <td>0.600721</td>\n",
" <td>0.897137</td>\n",
" <td>0.720042</td>\n",
" <td>0.996394</td>\n",
" <td>0.212843</td>\n",
" <td>4.621938</td>\n",
" <td>0.945932</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.7</td>\n",
" <td>3.704528</td>\n",
" <td>3.529335</td>\n",
" <td>0.304136</td>\n",
" <td>0.187298</td>\n",
" <td>0.191990</td>\n",
" <td>0.228749</td>\n",
" <td>0.238305</td>\n",
" <td>0.256201</td>\n",
" <td>0.358807</td>\n",
" <td>0.226808</td>\n",
" <td>0.593897</td>\n",
" <td>0.591207</td>\n",
" <td>0.868505</td>\n",
" <td>0.693531</td>\n",
" <td>0.983033</td>\n",
" <td>0.256854</td>\n",
" <td>4.898568</td>\n",
" <td>0.928065</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.8</td>\n",
" <td>3.704552</td>\n",
" <td>3.529360</td>\n",
" <td>0.266384</td>\n",
" <td>0.147571</td>\n",
" <td>0.158660</td>\n",
" <td>0.194838</td>\n",
" <td>0.214485</td>\n",
" <td>0.209336</td>\n",
" <td>0.299850</td>\n",
" <td>0.184356</td>\n",
" <td>0.492852</td>\n",
" <td>0.571152</td>\n",
" <td>0.803818</td>\n",
" <td>0.604454</td>\n",
" <td>0.936373</td>\n",
" <td>0.341270</td>\n",
" <td>5.257397</td>\n",
" <td>0.895882</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.9</td>\n",
" <td>3.704567</td>\n",
" <td>3.529375</td>\n",
" <td>0.162354</td>\n",
" <td>0.076967</td>\n",
" <td>0.089233</td>\n",
" <td>0.114583</td>\n",
" <td>0.134657</td>\n",
" <td>0.113253</td>\n",
" <td>0.160868</td>\n",
" <td>0.085486</td>\n",
" <td>0.243590</td>\n",
" <td>0.535405</td>\n",
" <td>0.580064</td>\n",
" <td>0.400848</td>\n",
" <td>0.800106</td>\n",
" <td>0.415584</td>\n",
" <td>5.563910</td>\n",
" <td>0.857396</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Beta RMSE MAE precision recall F_1 F_05 \\\n",
"0 0.0 3.702446 3.527273 0.282185 0.192092 0.186749 0.216980 \n",
"0 0.1 3.703312 3.528128 0.290138 0.197597 0.192259 0.223336 \n",
"0 0.2 3.703825 3.528636 0.297137 0.201202 0.196067 0.228169 \n",
"0 0.3 3.704130 3.528939 0.303499 0.204749 0.199901 0.232829 \n",
"0 0.4 3.704313 3.529120 0.308908 0.208811 0.203854 0.237241 \n",
"0 0.5 3.704422 3.529229 0.314316 0.211411 0.206768 0.240986 \n",
"0 0.6 3.704488 3.529295 0.314634 0.206209 0.204818 0.240159 \n",
"0 0.7 3.704528 3.529335 0.304136 0.187298 0.191990 0.228749 \n",
"0 0.8 3.704552 3.529360 0.266384 0.147571 0.158660 0.194838 \n",
"0 0.9 3.704567 3.529375 0.162354 0.076967 0.089233 0.114583 \n",
"\n",
" precision_super recall_super NDCG mAP MRR LAUC \\\n",
"0 0.204185 0.240096 0.339114 0.204905 0.572157 0.593544 \n",
"0 0.210944 0.246153 0.347768 0.212034 0.581038 0.596328 \n",
"0 0.218026 0.252767 0.355655 0.219909 0.588904 0.598160 \n",
"0 0.225107 0.260797 0.363757 0.226825 0.599969 0.599964 \n",
"0 0.229614 0.266918 0.370758 0.232673 0.609385 0.602014 \n",
"0 0.237124 0.273416 0.378307 0.239297 0.622792 0.603327 \n",
"0 0.242489 0.273850 0.376438 0.238428 0.622042 0.600721 \n",
"0 0.238305 0.256201 0.358807 0.226808 0.593897 0.591207 \n",
"0 0.214485 0.209336 0.299850 0.184356 0.492852 0.571152 \n",
"0 0.134657 0.113253 0.160868 0.085486 0.243590 0.535405 \n",
"\n",
" HR H2R Reco in test Test coverage Shannon Gini \n",
"0 0.875928 0.685048 1.000000 0.077201 3.875892 0.974947 \n",
"0 0.884411 0.695652 1.000000 0.085137 3.957416 0.972784 \n",
"0 0.886532 0.697773 1.000000 0.094517 4.053212 0.969980 \n",
"0 0.888653 0.707317 1.000000 0.105339 4.147779 0.966948 \n",
"0 0.895016 0.718982 0.999894 0.132035 4.259682 0.962989 \n",
"0 0.903499 0.724284 0.999046 0.168831 4.411281 0.956648 \n",
"0 0.897137 0.720042 0.996394 0.212843 4.621938 0.945932 \n",
"0 0.868505 0.693531 0.983033 0.256854 4.898568 0.928065 \n",
"0 0.803818 0.604454 0.936373 0.341270 5.257397 0.895882 \n",
"0 0.580064 0.400848 0.800106 0.415584 5.563910 0.857396 "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from tqdm import tqdm\n",
"\n",
"# The test split does not change between runs, so read it once instead of\n",
"# once per beta value.\n",
"test_for_eval = pd.read_csv(\"./Datasets/ml-100k/test.csv\", sep=\"\\t\", header=None)\n",
"\n",
"# One single-row metrics frame per beta; alpha stays at 1 (no exponent).\n",
"beta_results = []\n",
"for beta in tqdm([round(i, 1) for i in np.arange(0, 1, 0.1)]):\n",
"    model = RP3Beta()\n",
"    model.fit(train_ui, alpha=1, beta=beta)\n",
"    reco = pd.DataFrame(model.recommend(user_code_id, item_code_id, topK=10))\n",
"    estimations_df = pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))\n",
"    to_append = ev.evaluate(\n",
"        # ev.evaluate is not visible here and may modify its input, so pass a copy\n",
"        test=test_for_eval.copy(),\n",
"        estimations_df=estimations_df,\n",
"        reco=np.array(reco),\n",
"        super_reactions=[4, 5],\n",
"    )\n",
"    to_append.insert(0, \"Beta\", beta)\n",
"    beta_results.append(to_append)\n",
"\n",
"# ignore_index gives the rows distinct labels instead of every row being labelled 0.\n",
"result = pd.concat(beta_results, ignore_index=True)\n",
"result"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABCQAAAkoCAYAAACzg26yAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAEAAElEQVR4nOzdd3gd1bX38e9St+QiW5KrLEsuuDcwLpTQwXRCJ3SScFNII+WGhJCE5N43CSHkJuEmIeUm9JYAhkDAtAQSS9gG27jhIsm9yJJlWZbV1/vHGRlZyLZsS2cknd/nec7DmT175qyxzPbRmj1rm7sjIiIiIiIiIhJNcWEHICIiIiIiIiKxRwkJEREREREREYk6JSREREREREREJOqUkBARERERERGRqFNCQkRERERERESiTgkJEREREREREYk6JSRERERERERkHzP7jZl9pw39lpnZqR0fkXRXSkhIt2NmxWa218wqzWyrmf3JzHoG+/5kZm5mF7c45r6g/aZgO8nM7jWzjcF5is3s5wf4jKbXr6J5nSIiXUUwZtaaWWaL9veCsTe3Wdv3grYZLfreZGYNLcbdSjMbHKXLEBGJGe7+GXf/QRv6jXf3N6MQknRTSkhId3Whu/cEpgBTgTua7VsF3NC0YWYJwJXA2mZ97gCmAdOBXsCpwLutfUaz123tfREiIt1IEXBN04aZTQRSm3cwMyMyPpfRbJxuZl6Lcbenu2/uyKBFRLqq4DuuSKemhIR0a+6+FXiZSGKiyfPASWbWN9ieDSwBtjbrczzwjLtv9ohid38wGjGLiHRTD7F/kuFGoOW4ejIwCPgicLWZJUUpNhGRLiOYdXaHmS03s51m9n9mlmJmpwaze//TzLYC/2dmcWb2TTNba2alZvakmfVrdq6TzOzfZlZuZhuazRb+k5n9MHifaWYvBH3KzOwtM4trFsuZwftkM/u5mW0OXj83s+RgX1NsXzWz7Wa2xcxujvafnXQ+SkhIt2Zm2cC5wJpmzdXAc8DVwfYNfPRLcT5wu5l9zswmBnftRETkyOUDvc1srJnFExmDH27R50YiSeMng+0LoxifiEhXci1wDjACOAa4M2gfCPQDhgG3Al8ALgFOAQYDO4H7AcxsGPAS8Esgi8gNvEWtfNZXgY1BnwHAtwBvpd+3gZnBeSYTmWl8Z7P9A4E+wBDgk8D9zW4QSoxSQkK6q2fNbDewAdgOfLfF/geBG8wsncgA/WyL/f8P+DGRwX4BsMnMbmzlM8qbvT7dztcgItLdNM2SOAtYAWxq2mFmqcAVwKPuXgc8zUcf25jZYtxdi4hIbPqVu29w9zLgv/jwkbhG4LvuXuPue4HPAN92943uXgN8D7g8eJzjE8Cr7v6Yu9e5e6m7L2rls+qIzF4bFvR7y91bS0hcC9zt7tvdvQT4PnB9i/PcHZzjRaASGH2Ufw7SxSkhId3VJe7eVPthDLBfITV3f5tIlvfbwAvBgN18f4O73+/uJwLpRAb6P5rZ2Bafkd7s9buOuxwRkW7hISJfgG/iozPTPg7UAy8G248A55pZVrM++S3G3REdHbCISCe1odn7dURmPwCUuHt1s33DgGeaErlEksENRGY6DGX/GmoHcg+R2cavmFmhmX3zAP0GB7G0FhdAqbvXN9uuAnq24fOlG1NCQro1d/8H8Cfgp63sfpjIFLSD1oZw973ufj+RKW7j2jtGEZFY4e7riBS3PA/4a4vdNxL5Yro+ePb5KSCRSAJDRET2N7TZ+xygqcBvy5kLG4BzWyRzU9x9U7DvkIldd9/t7l919+HARUQeaz6jla6biSRAWotLpFVKSEgs+DlwlplNbtH+CyLThv/Z8gAz+3JQfKeHmSUEj2v0At7r8GhFRLq3TwKnu/ueZm1DgDOAC4g8ezyFyPPHP6b11TZERGLd580sOyhQ+W3giQP0+w3wX0G9CMwsy8wuDvY9ApxpZlcG33czzGxKyxOY2QVmNjKoqb
aLyAyLxlY+6zHgzuAzMoG7+GitIJH9KCEh3V7wDNuDRAbF5u1l7v7aAZ6BqwLuJbLyxg7g88Bl7l7YrM/zZlbZ7PVMB12CiEi34e5r3X1Bi+aTgUXu/oq7b216EUkcTzKzCUG/WS3G3UozOz6qFyAi0jk8CrwCFBJ57OKHB+j3P8AcIo9b7CZSYHgGgLuvJzJj7atEllteRCQZ3NIo4FUiNR/mAf/r7m+00u+HRGqvLQHeB949SFwiAFjrv4uJiIiIiIhIZ2NmxcCn3P3VsGMROVqaISEiIiIiIiIiUaeEhIiIiIiIiIhEnR7ZEBEREREREZGo0wwJEREREREREYk6JSREREREREREJOoSwg6gPWRmZnpubm7YYYiI7GfhwoU73D0r7DiiRWOxiHRGsTQWaxwWkc7oYONwt0hI5ObmsmBByyXNRUTCZWbrwo4hmjQWi0hnFPZYbGazgf8B4oHfu/uPWuz/DPB5oAGoBG519+VmdhbwIyAJqAW+7u6vH+yzNA6LSGd0sHFYj2yIiIiIiHQAM4sH7gfOBcYB15jZuBbdHnX3ie4+BfgJ8LOgfQdwobtPBG4EHopO1CIi0aOEhIiIiIhIx5gOrHH3QnevBR4HLm7ewd0rmm2mAR60v+fum4P2ZUAPM0uOQswiIlHTLR7ZEBERERHphIYAG5ptbwRmtOxkZp8HbifyeMbprZznMuBdd69p5dhbgVsBcnJy2iFkEZHo6bYJibq6OjZu3Eh1dXXYoURdSkoK2dnZJCYmhh2KiMS47j4Wa7wVkfbg7vcD95vZJ4A7iTyiAYCZjQd+DJx9gGMfAB4AmDZtmnd8tCLdV3f/3tLRjuR7UbdNSGzcuJFevXqRm5uLmYUdTtS4O6WlpWzcuJG8vLywwxGRGNedx2KNtyLSBpuAoc22s4O2A3kc+HXThpllA88AN7j72g6JUET26c7fWzrakX4v6rY1JKqrq8nIyIi5v0hmRkZGhrJ6ItIpdOexWOOtiLTBfGCUmeWZWRJwNTCneQczG9Vs83xgddCeDvwN+Ka7/ys64YrEtu78vaWjHen3om6bkABi9i9SrF63iHRO3XlM6s7XJiJHz93rgduAl4EVwJPuvszM7jazi4Jut5nZMjNbRKSORNPjGrcBI4G7zGxR8Oof5UsQiTn6t/3IHcmfXbdOSIQtPj6eKVOmMGHCBC688ELKy8sBKC4uxsy488479/XdsWMHiYmJ3HbbbQB88MEHnHrqqUyZMoWxY8dy6623AvDmm2/Sp08fpkyZsu/16quvRv3aRES6CjPjuuuu27ddX19PVlYWF1xwwX79LrnkEmbOnLlf2/e+9z2GDBmy35jbNJaLiLSFu7/o7se4+wh3/6+g7S53nxO8/5K7j3f3Ke5+mrsvC9p/6O5pQXvTa3uY1yIiXdOCBQv44he/eMD9mzdv5vLLL49iRB9SQqID9ejRg0WLFrF06VL69evH/fffv29fXl4ef/vb3/ZtP/XUU4wfP37f9he/+EW+8pWvsGjRIlasWMEXvvCFfftOPvlkFi1atO915plnRueCRES6oLS0NJYuXcrevXsBmDt3LkOGDNmvT3l5OQsXLmTXrl0UFhbut69pLG56paenRyt0ERERkY9oaGg4rP7Tpk3jF7/4xQH3Dx48mKeffvpowzoiSkhEyaxZs9i06cMaRqmpqYwdO5YFCxYA8MQTT3DllVfu279lyxays7P3bU+cODF6wYqIdDPnnXfeviTwY489xjXXXLPf/r/+9a9ceOGFXH311Tz++ONhhCgiIiJCcXExY8aM4dprr2Xs2LFcfvnlVFVVkZuby3/+539y7LHH8tRTT/HKK68wa9Ysjj32WK644goqKysBmD9/PieccAKTJ09m+vTp7N69mzfffHPfzNB//OMf+2Z9Tp06ld27d1NcXMyECROASB2Nm2++mYkTJzJ16lTeeOMNAP70pz9x6aWXMnv2bEaNGsU3vvGNdrnebrvKRnPff3
4ZyzdXtOs5xw3uzXcvHH/ojkQyWK+99hqf/OQn92tv+uI7YMAA4uPjGTx4MJs3bwYid+ROP/10TjjhBM4++2xuvvn
"text/plain": [
"<Figure size 1296x3024 with 18 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"metrics = list(result.columns[[i not in [\"Beta\"] for i in result.columns]])\n",
"\n",
"charts_per_row = 6\n",
"charts_per_column = 3\n",
"\n",
"fig, axes = plt.subplots(\n",
" nrows=charts_per_row, ncols=charts_per_column, figsize=(18, 7 * charts_per_row)\n",
")\n",
"import itertools\n",
"\n",
"to_iter = [\n",
" i for i in itertools.product(range(charts_per_row), range(charts_per_column))\n",
"]\n",
"\n",
"for i in range(len(metrics)):\n",
" df = result[[\"Beta\", metrics[i]]]\n",
" df.plot(ax=axes[to_iter[i]], title=metrics[i], x=0, y=1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Check sample recommendations"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>user</th>\n",
" <th>rating</th>\n",
" <th>title</th>\n",
" <th>genres</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>522</th>\n",
" <td>817</td>\n",
" <td>5</td>\n",
" <td>Heat (1995)</td>\n",
" <td>Action, Crime, Thriller</td>\n",
" </tr>\n",
" <tr>\n",
" <th>85</th>\n",
" <td>817</td>\n",
" <td>4</td>\n",
" <td>Toy Story (1995)</td>\n",
" <td>Animation, Children's, Comedy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28224</th>\n",
" <td>817</td>\n",
" <td>4</td>\n",
" <td>Conspiracy Theory (1997)</td>\n",
" <td>Action, Mystery, Romance, Thriller</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69163</th>\n",
" <td>817</td>\n",
" <td>4</td>\n",
" <td>Desperate Measures (1998)</td>\n",
" <td>Crime, Drama, Thriller</td>\n",
" </tr>\n",
" <tr>\n",
" <th>62281</th>\n",
" <td>817</td>\n",
" <td>4</td>\n",
" <td>Broken Arrow (1996)</td>\n",
" <td>Action, Thriller</td>\n",
" </tr>\n",
" <tr>\n",
" <th>46995</th>\n",
" <td>817</td>\n",
" <td>4</td>\n",
" <td>Cop Land (1997)</td>\n",
" <td>Crime, Drama, Mystery</td>\n",
" </tr>\n",
" <tr>\n",
" <th>44432</th>\n",
" <td>817</td>\n",
" <td>4</td>\n",
" <td>Bound (1996)</td>\n",
" <td>Crime, Drama, Romance, Thriller</td>\n",
" </tr>\n",
" <tr>\n",
" <th>36735</th>\n",
" <td>817</td>\n",
" <td>4</td>\n",
" <td>Lone Star (1996)</td>\n",
" <td>Drama, Mystery</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32392</th>\n",
" <td>817</td>\n",
" <td>4</td>\n",
" <td>Spawn (1997)</td>\n",
" <td>Action, Adventure, Sci-Fi, Thriller</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30211</th>\n",
" <td>817</td>\n",
" <td>4</td>\n",
" <td>Star Trek: First Contact (1996)</td>\n",
" <td>Action, Adventure, Sci-Fi</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25251</th>\n",
" <td>817</td>\n",
" <td>4</td>\n",
" <td>Twelve Monkeys (1995)</td>\n",
" <td>Drama, Sci-Fi</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7384</th>\n",
" <td>817</td>\n",
" <td>4</td>\n",
" <td>Saint, The (1997)</td>\n",
" <td>Action, Romance, Thriller</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1394</th>\n",
" <td>817</td>\n",
" <td>4</td>\n",
" <td>River Wild, The (1994)</td>\n",
" <td>Action, Thriller</td>\n",
" </tr>\n",
" <tr>\n",
" <th>922</th>\n",
" <td>817</td>\n",
" <td>4</td>\n",
" <td>Rumble in the Bronx (1995)</td>\n",
" <td>Action, Adventure, Crime</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25959</th>\n",
" <td>817</td>\n",
" <td>3</td>\n",
" <td>Dead Man Walking (1995)</td>\n",
" <td>Drama</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" user rating title \\\n",
"522 817 5 Heat (1995) \n",
"85 817 4 Toy Story (1995) \n",
"28224 817 4 Conspiracy Theory (1997) \n",
"69163 817 4 Desperate Measures (1998) \n",
"62281 817 4 Broken Arrow (1996) \n",
"46995 817 4 Cop Land (1997) \n",
"44432 817 4 Bound (1996) \n",
"36735 817 4 Lone Star (1996) \n",
"32392 817 4 Spawn (1997) \n",
"30211 817 4 Star Trek: First Contact (1996) \n",
"25251 817 4 Twelve Monkeys (1995) \n",
"7384 817 4 Saint, The (1997) \n",
"1394 817 4 River Wild, The (1994) \n",
"922 817 4 Rumble in the Bronx (1995) \n",
"25959 817 3 Dead Man Walking (1995) \n",
"\n",
" genres \n",
"522 Action, Crime, Thriller \n",
"85 Animation, Children's, Comedy \n",
"28224 Action, Mystery, Romance, Thriller \n",
"69163 Crime, Drama, Thriller \n",
"62281 Action, Thriller \n",
"46995 Crime, Drama, Mystery \n",
"44432 Crime, Drama, Romance, Thriller \n",
"36735 Drama, Mystery \n",
"32392 Action, Adventure, Sci-Fi, Thriller \n",
"30211 Action, Adventure, Sci-Fi \n",
"25251 Drama, Sci-Fi \n",
"7384 Action, Romance, Thriller \n",
"1394 Action, Thriller \n",
"922 Action, Adventure, Crime \n",
"25959 Drama "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>user</th>\n",
" <th>rec_nb</th>\n",
" <th>title</th>\n",
" <th>genres</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>356</th>\n",
" <td>817.0</td>\n",
" <td>1</td>\n",
" <td>Star Wars (1977)</td>\n",
" <td>Action, Adventure, Romance, Sci-Fi, War</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4699</th>\n",
" <td>817.0</td>\n",
" <td>2</td>\n",
" <td>Air Force One (1997)</td>\n",
" <td>Action, Thriller</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7275</th>\n",
" <td>817.0</td>\n",
" <td>3</td>\n",
" <td>Fargo (1996)</td>\n",
" <td>Crime, Drama, Thriller</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2969</th>\n",
" <td>817.0</td>\n",
" <td>4</td>\n",
" <td>Return of the Jedi (1983)</td>\n",
" <td>Action, Adventure, Romance, Sci-Fi, War</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1954</th>\n",
" <td>817.0</td>\n",
" <td>5</td>\n",
" <td>Scream (1996)</td>\n",
" <td>Horror, Thriller</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1284</th>\n",
" <td>817.0</td>\n",
" <td>6</td>\n",
" <td>English Patient, The (1996)</td>\n",
" <td>Drama, Romance, War</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4996</th>\n",
" <td>817.0</td>\n",
" <td>7</td>\n",
" <td>Titanic (1997)</td>\n",
" <td>Action, Drama, Romance</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7667</th>\n",
" <td>817.0</td>\n",
" <td>8</td>\n",
" <td>Rock, The (1996)</td>\n",
" <td>Action, Adventure, Thriller</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5453</th>\n",
" <td>817.0</td>\n",
" <td>9</td>\n",
" <td>Liar Liar (1997)</td>\n",
" <td>Comedy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2554</th>\n",
" <td>817.0</td>\n",
" <td>10</td>\n",
" <td>Godfather, The (1972)</td>\n",
" <td>Action, Crime, Drama</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" user rec_nb title \\\n",
"356 817.0 1 Star Wars (1977) \n",
"4699 817.0 2 Air Force One (1997) \n",
"7275 817.0 3 Fargo (1996) \n",
"2969 817.0 4 Return of the Jedi (1983) \n",
"1954 817.0 5 Scream (1996) \n",
"1284 817.0 6 English Patient, The (1996) \n",
"4996 817.0 7 Titanic (1997) \n",
"7667 817.0 8 Rock, The (1996) \n",
"5453 817.0 9 Liar Liar (1997) \n",
"2554 817.0 10 Godfather, The (1972) \n",
"\n",
" genres \n",
"356 Action, Adventure, Romance, Sci-Fi, War \n",
"4699 Action, Thriller \n",
"7275 Crime, Drama, Thriller \n",
"2969 Action, Adventure, Romance, Sci-Fi, War \n",
"1954 Horror, Thriller \n",
"1284 Drama, Romance, War \n",
"4996 Action, Drama, Romance \n",
"7667 Action, Adventure, Thriller \n",
"5453 Comedy \n",
"2554 Action, Crime, Drama "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train = pd.read_csv(\n",
" \"./Datasets/ml-100k/train.csv\",\n",
" sep=\"\\t\",\n",
" header=None,\n",
" names=[\"user\", \"item\", \"rating\", \"timestamp\"],\n",
")\n",
"items = pd.read_csv(\"./Datasets/ml-100k/movies.csv\")\n",
"\n",
"user = random.choice(list(set(train[\"user\"])))\n",
"\n",
"train_content = pd.merge(train, items, left_on=\"item\", right_on=\"id\")\n",
"display(\n",
" train_content[train_content[\"user\"] == user][\n",
" [\"user\", \"rating\", \"title\", \"genres\"]\n",
" ].sort_values(by=\"rating\", ascending=False)[:15]\n",
")\n",
"\n",
"reco = np.loadtxt(\"Recommendations generated/ml-100k/Self_P3_reco.csv\", delimiter=\",\")\n",
"items = pd.read_csv(\"./Datasets/ml-100k/movies.csv\")\n",
"\n",
"# Let's ignore scores - they are not used in evaluation:\n",
"reco_users = reco[:, :1]\n",
"reco_items = reco[:, 1::2]\n",
"# Let's put them into one array\n",
"reco = np.concatenate((reco_users, reco_items), axis=1)\n",
"\n",
"# Let's rebuild it user-item dataframe\n",
"recommended = []\n",
"for row in reco:\n",
" for rec_nb, entry in enumerate(row[1:]):\n",
" recommended.append((row[0], rec_nb + 1, entry))\n",
"recommended = pd.DataFrame(recommended, columns=[\"user\", \"rec_nb\", \"item\"])\n",
"\n",
"recommended_content = pd.merge(recommended, items, left_on=\"item\", right_on=\"id\")\n",
"recommended_content[recommended_content[\"user\"] == user][\n",
" [\"user\", \"rec_nb\", \"title\", \"genres\"]\n",
"].sort_values(by=\"rec_nb\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# project task 5: generate recommendations of RP3Beta for hyperparameters found to optimize recall"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"943it [00:00, 7257.50it/s]\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>RMSE</th>\n",
" <th>MAE</th>\n",
" <th>precision</th>\n",
" <th>recall</th>\n",
" <th>F_1</th>\n",
" <th>F_05</th>\n",
" <th>precision_super</th>\n",
" <th>recall_super</th>\n",
" <th>NDCG</th>\n",
" <th>mAP</th>\n",
" <th>MRR</th>\n",
" <th>LAUC</th>\n",
" <th>HR</th>\n",
" <th>H2R</th>\n",
" <th>Reco in test</th>\n",
" <th>Test coverage</th>\n",
" <th>Shannon</th>\n",
" <th>Gini</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>3.702446</td>\n",
" <td>3.527273</td>\n",
" <td>0.282185</td>\n",
" <td>0.192092</td>\n",
" <td>0.186749</td>\n",
" <td>0.21698</td>\n",
" <td>0.204185</td>\n",
" <td>0.240096</td>\n",
" <td>0.339114</td>\n",
" <td>0.204905</td>\n",
" <td>0.572157</td>\n",
" <td>0.593544</td>\n",
" <td>0.875928</td>\n",
" <td>0.685048</td>\n",
" <td>1.0</td>\n",
" <td>0.077201</td>\n",
" <td>3.875892</td>\n",
" <td>0.974947</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" RMSE MAE precision recall F_1 F_05 \\\n",
"0 3.702446 3.527273 0.282185 0.192092 0.186749 0.21698 \n",
"\n",
" precision_super recall_super NDCG mAP MRR LAUC \\\n",
"0 0.204185 0.240096 0.339114 0.204905 0.572157 0.593544 \n",
"\n",
" HR H2R Reco in test Test coverage Shannon Gini \n",
"0 0.875928 0.685048 1.0 0.077201 3.875892 0.974947 "
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# We generated recommendations for P3, a special case of RP3Beta (with alpha=1, beta=0).\n",
"# We've observed that changing alpha and beta impacts the model performance.\n",
"\n",
"# Your task is find values alpha and beta for which recall will be the highest \n",
"# (any solution with recall higher than P3 will be accepted)\n",
"# train the model and generate recommendations.\n",
"\n",
"# save the outptut in 'Recommendations generated/ml-100k/Self_RP3Beta_estimations.csv'\n",
"# and 'Recommendations generated/ml-100k/Self_RP3Beta_reco.csv'\n",
"\n",
"import evaluation_measures as ev\n",
"\n",
"model=RP3Beta()\n",
"model.fit(train_ui, alpha=1, beta=0) #check recall values for alpha=1, beta = 0\n",
"model_reco=pd.DataFrame(model.recommend(user_code_id, item_code_id, topK=10))\n",
"model_reco.to_csv('Recommendations generated/ml-100k/Self_RP3Beta_reco.csv', index=False, header=False)\n",
"estimations_df=pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))\n",
"estimations_df.to_csv('Recommendations generated/ml-100k/Self_RP3Beta_estimations.csv', index=False, header=False)\n",
"reco=np.loadtxt('Recommendations generated/ml-100k/Self_RP3Beta_reco.csv', delimiter=',')\n",
"\n",
"ev.evaluate(test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None),\n",
" estimations_df=estimations_df, \n",
" reco=reco,\n",
" super_reactions=[4,5])"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"943it [00:00, 7166.20it/s]\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>RMSE</th>\n",
" <th>MAE</th>\n",
" <th>precision</th>\n",
" <th>recall</th>\n",
" <th>F_1</th>\n",
" <th>F_05</th>\n",
" <th>precision_super</th>\n",
" <th>recall_super</th>\n",
" <th>NDCG</th>\n",
" <th>mAP</th>\n",
" <th>MRR</th>\n",
" <th>LAUC</th>\n",
" <th>HR</th>\n",
" <th>H2R</th>\n",
" <th>Reco in test</th>\n",
" <th>Test coverage</th>\n",
" <th>Shannon</th>\n",
" <th>Gini</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>3.657073</td>\n",
" <td>3.481191</td>\n",
" <td>0.316543</td>\n",
" <td>0.213824</td>\n",
" <td>0.208731</td>\n",
" <td>0.24308</td>\n",
" <td>0.235515</td>\n",
" <td>0.272396</td>\n",
" <td>0.383442</td>\n",
" <td>0.245543</td>\n",
" <td>0.627971</td>\n",
" <td>0.604552</td>\n",
" <td>0.903499</td>\n",
" <td>0.725345</td>\n",
" <td>1.0</td>\n",
" <td>0.125541</td>\n",
" <td>4.347845</td>\n",
" <td>0.95941</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" RMSE MAE precision recall F_1 F_05 \\\n",
"0 3.657073 3.481191 0.316543 0.213824 0.208731 0.24308 \n",
"\n",
" precision_super recall_super NDCG mAP MRR LAUC \\\n",
"0 0.235515 0.272396 0.383442 0.245543 0.627971 0.604552 \n",
"\n",
" HR H2R Reco in test Test coverage Shannon Gini \n",
"0 0.903499 0.725345 1.0 0.125541 4.347845 0.95941 "
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import evaluation_measures as ev\n",
"\n",
"model=RP3Beta()\n",
"model.fit(train_ui, alpha=0.6, beta=0.5) #check recall values for alpha=0.6, beta = 0.5\n",
"model_reco=pd.DataFrame(model.recommend(user_code_id, item_code_id, topK=10))\n",
"model_reco.to_csv('Recommendations generated/ml-100k/Self_RP3Beta_reco.csv', index=False, header=False)\n",
"estimations_df=pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))\n",
"estimations_df.to_csv('Recommendations generated/ml-100k/Self_RP3Beta_estimations.csv', index=False, header=False)\n",
"reco=np.loadtxt('Recommendations generated/ml-100k/Self_RP3Beta_reco.csv', delimiter=',')\n",
"\n",
"ev.evaluate(test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None),\n",
" estimations_df=estimations_df, \n",
" reco=reco,\n",
" super_reactions=[4,5])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# project task 6 (optional): implement graph-based model of your choice "
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# for example change length of paths in RP3beta or make some other modification (but change more than input and hyperparameters)\n",
"# feel free to implement your idea or search for some ideas\n",
"\n",
"# save the outptut in 'Recommendations generated/ml-100k/Self_GraphTask_estimations.csv'\n",
"# and 'Recommendations generated/ml-100k/Self_GraphTask_reco.csv'"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}