WSS-project/P6. WRMF (Implicit ALS).ipynb

1737 lines
567 KiB
Plaintext
Raw Normal View History

2021-05-07 22:16:28 +02:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import helpers\n",
"import pandas as pd\n",
"import numpy as np\n",
"import scipy.sparse as sparse\n",
"from collections import defaultdict\n",
"from itertools import chain\n",
"import random\n",
"import time\n",
"import matplotlib.pyplot as plt\n",
"import implicit\n",
"import evaluation_measures as ev\n",
"\n",
"train_read=pd.read_csv('./Datasets/ml-100k/train.csv', sep='\\t', header=None)\n",
"test_read=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n",
"train_ui, test_ui, user_code_id, user_id_code, item_code_id, item_id_code = helpers.data_to_csr(train_read, test_read)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"alpha = 30\n",
"train_ui*=alpha\n",
"train_iu=train_ui.transpose().tocsr()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Intel MKL BLAS detected. Its highly recommend to set the environment variable 'export MKL_NUM_THREADS=1' to disable its internal multithreading\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
2021-05-29 13:05:04 +02:00
"model_id": "06aa3c321208428b9b4395b9c4dff8f6",
2021-05-07 22:16:28 +02:00
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"model = implicit.als.AlternatingLeastSquares(factors=200, regularization=0.1, iterations=10)\n",
"model.fit(train_iu)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"def top_k_recommendations(model, user_code_id, item_code_id, topK=10):\n",
"    \"\"\"Build a table with the topK recommended items for every user.\n",
"\n",
"    Relies on the notebook-level `train_ui` matrix: its row count defines the\n",
"    user codes that are iterated, and it is passed to `model.recommend` as the\n",
"    user-item matrix (with `filter_already_liked_items=True`).\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    model : fitted recommender exposing `recommend(userid, user_items, N, ...)`\n",
"    user_code_id : lookup indexed by internal user code, giving the value stored in column 0\n",
"    item_code_id : lookup indexed by internal item code, applied to every second column\n",
"    topK : number of recommendations requested from the model per user\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame with one row per user: `user_code_id[u]` followed by the\n",
"    flattened output of `model.recommend`; columns 1, 3, 5, ... are translated\n",
"    through `item_code_id` (presumably item codes alternating with scores -\n",
"    confirm against the installed `implicit` version).\n",
"    \"\"\"\n",
"    recommendations=[]\n",
"    for u in range(train_ui.shape[0]):\n",
"        # N follows topK; it was hard-coded to 10 before, so the argument had no effect\n",
"        u_recommended_items=model.recommend(u, train_ui, N=topK, filter_already_liked_items=True)\n",
"        recommendations.append([user_code_id[u]]+list(chain(*u_recommended_items)))\n",
"    reco=pd.DataFrame(recommendations)\n",
"    reco.iloc[:,1::2]=reco.iloc[:,1::2].applymap(lambda x: item_code_id[x])\n",
"    return reco\n",
"\n",
"def estimate(model, user_code_id, item_code_id, test_ui):\n",
"    \"\"\"Collect one model estimate for every non-zero (user, item) entry of `test_ui`.\n",
"\n",
"    Uses the notebook-level `train_ui` matrix as the `user_items` argument of\n",
"    `model.rank_items`. Each returned row is\n",
"    `[user_code_id[user], item_code_id[item], value]`, where `value` is element\n",
"    `[0][1]` of the `rank_items` result for that single selected item\n",
"    (presumably the predicted score - confirm against the `implicit` API).\n",
"    \"\"\"\n",
"    user_codes, item_codes = test_ui.nonzero()\n",
"    return [\n",
"        [user_code_id[user], item_code_id[item],\n",
"         model.rank_items(userid=user, user_items=train_ui, selected_items=[item])[0][1]]\n",
"        for user, item in zip(user_codes, item_codes)\n",
"    ]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"reco=top_k_recommendations(model, user_code_id, item_code_id, topK=10)\n",
"reco.to_csv('Recommendations generated/ml-100k/Ready_ImplicitALS_reco.csv', index=False, header=False)\n",
"\n",
"estimations_df=pd.DataFrame(estimate(model, user_code_id, item_code_id, test_ui))\n",
"estimations_df.to_csv('Recommendations generated/ml-100k/Ready_ImplicitALS_estimations.csv', index=False, header=False)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
2021-05-29 13:05:04 +02:00
"943it [00:00, 9827.12it/s]\n"
2021-05-07 22:16:28 +02:00
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>RMSE</th>\n",
" <th>MAE</th>\n",
" <th>precision</th>\n",
" <th>recall</th>\n",
" <th>F_1</th>\n",
" <th>F_05</th>\n",
" <th>precision_super</th>\n",
" <th>recall_super</th>\n",
" <th>NDCG</th>\n",
" <th>mAP</th>\n",
" <th>MRR</th>\n",
" <th>LAUC</th>\n",
" <th>HR</th>\n",
" <th>Reco in test</th>\n",
" <th>Test coverage</th>\n",
" <th>Shannon</th>\n",
" <th>Gini</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
2021-05-29 13:05:04 +02:00
" <td>3.269156</td>\n",
" <td>3.070003</td>\n",
" <td>0.257582</td>\n",
" <td>0.18664</td>\n",
" <td>0.178445</td>\n",
" <td>0.202974</td>\n",
" <td>0.171137</td>\n",
" <td>0.216258</td>\n",
" <td>0.308415</td>\n",
" <td>0.175796</td>\n",
" <td>0.532835</td>\n",
" <td>0.590709</td>\n",
2021-05-07 22:16:28 +02:00
" <td>0.878049</td>\n",
2021-05-29 13:05:04 +02:00
" <td>0.999788</td>\n",
2021-05-07 22:16:28 +02:00
" <td>0.504329</td>\n",
2021-05-29 13:05:04 +02:00
" <td>5.761941</td>\n",
" <td>0.820874</td>\n",
2021-05-07 22:16:28 +02:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2021-05-29 13:05:04 +02:00
" RMSE MAE precision recall F_1 F_05 \\\n",
"0 3.269156 3.070003 0.257582 0.18664 0.178445 0.202974 \n",
2021-05-07 22:16:28 +02:00
"\n",
" precision_super recall_super NDCG mAP MRR LAUC \\\n",
2021-05-29 13:05:04 +02:00
"0 0.171137 0.216258 0.308415 0.175796 0.532835 0.590709 \n",
2021-05-07 22:16:28 +02:00
"\n",
2021-05-29 13:05:04 +02:00
" HR Reco in test Test coverage Shannon Gini \n",
"0 0.878049 0.999788 0.504329 5.761941 0.820874 "
2021-05-07 22:16:28 +02:00
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import importlib\n",
"import evaluation_measures as ev\n",
"\n",
"# Reload the module so that edits made to evaluation_measures.py after the first import are picked up.\n",
"# importlib.reload replaces imp.reload: the imp module is deprecated and was removed in Python 3.12.\n",
"importlib.reload(ev)\n",
"\n",
"estimations_df=pd.read_csv('Recommendations generated/ml-100k/Ready_ImplicitALS_estimations.csv', header=None)\n",
"reco=np.loadtxt('Recommendations generated/ml-100k/Ready_ImplicitALS_reco.csv', delimiter=',')\n",
"\n",
"ev.evaluate(test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None),\n",
"            estimations_df=estimations_df, \n",
"            reco=reco,\n",
"            super_reactions=[4,5])\n",
"# Alternatively, just call ev.evaluate_all(estimations_df, reco) - the values used above were set as its defaults"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Hyperparameter tuning"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Number of latent factors"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" 0%| | 0/15 [00:00<?, ?it/s]\n",
2021-05-29 13:05:04 +02:00
"943it [00:00, 10189.63it/s]\n",
" 7%|▋ | 1/15 [00:00<00:13, 1.01it/s]\n",
"943it [00:00, 10892.14it/s]\n",
" 13%|█▎ | 2/15 [00:02<00:13, 1.02s/it]\n",
"943it [00:00, 9878.59it/s]\n",
2021-05-07 22:16:28 +02:00
" 20%|██ | 3/15 [00:03<00:12, 1.05s/it]\n",
2021-05-29 13:05:04 +02:00
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 8448.36it/s]\u001b[A\n",
" 27%|██▋ | 4/15 [00:04<00:11, 1.09s/it]\n",
"943it [00:00, 10309.18it/s]\n",
" 33%|███▎ | 5/15 [00:05<00:11, 1.12s/it]\n",
"943it [00:00, 9627.27it/s]\n",
" 40%|████ | 6/15 [00:06<00:10, 1.17s/it]\n",
"943it [00:00, 10064.14it/s]\n",
" 47%|████▋ | 7/15 [00:08<00:10, 1.27s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 7773.81it/s]\u001b[A\n",
" 53%|█████▎ | 8/15 [00:09<00:09, 1.35s/it]\n",
"943it [00:00, 10183.39it/s]\n",
" 60%|██████ | 9/15 [00:11<00:08, 1.42s/it]\n",
"943it [00:00, 10380.88it/s]\n",
" 67%|██████▋ | 10/15 [00:13<00:07, 1.49s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 8598.01it/s]\u001b[A\n",
" 73%|███████▎ | 11/15 [00:14<00:06, 1.60s/it]\n",
"0it [00:00, ?it/s]\u001b[A\n",
"943it [00:00, 8767.21it/s]\u001b[A\n",
" 80%|████████ | 12/15 [00:16<00:05, 1.69s/it]\n",
"943it [00:00, 10008.50it/s]\n",
" 87%|████████▋ | 13/15 [00:18<00:03, 1.77s/it]\n",
"943it [00:00, 10347.28it/s]\n",
" 93%|█████████▎| 14/15 [00:20<00:01, 1.80s/it]\n",
"943it [00:00, 10531.63it/s]\n",
"100%|██████████| 15/15 [00:22<00:00, 1.51s/it]\n"
2021-05-07 22:16:28 +02:00
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Factors</th>\n",
" <th>RMSE</th>\n",
" <th>MAE</th>\n",
" <th>precision</th>\n",
" <th>recall</th>\n",
" <th>F_1</th>\n",
" <th>F_05</th>\n",
" <th>precision_super</th>\n",
" <th>recall_super</th>\n",
" <th>NDCG</th>\n",
" <th>mAP</th>\n",
" <th>MRR</th>\n",
" <th>LAUC</th>\n",
" <th>HR</th>\n",
" <th>Reco in test</th>\n",
" <th>Test coverage</th>\n",
" <th>Shannon</th>\n",
" <th>Gini</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>25</td>\n",
2021-05-29 13:05:04 +02:00
" <td>2.883547</td>\n",
" <td>2.662389</td>\n",
" <td>0.160445</td>\n",
" <td>0.151301</td>\n",
" <td>0.130827</td>\n",
" <td>0.138574</td>\n",
" <td>0.099785</td>\n",
" <td>0.173271</td>\n",
" <td>0.186102</td>\n",
" <td>0.088639</td>\n",
" <td>0.321376</td>\n",
" <td>0.572603</td>\n",
" <td>0.772004</td>\n",
" <td>0.998940</td>\n",
" <td>0.670996</td>\n",
" <td>6.307257</td>\n",
" <td>0.691244</td>\n",
2021-05-07 22:16:28 +02:00
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>50</td>\n",
2021-05-29 13:05:04 +02:00
" <td>2.959637</td>\n",
" <td>2.742050</td>\n",
" <td>0.185366</td>\n",
" <td>0.164963</td>\n",
" <td>0.146859</td>\n",
" <td>0.157846</td>\n",
" <td>0.115987</td>\n",
" <td>0.187880</td>\n",
" <td>0.215360</td>\n",
" <td>0.107503</td>\n",
" <td>0.367134</td>\n",
" <td>0.579544</td>\n",
" <td>0.813362</td>\n",
" <td>0.999788</td>\n",
" <td>0.612554</td>\n",
" <td>6.128382</td>\n",
" <td>0.740882</td>\n",
2021-05-07 22:16:28 +02:00
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>75</td>\n",
2021-05-29 13:05:04 +02:00
" <td>3.035941</td>\n",
" <td>2.822359</td>\n",
" <td>0.210180</td>\n",
" <td>0.180590</td>\n",
" <td>0.161700</td>\n",
" <td>0.175574</td>\n",
" <td>0.135944</td>\n",
" <td>0.207044</td>\n",
" <td>0.249073</td>\n",
" <td>0.128013</td>\n",
" <td>0.429789</td>\n",
" <td>0.587471</td>\n",
" <td>0.864263</td>\n",
" <td>1.000000</td>\n",
" <td>0.561328</td>\n",
" <td>5.947086</td>\n",
" <td>0.783018</td>\n",
2021-05-07 22:16:28 +02:00
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>100</td>\n",
2021-05-29 13:05:04 +02:00
" <td>3.101731</td>\n",
" <td>2.891083</td>\n",
" <td>0.226829</td>\n",
" <td>0.186581</td>\n",
" <td>0.169359</td>\n",
" <td>0.186513</td>\n",
" <td>0.146674</td>\n",
" <td>0.213931</td>\n",
" <td>0.276863</td>\n",
" <td>0.147407</td>\n",
" <td>0.493030</td>\n",
" <td>0.590562</td>\n",
" <td>0.888653</td>\n",
2021-05-07 22:16:28 +02:00
" <td>0.999894</td>\n",
2021-05-29 13:05:04 +02:00
" <td>0.528860</td>\n",
" <td>5.872001</td>\n",
" <td>0.802429</td>\n",
2021-05-07 22:16:28 +02:00
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>125</td>\n",
2021-05-29 13:05:04 +02:00
" <td>3.162336</td>\n",
" <td>2.954918</td>\n",
" <td>0.240297</td>\n",
" <td>0.182437</td>\n",
" <td>0.172145</td>\n",
" <td>0.193263</td>\n",
" <td>0.156438</td>\n",
" <td>0.211291</td>\n",
" <td>0.281856</td>\n",
" <td>0.152367</td>\n",
" <td>0.482915</td>\n",
" <td>0.588517</td>\n",
" <td>0.872747</td>\n",
2021-05-07 22:16:28 +02:00
" <td>0.999894</td>\n",
2021-05-29 13:05:04 +02:00
" <td>0.506494</td>\n",
" <td>5.805071</td>\n",
" <td>0.814995</td>\n",
2021-05-07 22:16:28 +02:00
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>150</td>\n",
2021-05-29 13:05:04 +02:00
" <td>3.203476</td>\n",
" <td>2.999182</td>\n",
" <td>0.243584</td>\n",
" <td>0.183088</td>\n",
" <td>0.171628</td>\n",
" <td>0.193841</td>\n",
" <td>0.158047</td>\n",
" <td>0.210114</td>\n",
" <td>0.288516</td>\n",
" <td>0.157567</td>\n",
" <td>0.496442</td>\n",
" <td>0.588875</td>\n",
" <td>0.873807</td>\n",
" <td>0.999576</td>\n",
" <td>0.499278</td>\n",
" <td>5.766679</td>\n",
" <td>0.820675</td>\n",
2021-05-07 22:16:28 +02:00
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>175</td>\n",
2021-05-29 13:05:04 +02:00
" <td>3.241546</td>\n",
" <td>3.039763</td>\n",
" <td>0.245281</td>\n",
" <td>0.184557</td>\n",
" <td>0.172602</td>\n",
" <td>0.194957</td>\n",
" <td>0.162017</td>\n",
" <td>0.209834</td>\n",
" <td>0.294425</td>\n",
" <td>0.163943</td>\n",
" <td>0.512398</td>\n",
" <td>0.589619</td>\n",
" <td>0.875928</td>\n",
" <td>0.999682</td>\n",
" <td>0.512266</td>\n",
" <td>5.792640</td>\n",
" <td>0.816326</td>\n",
2021-05-07 22:16:28 +02:00
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>200</td>\n",
2021-05-29 13:05:04 +02:00
" <td>3.264120</td>\n",
" <td>3.064814</td>\n",
" <td>0.247932</td>\n",
" <td>0.178100</td>\n",
" <td>0.170734</td>\n",
" <td>0.195007</td>\n",
" <td>0.164485</td>\n",
" <td>0.204995</td>\n",
" <td>0.294926</td>\n",
" <td>0.166334</td>\n",
" <td>0.508886</td>\n",
" <td>0.586400</td>\n",
" <td>0.858961</td>\n",
" <td>1.000000</td>\n",
" <td>0.507215</td>\n",
" <td>5.751094</td>\n",
" <td>0.822069</td>\n",
2021-05-07 22:16:28 +02:00
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>225</td>\n",
2021-05-29 13:05:04 +02:00
" <td>3.291242</td>\n",
" <td>3.094641</td>\n",
" <td>0.251326</td>\n",
" <td>0.179623</td>\n",
" <td>0.172184</td>\n",
" <td>0.197367</td>\n",
" <td>0.163948</td>\n",
" <td>0.207106</td>\n",
" <td>0.304110</td>\n",
" <td>0.171370</td>\n",
" <td>0.532284</td>\n",
" <td>0.587198</td>\n",
" <td>0.879109</td>\n",
" <td>0.999576</td>\n",
" <td>0.489899</td>\n",
" <td>5.747192</td>\n",
" <td>0.822054</td>\n",
2021-05-07 22:16:28 +02:00
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>250</td>\n",
2021-05-29 13:05:04 +02:00
" <td>3.301411</td>\n",
" <td>3.105478</td>\n",
" <td>0.259703</td>\n",
" <td>0.186612</td>\n",
" <td>0.177475</td>\n",
" <td>0.202945</td>\n",
" <td>0.172961</td>\n",
" <td>0.217178</td>\n",
" <td>0.312328</td>\n",
" <td>0.179115</td>\n",
" <td>0.534315</td>\n",
" <td>0.590712</td>\n",
" <td>0.879109</td>\n",
" <td>0.999682</td>\n",
" <td>0.491342</td>\n",
" <td>5.709178</td>\n",
" <td>0.829702</td>\n",
2021-05-07 22:16:28 +02:00
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>275</td>\n",
2021-05-29 13:05:04 +02:00
" <td>3.321122</td>\n",
" <td>3.127153</td>\n",
" <td>0.265429</td>\n",
" <td>0.190661</td>\n",
" <td>0.180937</td>\n",
" <td>0.207261</td>\n",
" <td>0.175107</td>\n",
" <td>0.214145</td>\n",
" <td>0.315759</td>\n",
" <td>0.182169</td>\n",
" <td>0.528570</td>\n",
" <td>0.592760</td>\n",
" <td>0.875928</td>\n",
" <td>0.999682</td>\n",
" <td>0.495671</td>\n",
" <td>5.676727</td>\n",
" <td>0.832700</td>\n",
2021-05-07 22:16:28 +02:00
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>300</td>\n",
2021-05-29 13:05:04 +02:00
" <td>3.328603</td>\n",
" <td>3.135320</td>\n",
" <td>0.260976</td>\n",
" <td>0.187770</td>\n",
" <td>0.179302</td>\n",
" <td>0.204948</td>\n",
" <td>0.174356</td>\n",
" <td>0.217609</td>\n",
" <td>0.312703</td>\n",
" <td>0.178007</td>\n",
" <td>0.537737</td>\n",
" <td>0.591298</td>\n",
" <td>0.876988</td>\n",
" <td>0.999788</td>\n",
" <td>0.494949</td>\n",
" <td>5.618692</td>\n",
" <td>0.838369</td>\n",
2021-05-07 22:16:28 +02:00
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>325</td>\n",
2021-05-29 13:05:04 +02:00
" <td>3.340508</td>\n",
" <td>3.147901</td>\n",
" <td>0.267444</td>\n",
" <td>0.189835</td>\n",
" <td>0.181971</td>\n",
" <td>0.208957</td>\n",
" <td>0.176180</td>\n",
" <td>0.220058</td>\n",
" <td>0.319151</td>\n",
" <td>0.184651</td>\n",
" <td>0.541390</td>\n",
" <td>0.592342</td>\n",
" <td>0.872747</td>\n",
" <td>0.999682</td>\n",
" <td>0.492063</td>\n",
" <td>5.650832</td>\n",
" <td>0.834855</td>\n",
2021-05-07 22:16:28 +02:00
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>350</td>\n",
2021-05-29 13:05:04 +02:00
" <td>3.347853</td>\n",
" <td>3.155924</td>\n",
" <td>0.273065</td>\n",
" <td>0.194921</td>\n",
" <td>0.185570</td>\n",
" <td>0.213002</td>\n",
" <td>0.182511</td>\n",
" <td>0.226180</td>\n",
" <td>0.325784</td>\n",
" <td>0.190838</td>\n",
" <td>0.543873</td>\n",
" <td>0.594921</td>\n",
" <td>0.885472</td>\n",
" <td>0.999894</td>\n",
" <td>0.484848</td>\n",
" <td>5.633676</td>\n",
" <td>0.839884</td>\n",
2021-05-07 22:16:28 +02:00
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>375</td>\n",
2021-05-29 13:05:04 +02:00
" <td>3.355182</td>\n",
" <td>3.164099</td>\n",
" <td>0.268717</td>\n",
" <td>0.186555</td>\n",
" <td>0.179748</td>\n",
" <td>0.207861</td>\n",
" <td>0.178541</td>\n",
" <td>0.219777</td>\n",
" <td>0.319529</td>\n",
" <td>0.187262</td>\n",
" <td>0.531547</td>\n",
" <td>0.590719</td>\n",
" <td>0.878049</td>\n",
" <td>0.999364</td>\n",
" <td>0.483405</td>\n",
" <td>5.569003</td>\n",
" <td>0.845486</td>\n",
2021-05-07 22:16:28 +02:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Factors RMSE MAE precision recall F_1 F_05 \\\n",
2021-05-29 13:05:04 +02:00
"0 25 2.883547 2.662389 0.160445 0.151301 0.130827 0.138574 \n",
"0 50 2.959637 2.742050 0.185366 0.164963 0.146859 0.157846 \n",
"0 75 3.035941 2.822359 0.210180 0.180590 0.161700 0.175574 \n",
"0 100 3.101731 2.891083 0.226829 0.186581 0.169359 0.186513 \n",
"0 125 3.162336 2.954918 0.240297 0.182437 0.172145 0.193263 \n",
"0 150 3.203476 2.999182 0.243584 0.183088 0.171628 0.193841 \n",
"0 175 3.241546 3.039763 0.245281 0.184557 0.172602 0.194957 \n",
"0 200 3.264120 3.064814 0.247932 0.178100 0.170734 0.195007 \n",
"0 225 3.291242 3.094641 0.251326 0.179623 0.172184 0.197367 \n",
"0 250 3.301411 3.105478 0.259703 0.186612 0.177475 0.202945 \n",
"0 275 3.321122 3.127153 0.265429 0.190661 0.180937 0.207261 \n",
"0 300 3.328603 3.135320 0.260976 0.187770 0.179302 0.204948 \n",
"0 325 3.340508 3.147901 0.267444 0.189835 0.181971 0.208957 \n",
"0 350 3.347853 3.155924 0.273065 0.194921 0.185570 0.213002 \n",
"0 375 3.355182 3.164099 0.268717 0.186555 0.179748 0.207861 \n",
2021-05-07 22:16:28 +02:00
"\n",
" precision_super recall_super NDCG mAP MRR LAUC \\\n",
2021-05-29 13:05:04 +02:00
"0 0.099785 0.173271 0.186102 0.088639 0.321376 0.572603 \n",
"0 0.115987 0.187880 0.215360 0.107503 0.367134 0.579544 \n",
"0 0.135944 0.207044 0.249073 0.128013 0.429789 0.587471 \n",
"0 0.146674 0.213931 0.276863 0.147407 0.493030 0.590562 \n",
"0 0.156438 0.211291 0.281856 0.152367 0.482915 0.588517 \n",
"0 0.158047 0.210114 0.288516 0.157567 0.496442 0.588875 \n",
"0 0.162017 0.209834 0.294425 0.163943 0.512398 0.589619 \n",
"0 0.164485 0.204995 0.294926 0.166334 0.508886 0.586400 \n",
"0 0.163948 0.207106 0.304110 0.171370 0.532284 0.587198 \n",
"0 0.172961 0.217178 0.312328 0.179115 0.534315 0.590712 \n",
"0 0.175107 0.214145 0.315759 0.182169 0.528570 0.592760 \n",
"0 0.174356 0.217609 0.312703 0.178007 0.537737 0.591298 \n",
"0 0.176180 0.220058 0.319151 0.184651 0.541390 0.592342 \n",
"0 0.182511 0.226180 0.325784 0.190838 0.543873 0.594921 \n",
"0 0.178541 0.219777 0.319529 0.187262 0.531547 0.590719 \n",
2021-05-07 22:16:28 +02:00
"\n",
" HR Reco in test Test coverage Shannon Gini \n",
2021-05-29 13:05:04 +02:00
"0 0.772004 0.998940 0.670996 6.307257 0.691244 \n",
"0 0.813362 0.999788 0.612554 6.128382 0.740882 \n",
"0 0.864263 1.000000 0.561328 5.947086 0.783018 \n",
"0 0.888653 0.999894 0.528860 5.872001 0.802429 \n",
"0 0.872747 0.999894 0.506494 5.805071 0.814995 \n",
"0 0.873807 0.999576 0.499278 5.766679 0.820675 \n",
"0 0.875928 0.999682 0.512266 5.792640 0.816326 \n",
"0 0.858961 1.000000 0.507215 5.751094 0.822069 \n",
"0 0.879109 0.999576 0.489899 5.747192 0.822054 \n",
"0 0.879109 0.999682 0.491342 5.709178 0.829702 \n",
"0 0.875928 0.999682 0.495671 5.676727 0.832700 \n",
"0 0.876988 0.999788 0.494949 5.618692 0.838369 \n",
"0 0.872747 0.999682 0.492063 5.650832 0.834855 \n",
"0 0.885472 0.999894 0.484848 5.633676 0.839884 \n",
"0 0.878049 0.999364 0.483405 5.569003 0.845486 "
2021-05-07 22:16:28 +02:00
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from tqdm import tqdm\n",
"result=[]\n",
"for factors in tqdm([i for i in np.arange(25,400,25)]):\n",
" train_read=pd.read_csv('./Datasets/ml-100k/train.csv', sep='\\t', header=None)\n",
" test_read=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n",
" train_ui, test_ui, user_code_id, user_id_code, item_code_id, item_id_code = helpers.data_to_csr(train_read, test_read)\n",
" \n",
2021-05-29 13:05:04 +02:00
" train_ui*=alpha\n",
2021-05-07 22:16:28 +02:00
" train_iu=train_ui.transpose().tocsr()\n",
" \n",
" model = implicit.als.AlternatingLeastSquares(factors=factors, regularization=0.1, iterations=10)\n",
" model.fit(train_iu, show_progress=False)\n",
" \n",
" reco=top_k_recommendations(model, user_code_id, item_code_id, topK=10)\n",
" estimations_df=pd.DataFrame(estimate(model, user_code_id, item_code_id, test_ui))\n",
" \n",
" to_append=ev.evaluate(test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None),\n",
" estimations_df=estimations_df, \n",
" reco=np.array(reco),\n",
" super_reactions=[4,5])\n",
" to_append.insert(0, \"Factors\", factors)\n",
" result.append(to_append)\n",
" \n",
"result=pd.concat(result)\n",
"result"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
2021-05-29 13:05:04 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAABCAAAAkoCAYAAAC6aM7IAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAEAAElEQVR4nOzdd3RU17nG4d9WB0RToakg0XsxohlccO/YcQP33ltiJ3ESO3bazU3iFPvGDnHcbYp7x51mME30JpokJCGBhIQa6tK+f2ggQhZYgGbOlPdZa5Y1Z45mXo1hc/TN3t821lpERERERERERNwpyOkAIiIiIiIiIuL/VIAQEREREREREbdTAUJERERERERE3E4FCBERERERERFxOxUgRERERERERMTtVIAQEREREREREbdTAUJERERERMRPGWNmGGMeb8V5m4wxp7s/kQQyFSDE6xljMo0xlcaYcmPMHmPMK8aYSNdjrxhjrDHmkmbf8w/X8Ztc98OMMX81xuS4nifDGPP3I7zGwds/PfqDioj4CNeYWWOMiWl2fK1r7E1qcuxJ17Fxzc69yRhT32zcLTfG9PLQjyEiEhCstXdZa3/XivOGWmsXeCCSBDAVIMRXXGytjQRGAaOBXzR5bBtw48E7xpgQ4EpgZ5NzfgGkAOOAjsAUYE1Lr9Hkdl+b/xQiIv4jA5h+8I4xZjjQrukJxhgDXA8U0WScbmJps3E30lqb687QIiK+yHV9K+LzVIAQn2Kt3QN8QWMh4qCPgUnGmK6u++cB64E9Tc4ZC7xvrc21jTKtta95IrOIiJ96Hbihyf0bgebj6ilAL+BBYJoxJsxD2UREfIJrRtkvjDGbjTH7jTEvG2MijDGnu2bu/twYswd42RgTZIx51Biz0xhTaIx5yxgT1eS5JhtjvjPGFBtjspvMBH7FGPN719cxxphPXOcUGWO+NcYENclyluvrcNeM4lzX7R/GmHDXYwezPWyMyTfG5Bljbvb0eye+SQUI8SnGmHjgfGBHk8NVwEfANNf9G/j+RfAy4CfGmHuMMcNdn8qJiMjxWwZ0MsYMNsYEA1cDbzQ750Yai8Rvuu5f5MF8IiK+4lrgXKAvMAB4zHW8BxAF9AbuAB4ALgVOo7G4ux94FsAYkwh8BvwfEEvjh3VrW3ith4Ec1zndgV8CtoXzfgVMcD3PSBpnET/W5PEeQGcgDrgVeLbJh4EiR6QChPiKD4wxZUA2kA880ezx14AbjDGdaRyUP2j2+B+BP9E4wKcCu40xzacDf+CqBh+83d7WP4SIiJ85OAvibCAN2H3wAWNMexqXw82y1tYC7/D9ZRgTmo27OxERCTz/tNZmW2uLgD/w3+VtDcAT1tpqa20lcCfwK2ttjrW2GngSuMK1PONa4Gtr7Wxrba21ttBau7aF16oFegK9Xed9a61tqQBxLfBba22+tbYA+A2NS+qaPs9vXc8xFygHBp7g+yABQAUI8RWXWms7AqcDg4DDGp9ZaxfTWMl9DPjENUg3fbzeWvustXYS0IXGwf0lY8zgZq/RpcntP+77cURE/MLrwDXATXx/5tllQB0w13V/JnC+MSa2yTnLmo27fd0dWETEC2U3+XoXjbMbAAqstVVNHusNvH+waAtsAeppnMmQwOH9z47kLzTOJP7SGJNujHn0COf1cmVpKRdAobW2rsn9CiCyFa8vAU4FCPEp1tqFwCvAUy08/AaN08qO2tvBWltprX2WxmlrQ9o6o4hIoLDW7qKxGeUFwHvNHr6RxovRLNf65beBUJo0rhQREaCxeHBQInCwGW/zmQnZwPnNCrcR1trdrsd+sIhrrS2z1j5sre0DXEzjEuUzWzg1l8aCR0u5RI6bChDii/4BnG2MGdXs+DM0TgNe1PwbjDEPuRrmtDPGhLiWX3Tk+zthiIjIsbkVOMNae6DJsTjgTBp7Poziv2uI/0TLu2GIiASye40x8a6Gkr/kv31zmpsB/MEY0xvAGB
NrjJnqemwmcJYx5irXtW50C9fKGGMuMsb0c/VDK6VxBkV9C681G3jM9RoxwK/5fp8fkWOmAoT4HNc6tNeAx5sdL7LWfnOEdWyVwF9p3BljH3AvcLm1Nr3JOR8324v+fTf9CCIifsNau9Nam9rs8CnAWmvtl9baPQdvNBaKRxhjhrnOm9hs3C03xoz16A8gIuK8WcCXQLrr9vsjnPc0jY3Xv3T1RlsGjAew1mbROBvtYRq3Pl5LY+G3uf7A1zT2bFgKPGetXdDCeb+nsW/aemADsPoouURazbT8u5qIiIiIiIi4kzEmE7jNWvu101lEPEEzIERERERERETE7VSAEBERERERERG30xIMEREREREREXE7zYAQEREREREREbdTAUJERERERERE3C7E6QAtiYmJsUlJSU7HEBE5zKpVq/ZZa2OdzuEJGodFxBsF0jgMGotFxDudyFjslQWIpKQkUlObbykuIuIsY8wupzN4isZhEfFGgTQOg8ZiEfFOJzIWawmGiIiIiIiIiLidChAiIiIiIiIi4nYqQIiIiIiIiIiI23llD4iW1NbWkpOTQ1VVldNRPC4iIoL4+HhCQ0OdjiIiASwQxmGNtyLi7QJhLHYnjfMizvKZAkROTg4dO3YkKSkJY4zTcTzGWkthYSE5OTkkJyc7HUdEApi/j8Mab0XEF/j7WOxOGudFnOczSzCqqqqIjo4OuIHWGEN0dLSq3CLiOH8fhzXeiogv8Pex2J00zos4z2cKEEDADrSB+nOLiPfx9/HI338+EfEPGquOn947EWf5VAHCacHBwYwaNYphw4Zx8cUXU1xcDEBmZibGGB5//PFD5+7bt4/Q0FDuu+8+ALZu3crpp5/OqFGjGDx4MHfccQcACxYsoHPnzowaNerQ7euvv/b4zyYi4guMMVx//fWH7tfV1REbG8tFF1102HlTp05l4sSJhx178skniYuLO2y8PTiOi4iIc1JTU3nggQeO+Hhubi5XXHGFBxOJiLuoAHEM2rVrx9q1a9m4cSNRUVE8++yzhx7r06cPn3zyyaH7b7/9NkOHDj10/4EHHuDHP/4xa9euZcuWLdx///2HHjvllFNYu3btodtZZ53lmR9IRMTHdOjQgY0bN1JZWQnAV199RVxc3GHnFBcXs3r1aoqLi8nIyDjssYPj8MFbly5dPBVdRCRg1NfXH9P5KSkpPPPMM0d8vFevXrzzzjsnGktEvIAKEMdp4sSJ7N69+9D9du3aMXjwYFJTUwF48803ueqqqw49npeXR3x8/KH7w4cP91xYERE/cv755/Ppp58CMHv2bKZPn37Y4++++y4XX3wx06ZNY86cOU5EFBHxW5mZmQwaNIgbb7yRESNGcMUVV1BRUUFSUhK//e1vmTx5Mm+//TZffvklEydO5KSTTuLKK6+kvLwcgJUrV3LyySczcuRIxo0bR1lZGQsWLDg0k23hwoWHZqmNHj2asrIyMjMzGTZsGNDYA+Pmm29m+PDhjB49mvnz5wPwyiuv8KMf/YjzzjuP/v3787Of/cyZN0hEjspndsFo6jcfb2JzbmmbPueQXp144uKhP3wijVXdb775hltvvfWw4wcvdnv06EFwcDC9evUiNzcXaPzU7YwzzuDkk0/mnHPO4eabbz70ydu3337LqFGjDj3Pu+++S9++fdvk5xIRcQcnx+Fp06bx29/+losuuoj169dzyy238O233x56fPbs2TzxxBN0796dK664gl/84heHHvv73//OG2+8AUDXrl0PXbiKiPgip8birVu38uKLLzJp0iRuueUWnnvuOaBxi8vFixezb98+fvSjH/H111/ToUMH/vSnP/G3v/2NRx99lKuvvpo333yTsWPHUlpaSrt27Q577qeeeopnn32WSZMmUV5eTkRExGGPH5yBvGHDBtLS0jjnnHPYtm0bAGvXrmXNmjWEh4czcOBA7r//fhISEtrqrRGRNqAZEMegsrKSUaNGER0dTVFREWefffZhj5
933nl89dVXzJ49m6uvvvqwx26++Wa2bNnClVdeyYIFC5gwYQLV1dXA95dgqPgg4j6VNfV8t3MfJZW1TkeR4zRixAg
2021-05-07 22:16:28 +02:00
"text/plain": [
"<Figure size 1296x3024 with 18 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"import itertools\n",
"\n",
"# every column of the tuning table except the swept 'Factors' column is charted\n",
"metrics=[column for column in result.columns if column != 'Factors']\n",
"\n",
"# note: charts_per_row is passed as nrows and charts_per_column as ncols below\n",
"charts_per_row=6\n",
"charts_per_column=3\n",
"\n",
"fig, axes = plt.subplots(nrows=charts_per_row, ncols=charts_per_column,figsize=(18, 7*charts_per_row ))\n",
"# (row, column) grid positions in row-major order, one per subplot\n",
"to_iter=list(itertools.product(range(charts_per_row), range(charts_per_column)))\n",
"\n",
"for i, metric in enumerate(metrics):\n",
"    # two-column frame: x is 'Factors' (position 0), y is the metric (position 1)\n",
"    df=result[['Factors', metric]]\n",
"    df.plot(ax=axes[to_iter[i]], title=metric, x=0, y=1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Alpha"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" 0%| | 0/13 [00:00<?, ?it/s]\n",
2021-05-29 13:05:04 +02:00
"943it [00:00, 9631.70it/s]\n",
" 8%|▊ | 1/13 [00:01<00:21, 1.81s/it]\n",
"943it [00:00, 9579.52it/s]\n",
" 15%|█▌ | 2/13 [00:04<00:21, 1.96s/it]\n",
"943it [00:00, 10126.66it/s]\n",
" 23%|██▎ | 3/13 [00:06<00:19, 1.97s/it]\n",
"943it [00:00, 10327.24it/s]\n",
" 31%|███ | 4/13 [00:08<00:17, 2.00s/it]\n",
"943it [00:00, 10078.35it/s]\n",
" 38%|███▊ | 5/13 [00:10<00:16, 2.00s/it]\n",
"943it [00:00, 9576.25it/s]\n",
" 46%|████▌ | 6/13 [00:12<00:13, 1.99s/it]\n",
"943it [00:00, 9950.61it/s]\n",
" 54%|█████▍ | 7/13 [00:14<00:11, 1.96s/it]\n",
2021-05-07 22:16:28 +02:00
"0it [00:00, ?it/s]\u001b[A\n",
2021-05-29 13:05:04 +02:00
"943it [00:00, 9270.47it/s]\u001b[A\n",
" 62%|██████▏ | 8/13 [00:16<00:09, 1.96s/it]\n",
"943it [00:00, 10790.95it/s]\n",
" 69%|██████▉ | 9/13 [00:17<00:07, 1.92s/it]\n",
"943it [00:00, 9690.89it/s]\n",
" 77%|███████▋ | 10/13 [00:19<00:05, 1.90s/it]\n",
"943it [00:00, 10651.40it/s]\n",
" 85%|████████▍ | 11/13 [00:21<00:03, 1.94s/it]\n",
"943it [00:00, 10100.25it/s]\n",
" 92%|█████████▏| 12/13 [00:23<00:01, 1.94s/it]\n",
"943it [00:00, 10501.13it/s]\n",
"100%|██████████| 13/13 [00:25<00:00, 1.96s/it]\n"
2021-05-07 22:16:28 +02:00
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Alpha</th>\n",
" <th>RMSE</th>\n",
" <th>MAE</th>\n",
" <th>precision</th>\n",
" <th>recall</th>\n",
" <th>F_1</th>\n",
" <th>F_05</th>\n",
" <th>precision_super</th>\n",
" <th>recall_super</th>\n",
" <th>NDCG</th>\n",
" <th>mAP</th>\n",
" <th>MRR</th>\n",
" <th>LAUC</th>\n",
" <th>HR</th>\n",
" <th>Reco in test</th>\n",
" <th>Test coverage</th>\n",
" <th>Shannon</th>\n",
" <th>Gini</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
2021-05-29 13:05:04 +02:00
" <td>3.667029</td>\n",
" <td>3.488836</td>\n",
" <td>0.096713</td>\n",
" <td>0.071467</td>\n",
" <td>0.067186</td>\n",
" <td>0.076112</td>\n",
" <td>0.054506</td>\n",
" <td>0.074237</td>\n",
" <td>0.118179</td>\n",
" <td>0.053270</td>\n",
" <td>0.260293</td>\n",
" <td>0.532465</td>\n",
" <td>0.568399</td>\n",
" <td>0.994168</td>\n",
" <td>0.734488</td>\n",
" <td>6.693136</td>\n",
" <td>0.541379</td>\n",
2021-05-07 22:16:28 +02:00
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>3</td>\n",
2021-05-29 13:05:04 +02:00
" <td>3.628018</td>\n",
" <td>3.448919</td>\n",
" <td>0.147826</td>\n",
" <td>0.103487</td>\n",
" <td>0.099285</td>\n",
" <td>0.114498</td>\n",
" <td>0.090021</td>\n",
" <td>0.107763</td>\n",
" <td>0.176286</td>\n",
" <td>0.085984</td>\n",
" <td>0.353319</td>\n",
" <td>0.548672</td>\n",
" <td>0.709438</td>\n",
" <td>0.994168</td>\n",
" <td>0.697691</td>\n",
" <td>6.626478</td>\n",
" <td>0.573536</td>\n",
2021-05-07 22:16:28 +02:00
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>7</td>\n",
2021-05-29 13:05:04 +02:00
" <td>3.556791</td>\n",
" <td>3.375833</td>\n",
" <td>0.218982</td>\n",
" <td>0.146258</td>\n",
" <td>0.143279</td>\n",
" <td>0.167046</td>\n",
" <td>0.142811</td>\n",
" <td>0.167806</td>\n",
" <td>0.263200</td>\n",
" <td>0.146602</td>\n",
" <td>0.481264</td>\n",
" <td>0.570360</td>\n",
" <td>0.812301</td>\n",
" <td>0.997667</td>\n",
" <td>0.628427</td>\n",
" <td>6.318295</td>\n",
" <td>0.692941</td>\n",
2021-05-07 22:16:28 +02:00
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
2021-05-29 13:05:04 +02:00
" <td>3.510641</td>\n",
" <td>3.328261</td>\n",
" <td>0.243902</td>\n",
" <td>0.164535</td>\n",
" <td>0.160648</td>\n",
" <td>0.187002</td>\n",
" <td>0.162768</td>\n",
" <td>0.191288</td>\n",
" <td>0.295153</td>\n",
" <td>0.169161</td>\n",
" <td>0.524880</td>\n",
" <td>0.579620</td>\n",
" <td>0.848356</td>\n",
" <td>0.997879</td>\n",
" <td>0.585137</td>\n",
" <td>6.098749</td>\n",
" <td>0.751148</td>\n",
2021-05-07 22:16:28 +02:00
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>30</td>\n",
2021-05-29 13:05:04 +02:00
" <td>3.355705</td>\n",
" <td>3.164734</td>\n",
" <td>0.267550</td>\n",
" <td>0.188253</td>\n",
" <td>0.180257</td>\n",
" <td>0.207965</td>\n",
" <td>0.179185</td>\n",
" <td>0.218573</td>\n",
" <td>0.319477</td>\n",
" <td>0.184459</td>\n",
" <td>0.542908</td>\n",
" <td>0.591560</td>\n",
" <td>0.888653</td>\n",
" <td>1.000000</td>\n",
" <td>0.484848</td>\n",
" <td>5.570362</td>\n",
" <td>0.845232</td>\n",
2021-05-07 22:16:28 +02:00
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>50</td>\n",
2021-05-29 13:05:04 +02:00
" <td>3.273295</td>\n",
" <td>3.075884</td>\n",
" <td>0.267444</td>\n",
" <td>0.190215</td>\n",
" <td>0.180829</td>\n",
" <td>0.207961</td>\n",
" <td>0.176288</td>\n",
" <td>0.218506</td>\n",
" <td>0.317305</td>\n",
" <td>0.183281</td>\n",
" <td>0.525159</td>\n",
" <td>0.592538</td>\n",
" <td>0.880170</td>\n",
" <td>1.000000</td>\n",
" <td>0.455267</td>\n",
" <td>5.520912</td>\n",
" <td>0.854004</td>\n",
2021-05-07 22:16:28 +02:00
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>70</td>\n",
2021-05-29 13:05:04 +02:00
" <td>3.216138</td>\n",
" <td>3.014393</td>\n",
" <td>0.250795</td>\n",
" <td>0.177880</td>\n",
" <td>0.170805</td>\n",
" <td>0.196090</td>\n",
" <td>0.162017</td>\n",
" <td>0.197930</td>\n",
" <td>0.295512</td>\n",
" <td>0.165824</td>\n",
" <td>0.506624</td>\n",
" <td>0.586287</td>\n",
" <td>0.862142</td>\n",
" <td>1.000000</td>\n",
" <td>0.453102</td>\n",
" <td>5.501145</td>\n",
" <td>0.858617</td>\n",
2021-05-07 22:16:28 +02:00
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>90</td>\n",
2021-05-29 13:05:04 +02:00
" <td>3.173423</td>\n",
" <td>2.969030</td>\n",
" <td>0.248993</td>\n",
" <td>0.176408</td>\n",
" <td>0.169005</td>\n",
" <td>0.194434</td>\n",
" <td>0.164056</td>\n",
" <td>0.205219</td>\n",
" <td>0.294456</td>\n",
" <td>0.166556</td>\n",
" <td>0.504511</td>\n",
" <td>0.585555</td>\n",
" <td>0.871686</td>\n",
2021-05-07 22:16:28 +02:00
" <td>1.000000</td>\n",
2021-05-29 13:05:04 +02:00
" <td>0.453102</td>\n",
" <td>5.524114</td>\n",
" <td>0.856353</td>\n",
2021-05-07 22:16:28 +02:00
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>110</td>\n",
2021-05-29 13:05:04 +02:00
" <td>3.142262</td>\n",
" <td>2.935499</td>\n",
" <td>0.229056</td>\n",
" <td>0.161752</td>\n",
" <td>0.153913</td>\n",
" <td>0.177522</td>\n",
" <td>0.147961</td>\n",
" <td>0.179891</td>\n",
" <td>0.264254</td>\n",
" <td>0.142434</td>\n",
" <td>0.451158</td>\n",
" <td>0.578115</td>\n",
" <td>0.846235</td>\n",
" <td>0.999894</td>\n",
" <td>0.462482</td>\n",
" <td>5.455059</td>\n",
" <td>0.863686</td>\n",
2021-05-07 22:16:28 +02:00
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>130</td>\n",
2021-05-29 13:05:04 +02:00
" <td>3.111596</td>\n",
" <td>2.902799</td>\n",
" <td>0.225769</td>\n",
" <td>0.159152</td>\n",
" <td>0.152693</td>\n",
" <td>0.175730</td>\n",
" <td>0.146674</td>\n",
" <td>0.181274</td>\n",
" <td>0.259610</td>\n",
" <td>0.139655</td>\n",
" <td>0.443082</td>\n",
" <td>0.576798</td>\n",
" <td>0.834571</td>\n",
2021-05-07 22:16:28 +02:00
" <td>1.000000</td>\n",
2021-05-29 13:05:04 +02:00
" <td>0.471140</td>\n",
" <td>5.525633</td>\n",
" <td>0.858103</td>\n",
2021-05-07 22:16:28 +02:00
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>150</td>\n",
2021-05-29 13:05:04 +02:00
" <td>3.089802</td>\n",
" <td>2.878570</td>\n",
" <td>0.222587</td>\n",
" <td>0.153552</td>\n",
" <td>0.149134</td>\n",
" <td>0.172670</td>\n",
" <td>0.141524</td>\n",
" <td>0.173480</td>\n",
" <td>0.254501</td>\n",
" <td>0.134119</td>\n",
" <td>0.446513</td>\n",
" <td>0.573975</td>\n",
" <td>0.825027</td>\n",
2021-05-07 22:16:28 +02:00
" <td>1.000000</td>\n",
2021-05-29 13:05:04 +02:00
" <td>0.448773</td>\n",
" <td>5.497452</td>\n",
" <td>0.862723</td>\n",
2021-05-07 22:16:28 +02:00
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>170</td>\n",
2021-05-29 13:05:04 +02:00
" <td>3.071050</td>\n",
" <td>2.859003</td>\n",
" <td>0.218028</td>\n",
" <td>0.155008</td>\n",
" <td>0.147994</td>\n",
" <td>0.170192</td>\n",
" <td>0.142167</td>\n",
" <td>0.169686</td>\n",
" <td>0.247942</td>\n",
" <td>0.131564</td>\n",
" <td>0.420853</td>\n",
" <td>0.574683</td>\n",
" <td>0.831389</td>\n",
" <td>0.999894</td>\n",
" <td>0.459596</td>\n",
" <td>5.513748</td>\n",
" <td>0.859285</td>\n",
2021-05-07 22:16:28 +02:00
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>190</td>\n",
2021-05-29 13:05:04 +02:00
" <td>3.054821</td>\n",
" <td>2.841333</td>\n",
" <td>0.209544</td>\n",
" <td>0.148378</td>\n",
" <td>0.142746</td>\n",
" <td>0.164267</td>\n",
" <td>0.138948</td>\n",
" <td>0.172691</td>\n",
" <td>0.242354</td>\n",
" <td>0.126092</td>\n",
" <td>0.432985</td>\n",
" <td>0.571345</td>\n",
" <td>0.815483</td>\n",
" <td>1.000000</td>\n",
" <td>0.485570</td>\n",
" <td>5.525070</td>\n",
" <td>0.855854</td>\n",
2021-05-07 22:16:28 +02:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Alpha RMSE MAE precision recall F_1 F_05 \\\n",
2021-05-29 13:05:04 +02:00
"0 1 3.667029 3.488836 0.096713 0.071467 0.067186 0.076112 \n",
"0 3 3.628018 3.448919 0.147826 0.103487 0.099285 0.114498 \n",
"0 7 3.556791 3.375833 0.218982 0.146258 0.143279 0.167046 \n",
"0 10 3.510641 3.328261 0.243902 0.164535 0.160648 0.187002 \n",
"0 30 3.355705 3.164734 0.267550 0.188253 0.180257 0.207965 \n",
"0 50 3.273295 3.075884 0.267444 0.190215 0.180829 0.207961 \n",
"0 70 3.216138 3.014393 0.250795 0.177880 0.170805 0.196090 \n",
"0 90 3.173423 2.969030 0.248993 0.176408 0.169005 0.194434 \n",
"0 110 3.142262 2.935499 0.229056 0.161752 0.153913 0.177522 \n",
"0 130 3.111596 2.902799 0.225769 0.159152 0.152693 0.175730 \n",
"0 150 3.089802 2.878570 0.222587 0.153552 0.149134 0.172670 \n",
"0 170 3.071050 2.859003 0.218028 0.155008 0.147994 0.170192 \n",
"0 190 3.054821 2.841333 0.209544 0.148378 0.142746 0.164267 \n",
2021-05-07 22:16:28 +02:00
"\n",
" precision_super recall_super NDCG mAP MRR LAUC \\\n",
2021-05-29 13:05:04 +02:00
"0 0.054506 0.074237 0.118179 0.053270 0.260293 0.532465 \n",
"0 0.090021 0.107763 0.176286 0.085984 0.353319 0.548672 \n",
"0 0.142811 0.167806 0.263200 0.146602 0.481264 0.570360 \n",
"0 0.162768 0.191288 0.295153 0.169161 0.524880 0.579620 \n",
"0 0.179185 0.218573 0.319477 0.184459 0.542908 0.591560 \n",
"0 0.176288 0.218506 0.317305 0.183281 0.525159 0.592538 \n",
"0 0.162017 0.197930 0.295512 0.165824 0.506624 0.586287 \n",
"0 0.164056 0.205219 0.294456 0.166556 0.504511 0.585555 \n",
"0 0.147961 0.179891 0.264254 0.142434 0.451158 0.578115 \n",
"0 0.146674 0.181274 0.259610 0.139655 0.443082 0.576798 \n",
"0 0.141524 0.173480 0.254501 0.134119 0.446513 0.573975 \n",
"0 0.142167 0.169686 0.247942 0.131564 0.420853 0.574683 \n",
"0 0.138948 0.172691 0.242354 0.126092 0.432985 0.571345 \n",
2021-05-07 22:16:28 +02:00
"\n",
" HR Reco in test Test coverage Shannon Gini \n",
2021-05-29 13:05:04 +02:00
"0 0.568399 0.994168 0.734488 6.693136 0.541379 \n",
"0 0.709438 0.994168 0.697691 6.626478 0.573536 \n",
"0 0.812301 0.997667 0.628427 6.318295 0.692941 \n",
"0 0.848356 0.997879 0.585137 6.098749 0.751148 \n",
"0 0.888653 1.000000 0.484848 5.570362 0.845232 \n",
"0 0.880170 1.000000 0.455267 5.520912 0.854004 \n",
"0 0.862142 1.000000 0.453102 5.501145 0.858617 \n",
"0 0.871686 1.000000 0.453102 5.524114 0.856353 \n",
"0 0.846235 0.999894 0.462482 5.455059 0.863686 \n",
"0 0.834571 1.000000 0.471140 5.525633 0.858103 \n",
"0 0.825027 1.000000 0.448773 5.497452 0.862723 \n",
"0 0.831389 0.999894 0.459596 5.513748 0.859285 \n",
"0 0.815483 1.000000 0.485570 5.525070 0.855854 "
2021-05-07 22:16:28 +02:00
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from tqdm import tqdm\n",
"result=[]\n",
"for alpha in tqdm([1, 3, 7]+[i for i in np.arange(10,200,20)]):\n",
" train_read=pd.read_csv('./Datasets/ml-100k/train.csv', sep='\\t', header=None)\n",
" test_read=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n",
" train_ui, test_ui, user_code_id, user_id_code, item_code_id, item_id_code = helpers.data_to_csr(train_read, test_read)\n",
" \n",
" train_ui*=alpha\n",
" train_iu=train_ui.transpose().tocsr()\n",
" \n",
" model = implicit.als.AlternatingLeastSquares(factors=factors, regularization=0.1, iterations=10)\n",
" model.fit(train_iu, show_progress=False)\n",
" \n",
" reco=top_k_recommendations(model, user_code_id, item_code_id, topK=10)\n",
" estimations_df=pd.DataFrame(estimate(model, user_code_id, item_code_id, test_ui))\n",
" \n",
" to_append=ev.evaluate(test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None),\n",
" estimations_df=estimations_df, \n",
" reco=np.array(reco),\n",
" super_reactions=[4,5])\n",
" to_append.insert(0, \"Alpha\", alpha)\n",
" result.append(to_append)\n",
" \n",
"result=pd.concat(result)\n",
"result"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
2021-05-29 13:05:04 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAABCAAAAkoCAYAAAC6aM7IAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAEAAElEQVR4nOzdd3iV5f3H8fc3e4eRARnssDcRQQHFCS5cOGrVuqite7S122rbX+uoo3XU0arVCuLeW0SrIAHC3kMIKwFkhuz798c5iSEGZCR5nuR8Xtd1rpzzjHM+5wRuHr7nHuacQ0RERERERESkMYV5HUBEREREREREWj4VIERERERERESk0akAISIiIiIiIiKNTgUIEREREREREWl0KkCIiIiIiIiISKNTAUJEREREREREGp0KECIiIiIiIi2UmT1qZr89gOMWmNmxjZ9IQpkKEOJ7ZrbazPaY2S4z22hmT5lZQnDfU2bmzOyMOufcH9z+o+DjKDO718wKgs+zyszu28drVN/+0aRvVESkmQi2mWVmllJne36w7e1Ua9vtwW1D6xz7IzOrrNPu7jKzjCZ6GyIiIcE5d7Vz7s4DOK6Pc25KE0SSEKYChDQXpzvnEoCBwCDgl7X2LQUurX5gZhHAeGBFrWN+CeQCQ4FEYDQwu77XqHW7tsHfhYhIy7EKuLD6gZn1A2JrH2BmBlwMbKVWO13Ll3Xa3QTn3PrGDC0i0hwFr29Fmj0VIKRZcc5tBN4jUIio9gZwtJm1Dj4eA8wFNtY65gjgFefcehew2jn3TFNkFhFpof4DXFLr8aVA3XZ1JJAB3ABcYGZRTZRNRKRZCPYo+6WZLTSzb8zs32YWY2bHBnvu/sLMNgL/NrMwM7vNzFaY2RYze8HM2tR6rhFm9oWZbTOztbV6Aj9lZn8M3k8xszeDx2w1s8/MLKxWlhOC96ODPYrXB2/3m1l0cF91tlvMrNDMNpjZZU392UnzpAKENCtmlgWMBZbX2lwCvA5cEHx8Cd+9CJ4G3GxmPzWzfsFv5URE5NBNA5LMrJeZhQPnA8/WOeZSAkXiScHHpzVhPhGR5uIi4GSgK9Ad+E1wezugDdARmABcD5wJHEOguPsN8BCAmXUA3gH+DqQS+LIuv57XugUoCB6TDvwKcPUc92tgWPB5BhDoRfybWvvbAclAJnAF8FCtLwNF9kkFCGkuXjWzncBaoBD4fZ39zwCXmFkygUb51Tr7/w/4K4EGPg9YZ2Z1uwO/GqwGV9+uaug3ISLSwlT3gjgRWAysq95hZnEEhsP91zlXDrzId4dhDKvT7q5ARCT0/MM5t9Y5txX4E98Ob6sCfu+cK3XO7QF+DPzaOVfgnCsFbgfODQ7PuAj40Dn3vHOu3Dm3xTmXX89rlQPtgY7B4z5zztVXgLgIuMM5V+icKwL+QGBIXe3nuSP4HG8Du4Aeh/k5SAhQAUKaizOdc4nAsUBPYK+Jz5xznxOo5P4GeDPYSNfeX+mce8g5dzTQikDj/i8z61XnNVrVuj3eeG9HRKRF+A/wA+BHfLfn2VlABfB28PFzwFgzS611zLQ67W7Xxg4sIuJDa2vd/5pA7waAIudcSa19HYFXqou2wCKgkkBPhmz2nv9sX+4m0JP4fTNbaWa37eO4jGCW+nIBbHHOVdR6XAwkHMDrS4hTAUKaFefcp8BTwD317H6WQLey/c7t4Jzb45x7iEC3td4NnVFEJFQ4574mMBnlKcDLdXZfSuBidE1w/PJkIJJaE1eKiAgQKB5U6wBUT8Zbt2fCWmBsncJtjHNuXXDf9xZxnXM7nXO3OOe6AKcTGKJ8fD2HridQ8Kgvl8ghUwFCmqP7gRPNbGCd7Q8S6AY8te4JZnZjcMKcWDOLCA6/SOS7K2GIiMjBuQI4zjm3u9a2TOB4AnM+DOTbMcR/pf7VMEREQtk1ZpYVnFDyV3w7b05djwJ/MrOOAGaWambjgvueA04ws/OC17pt67lWxsxOM7
NuwfnQdhDoQVFZz2s9D/wm+BopwO/47jw/IgdNBQhpdoLj0J4Bfltn+1bn3Ef7GMe2B7iXwMoYm4FrgHOccytrHfNGnbXoX2mktyAi0mI451Y45/LqbB4J5Dvn3nfObay+ESgU9zezvsHjhtdpd3eZ2RFN+gZERLz3X+B9YGXw9sd9HPcAgYnX3w/OjTYNOBLAObeGQG+0WwgsfZxPoPBbVw7wIYE5G74EHnbOTannuD8SmDdtLjAPmLWfXCIHzOr/v5qIiIiIiIg0JjNbDVzpnPvQ6ywiTUE9IERERERERESk0akAISIiIiIiIiKNTkMwRERERERERKTRqQeEiIiIiEiQmY0xsyVmttzMbqtn/0VmNjd4+8LMBgS39zCz/Fq3HWZ2Y3Df7Wa2rta+U5r4bYmI+IJ6QIiIiIiIAGYWDiwlsKx3ATADuNA5t7DWMUcBi5xz35jZWOB259yR9TzPOuBI59zXZnY7sMs5d08TvRUREV+K8DpAfVJSUlynTp28jiEispeZM2duds6lep2jKagdFhE/aoJ2eCiwvHqZbjObCIwDagoQzrkvah0/Dciq53mOB1Y4574+nDBqi0XEjw6nLfZlAaJTp07k5dVdUlxExFtmdlgXks2J2mER8aMmaIczgbW1HhcAR+7jWIArgHfq2X4B8Hydbdea2SVAHnCLc+6b7wujtlhE/Ohw2mLNASEiIiIiEmD1bKt3vLKZjSZQgPhFne1RwBnA5FqbHwG6AgOBDcC9+wxgNsHM8swsr6io6KDCi4j4nQoQIiIiIiIBBUB2rcdZwPq6B5lZf+AJYJxzbkud3WOBWc65TdUbnHObnHOVzrkq4HECQz3q5Zx7zDmX65zLTU0NiVF/IhJCVIAQEREREQmYAeSYWedgT4YLgNdrH2BmHYCXgYudc0vreY4LqTP8wsza13p4FjC/QVOLiDQTvpwDQkS8V15eTkFBASUlJV5HaXIxMTFkZWURGRnpdRQRkXq19Dbaq3bYOVdhZtcC7wHhwL+ccwvM7Org/keB3wFtgYfNDKDCOZcLYGZxBFbQ+HGdp77LzAYSGM6xup79ItIIWnpb2dgaoy1WAUJE6lVQUEBiYiKdOnUieIEVEpxzbNmyhYKCAjp37ux1nO9lZjHAVCCaQJv+onPu93WOORZ4DVgV3PSyc+6OJowpIg2sJbfRXrfDzrm3gbfrbHu01v0rgSv3cW4xgeJE3e0XN3BMETkALbmtbGyN1RZrCIaI1KukpIS2bduGXGNtZrRt27Y5VcpLgeOccwMITG42xsyG1XPcZ865gcGbig8izVxLbqObYTssIj7VktvKxtZYbbF6QIjIPoVqY92c3rdzzgG7gg8jg7d6Z2wXkZalObVVB6slvzcRaVpqTw5dY3x26gEhIr4VHh7OwIED6du3L6effjrbtm0DYPXq1ZgZv/3tb2uO3bx5M5GRkVx77bUALFmyhGOPPZaBAwfSq1cvJkyYAMCUKVNITk5m4MCBNbcPP/ywyd9bQzKzcDPLBwqBD5xz0+s5bLiZzTGzd8yszz6eR0u/icgBMzMuvvjbkQUVFRWkpqZy2mmn7XXcuHHjGD58+F7bbr/9djIzM/dqi6vbeBER2b+8vDyuv/76fe5fv3495557bhMmOnAqQIiIb8XGxpKfn8/8+fNp06YNDz30UM2+Ll268Oabb9Y8njx5Mn36fPv/6uuvv56bbrqJ/Px8Fi1axHXXXVezb+TIkeTn59fcTjjhhKZ5Q40kuLTbQALLxQ01s751DpkFdAwO0/g78Oo+nkdLv4nIAYuPj2f+/Pns2bMHgA8++IDMzMy9jtm2bRuzZs1i27ZtrFq1aq991W109a1Vq1ZNFV1ExFcqKysP6vjc3FwefPDBfe7PyMjgxRdfPNxYjUIFCBFpFoYPH866detqHsfGxtKrVy/y8vIAmDRpEuedd17N/g0bNpCVlVXzuF+/fk0X1iPOuW3AFGBMne07nHO7gvffBiLNLK
XJA4pIizN27FjeeustAJ5//nkuvPDCvfa/9NJLnH766VxwwQVMnDjRi4giIp5avXo1PXv25NJLL6V///6ce+65FBc
2021-05-07 22:16:28 +02:00
"text/plain": [
"<Figure size 1296x3024 with 18 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"metrics=list(result.columns[[i not in ['Alpha'] for i in result.columns]])\n",
"\n",
"charts_per_row=6\n",
"charts_per_column=3\n",
"\n",
"fig, axes = plt.subplots(nrows=charts_per_row, ncols=charts_per_column,figsize=(18, 7*charts_per_row ))\n",
"import itertools\n",
"to_iter=[i for i in itertools.product(range(charts_per_row), range(charts_per_column))]\n",
"\n",
"for i in range(len(metrics)):\n",
" df=result[['Alpha', metrics[i]]]\n",
" df.plot(ax=axes[to_iter[i]], title=metrics[i], x=0, y=1)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
2021-05-29 13:05:04 +02:00
"943it [00:00, 10551.89it/s]\n",
"943it [00:00, 12056.38it/s]\n",
"943it [00:00, 11744.01it/s]\n",
"943it [00:00, 11899.93it/s]\n",
"943it [00:00, 11421.00it/s]\n",
"943it [00:00, 12041.26it/s]\n",
"943it [00:00, 10597.41it/s]\n",
"943it [00:00, 10562.60it/s]\n",
"943it [00:00, 12095.76it/s]\n",
"943it [00:00, 11264.41it/s]\n",
"943it [00:00, 11571.70it/s]\n",
"943it [00:00, 11714.27it/s]\n",
"943it [00:00, 11105.58it/s]\n",
"943it [00:00, 11365.66it/s]\n",
"943it [00:00, 11612.74it/s]\n"
2021-05-07 22:16:28 +02:00
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Model</th>\n",
" <th>RMSE</th>\n",
" <th>MAE</th>\n",
" <th>precision</th>\n",
" <th>recall</th>\n",
" <th>F_1</th>\n",
" <th>F_05</th>\n",
" <th>precision_super</th>\n",
" <th>recall_super</th>\n",
" <th>NDCG</th>\n",
" <th>mAP</th>\n",
" <th>MRR</th>\n",
" <th>LAUC</th>\n",
" <th>HR</th>\n",
" <th>Reco in test</th>\n",
" <th>Test coverage</th>\n",
" <th>Shannon</th>\n",
" <th>Gini</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Self_P3</td>\n",
" <td>3.702446</td>\n",
" <td>3.527273</td>\n",
" <td>0.282185</td>\n",
" <td>0.192092</td>\n",
" <td>0.186749</td>\n",
" <td>0.216980</td>\n",
" <td>0.204185</td>\n",
" <td>0.240096</td>\n",
" <td>0.339114</td>\n",
" <td>0.204905</td>\n",
" <td>0.572157</td>\n",
" <td>0.593544</td>\n",
" <td>0.875928</td>\n",
" <td>1.000000</td>\n",
" <td>0.077201</td>\n",
" <td>3.875892</td>\n",
" <td>0.974947</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ready_ImplicitALS</td>\n",
2021-05-29 13:05:04 +02:00
" <td>3.269156</td>\n",
" <td>3.070003</td>\n",
" <td>0.257582</td>\n",
" <td>0.186640</td>\n",
" <td>0.178445</td>\n",
" <td>0.202974</td>\n",
" <td>0.171137</td>\n",
" <td>0.216258</td>\n",
" <td>0.308415</td>\n",
" <td>0.175796</td>\n",
" <td>0.532835</td>\n",
" <td>0.590709</td>\n",
2021-05-07 22:16:28 +02:00
" <td>0.878049</td>\n",
2021-05-29 13:05:04 +02:00
" <td>0.999788</td>\n",
2021-05-07 22:16:28 +02:00
" <td>0.504329</td>\n",
2021-05-29 13:05:04 +02:00
" <td>5.761941</td>\n",
" <td>0.820874</td>\n",
2021-05-07 22:16:28 +02:00
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Self_TopPop</td>\n",
" <td>2.508258</td>\n",
" <td>2.217909</td>\n",
" <td>0.188865</td>\n",
" <td>0.116919</td>\n",
" <td>0.118732</td>\n",
" <td>0.141584</td>\n",
" <td>0.130472</td>\n",
" <td>0.137473</td>\n",
" <td>0.214651</td>\n",
" <td>0.111707</td>\n",
" <td>0.400939</td>\n",
" <td>0.555546</td>\n",
" <td>0.765642</td>\n",
" <td>1.000000</td>\n",
" <td>0.038961</td>\n",
" <td>3.159079</td>\n",
" <td>0.987317</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ready_SVD</td>\n",
2021-05-29 13:05:04 +02:00
" <td>0.951652</td>\n",
" <td>0.750975</td>\n",
" <td>0.096394</td>\n",
" <td>0.047252</td>\n",
" <td>0.052870</td>\n",
" <td>0.067257</td>\n",
" <td>0.085515</td>\n",
" <td>0.074754</td>\n",
" <td>0.109578</td>\n",
" <td>0.051562</td>\n",
" <td>0.235567</td>\n",
" <td>0.520341</td>\n",
" <td>0.496288</td>\n",
" <td>0.995546</td>\n",
" <td>0.208514</td>\n",
" <td>4.455755</td>\n",
" <td>0.951624</td>\n",
2021-05-07 22:16:28 +02:00
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Self_SVD</td>\n",
2021-05-29 13:05:04 +02:00
" <td>0.914393</td>\n",
" <td>0.717199</td>\n",
" <td>0.101697</td>\n",
" <td>0.042334</td>\n",
" <td>0.051787</td>\n",
" <td>0.068811</td>\n",
" <td>0.092489</td>\n",
" <td>0.072360</td>\n",
" <td>0.104839</td>\n",
" <td>0.048970</td>\n",
" <td>0.196117</td>\n",
" <td>0.517889</td>\n",
" <td>0.480382</td>\n",
" <td>0.867338</td>\n",
" <td>0.147186</td>\n",
" <td>3.852545</td>\n",
" <td>0.972694</td>\n",
2021-05-07 22:16:28 +02:00
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ready_Baseline</td>\n",
" <td>0.949459</td>\n",
" <td>0.752487</td>\n",
" <td>0.091410</td>\n",
" <td>0.037652</td>\n",
" <td>0.046030</td>\n",
" <td>0.061286</td>\n",
" <td>0.079614</td>\n",
" <td>0.056463</td>\n",
" <td>0.095957</td>\n",
" <td>0.043178</td>\n",
" <td>0.198193</td>\n",
" <td>0.515501</td>\n",
" <td>0.437964</td>\n",
" <td>1.000000</td>\n",
" <td>0.033911</td>\n",
" <td>2.836513</td>\n",
" <td>0.991139</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ready_SVDBiased</td>\n",
2021-05-29 13:05:04 +02:00
" <td>0.940413</td>\n",
" <td>0.739571</td>\n",
" <td>0.086002</td>\n",
" <td>0.035478</td>\n",
" <td>0.043196</td>\n",
" <td>0.057507</td>\n",
" <td>0.075751</td>\n",
" <td>0.053460</td>\n",
" <td>0.094897</td>\n",
" <td>0.043361</td>\n",
" <td>0.209124</td>\n",
" <td>0.514405</td>\n",
" <td>0.428420</td>\n",
" <td>0.997349</td>\n",
" <td>0.177489</td>\n",
" <td>4.212509</td>\n",
" <td>0.962656</td>\n",
2021-05-07 22:16:28 +02:00
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ready_Random</td>\n",
2021-05-29 13:05:04 +02:00
" <td>1.527935</td>\n",
" <td>1.225393</td>\n",
" <td>0.049311</td>\n",
" <td>0.020479</td>\n",
" <td>0.024944</td>\n",
" <td>0.032990</td>\n",
" <td>0.032189</td>\n",
" <td>0.024725</td>\n",
" <td>0.053647</td>\n",
" <td>0.020462</td>\n",
" <td>0.136036</td>\n",
" <td>0.506763</td>\n",
" <td>0.339343</td>\n",
" <td>0.986108</td>\n",
" <td>0.191198</td>\n",
" <td>5.101215</td>\n",
" <td>0.907796</td>\n",
2021-05-07 22:16:28 +02:00
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ready_I-KNN</td>\n",
" <td>1.030386</td>\n",
" <td>0.813067</td>\n",
" <td>0.026087</td>\n",
" <td>0.006908</td>\n",
" <td>0.010593</td>\n",
" <td>0.016046</td>\n",
" <td>0.021137</td>\n",
" <td>0.009522</td>\n",
" <td>0.024214</td>\n",
" <td>0.008958</td>\n",
" <td>0.048068</td>\n",
" <td>0.499885</td>\n",
" <td>0.154825</td>\n",
" <td>0.402333</td>\n",
" <td>0.434343</td>\n",
" <td>5.133650</td>\n",
" <td>0.877999</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ready_I-KNNBaseline</td>\n",
" <td>0.935327</td>\n",
" <td>0.737424</td>\n",
" <td>0.002545</td>\n",
" <td>0.000755</td>\n",
" <td>0.001105</td>\n",
" <td>0.001602</td>\n",
" <td>0.002253</td>\n",
" <td>0.000930</td>\n",
" <td>0.003444</td>\n",
" <td>0.001362</td>\n",
" <td>0.011760</td>\n",
" <td>0.496724</td>\n",
" <td>0.021209</td>\n",
" <td>0.482821</td>\n",
" <td>0.059885</td>\n",
" <td>2.232578</td>\n",
" <td>0.994487</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ready_U-KNN</td>\n",
" <td>1.023495</td>\n",
" <td>0.807913</td>\n",
" <td>0.000742</td>\n",
" <td>0.000205</td>\n",
" <td>0.000305</td>\n",
" <td>0.000449</td>\n",
" <td>0.000536</td>\n",
" <td>0.000198</td>\n",
" <td>0.000845</td>\n",
" <td>0.000274</td>\n",
" <td>0.002744</td>\n",
" <td>0.496441</td>\n",
" <td>0.007423</td>\n",
" <td>0.602121</td>\n",
" <td>0.010823</td>\n",
" <td>2.089186</td>\n",
" <td>0.995706</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
2021-05-29 13:05:04 +02:00
" <td>Self_BaselineIU</td>\n",
" <td>0.958136</td>\n",
" <td>0.754051</td>\n",
" <td>0.000954</td>\n",
" <td>0.000188</td>\n",
" <td>0.000298</td>\n",
" <td>0.000481</td>\n",
" <td>0.000644</td>\n",
" <td>0.000223</td>\n",
" <td>0.001043</td>\n",
" <td>0.000335</td>\n",
" <td>0.003348</td>\n",
" <td>0.496433</td>\n",
" <td>0.009544</td>\n",
" <td>0.699046</td>\n",
" <td>0.005051</td>\n",
" <td>1.945910</td>\n",
" <td>0.995669</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
2021-05-07 22:16:28 +02:00
" <td>Self_TopRated</td>\n",
" <td>1.030712</td>\n",
" <td>0.820904</td>\n",
" <td>0.000954</td>\n",
" <td>0.000188</td>\n",
" <td>0.000298</td>\n",
" <td>0.000481</td>\n",
" <td>0.000644</td>\n",
" <td>0.000223</td>\n",
" <td>0.001043</td>\n",
" <td>0.000335</td>\n",
" <td>0.003348</td>\n",
" <td>0.496433</td>\n",
" <td>0.009544</td>\n",
" <td>0.699046</td>\n",
" <td>0.005051</td>\n",
" <td>1.945910</td>\n",
" <td>0.995669</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Self_BaselineUI</td>\n",
" <td>0.967585</td>\n",
" <td>0.762740</td>\n",
" <td>0.000954</td>\n",
" <td>0.000170</td>\n",
" <td>0.000278</td>\n",
" <td>0.000463</td>\n",
" <td>0.000644</td>\n",
" <td>0.000189</td>\n",
" <td>0.000752</td>\n",
" <td>0.000168</td>\n",
" <td>0.001677</td>\n",
" <td>0.496424</td>\n",
" <td>0.009544</td>\n",
" <td>0.600530</td>\n",
" <td>0.005051</td>\n",
" <td>1.803126</td>\n",
" <td>0.996380</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Self_IKNN</td>\n",
" <td>1.018363</td>\n",
" <td>0.808793</td>\n",
" <td>0.000318</td>\n",
" <td>0.000108</td>\n",
" <td>0.000140</td>\n",
" <td>0.000189</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000214</td>\n",
" <td>0.000037</td>\n",
" <td>0.000368</td>\n",
" <td>0.496391</td>\n",
" <td>0.003181</td>\n",
" <td>0.392153</td>\n",
" <td>0.115440</td>\n",
" <td>4.174741</td>\n",
" <td>0.965327</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Model RMSE MAE precision recall F_1 \\\n",
"0 Self_P3 3.702446 3.527273 0.282185 0.192092 0.186749 \n",
2021-05-29 13:05:04 +02:00
"0 Ready_ImplicitALS 3.269156 3.070003 0.257582 0.186640 0.178445 \n",
2021-05-07 22:16:28 +02:00
"0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 0.118732 \n",
2021-05-29 13:05:04 +02:00
"0 Ready_SVD 0.951652 0.750975 0.096394 0.047252 0.052870 \n",
"0 Self_SVD 0.914393 0.717199 0.101697 0.042334 0.051787 \n",
2021-05-07 22:16:28 +02:00
"0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 0.046030 \n",
2021-05-29 13:05:04 +02:00
"0 Ready_SVDBiased 0.940413 0.739571 0.086002 0.035478 0.043196 \n",
"0 Ready_Random 1.527935 1.225393 0.049311 0.020479 0.024944 \n",
2021-05-07 22:16:28 +02:00
"0 Ready_I-KNN 1.030386 0.813067 0.026087 0.006908 0.010593 \n",
"0 Ready_I-KNNBaseline 0.935327 0.737424 0.002545 0.000755 0.001105 \n",
"0 Ready_U-KNN 1.023495 0.807913 0.000742 0.000205 0.000305 \n",
2021-05-29 13:05:04 +02:00
"0 Self_BaselineIU 0.958136 0.754051 0.000954 0.000188 0.000298 \n",
2021-05-07 22:16:28 +02:00
"0 Self_TopRated 1.030712 0.820904 0.000954 0.000188 0.000298 \n",
"0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 0.000278 \n",
"0 Self_IKNN 1.018363 0.808793 0.000318 0.000108 0.000140 \n",
"\n",
" F_05 precision_super recall_super NDCG mAP MRR \\\n",
"0 0.216980 0.204185 0.240096 0.339114 0.204905 0.572157 \n",
2021-05-29 13:05:04 +02:00
"0 0.202974 0.171137 0.216258 0.308415 0.175796 0.532835 \n",
2021-05-07 22:16:28 +02:00
"0 0.141584 0.130472 0.137473 0.214651 0.111707 0.400939 \n",
2021-05-29 13:05:04 +02:00
"0 0.067257 0.085515 0.074754 0.109578 0.051562 0.235567 \n",
"0 0.068811 0.092489 0.072360 0.104839 0.048970 0.196117 \n",
2021-05-07 22:16:28 +02:00
"0 0.061286 0.079614 0.056463 0.095957 0.043178 0.198193 \n",
2021-05-29 13:05:04 +02:00
"0 0.057507 0.075751 0.053460 0.094897 0.043361 0.209124 \n",
"0 0.032990 0.032189 0.024725 0.053647 0.020462 0.136036 \n",
2021-05-07 22:16:28 +02:00
"0 0.016046 0.021137 0.009522 0.024214 0.008958 0.048068 \n",
"0 0.001602 0.002253 0.000930 0.003444 0.001362 0.011760 \n",
"0 0.000449 0.000536 0.000198 0.000845 0.000274 0.002744 \n",
"0 0.000481 0.000644 0.000223 0.001043 0.000335 0.003348 \n",
2021-05-29 13:05:04 +02:00
"0 0.000481 0.000644 0.000223 0.001043 0.000335 0.003348 \n",
2021-05-07 22:16:28 +02:00
"0 0.000463 0.000644 0.000189 0.000752 0.000168 0.001677 \n",
"0 0.000189 0.000000 0.000000 0.000214 0.000037 0.000368 \n",
"\n",
" LAUC HR Reco in test Test coverage Shannon Gini \n",
"0 0.593544 0.875928 1.000000 0.077201 3.875892 0.974947 \n",
2021-05-29 13:05:04 +02:00
"0 0.590709 0.878049 0.999788 0.504329 5.761941 0.820874 \n",
2021-05-07 22:16:28 +02:00
"0 0.555546 0.765642 1.000000 0.038961 3.159079 0.987317 \n",
2021-05-29 13:05:04 +02:00
"0 0.520341 0.496288 0.995546 0.208514 4.455755 0.951624 \n",
"0 0.517889 0.480382 0.867338 0.147186 3.852545 0.972694 \n",
2021-05-07 22:16:28 +02:00
"0 0.515501 0.437964 1.000000 0.033911 2.836513 0.991139 \n",
2021-05-29 13:05:04 +02:00
"0 0.514405 0.428420 0.997349 0.177489 4.212509 0.962656 \n",
"0 0.506763 0.339343 0.986108 0.191198 5.101215 0.907796 \n",
2021-05-07 22:16:28 +02:00
"0 0.499885 0.154825 0.402333 0.434343 5.133650 0.877999 \n",
"0 0.496724 0.021209 0.482821 0.059885 2.232578 0.994487 \n",
"0 0.496441 0.007423 0.602121 0.010823 2.089186 0.995706 \n",
"0 0.496433 0.009544 0.699046 0.005051 1.945910 0.995669 \n",
2021-05-29 13:05:04 +02:00
"0 0.496433 0.009544 0.699046 0.005051 1.945910 0.995669 \n",
2021-05-07 22:16:28 +02:00
"0 0.496424 0.009544 0.600530 0.005051 1.803126 0.996380 \n",
"0 0.496391 0.003181 0.392153 0.115440 4.174741 0.965327 "
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import evaluation_measures as ev\n",
"\n",
"dir_path=\"Recommendations generated/ml-100k/\"\n",
"super_reactions=[4,5]\n",
"test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n",
"\n",
"ev.evaluate_all(test, dir_path, super_reactions)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# project task 7: Check how number of iterations of WRMF model influence the evaluation metrics"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# we already checked how parameters alpha and factors influence the model performance\n",
"# your task is to do a similar thing with param iterations\n",
"\n",
"# expected output is a table reporting the evaluation metrics"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}