{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "### Prepare test set" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "slideshow": { "slide_type": "-" } }, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import scipy.sparse as sparse\n", "from collections import defaultdict\n", "from itertools import chain\n", "import random\n", "from tqdm import tqdm\n", "\n", "# In evaluation we do not load train set - it is not needed\n", "test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n", "test.columns=['user', 'item', 'rating', 'timestamp']\n", "\n", "test['user_code'] = test['user'].astype(\"category\").cat.codes\n", "test['item_code'] = test['item'].astype(\"category\").cat.codes\n", "\n", "user_code_id = dict(enumerate(test['user'].astype(\"category\").cat.categories))\n", "user_id_code = dict((v, k) for k, v in user_code_id.items())\n", "item_code_id = dict(enumerate(test['item'].astype(\"category\").cat.categories))\n", "item_id_code = dict((v, k) for k, v in item_code_id.items())\n", "\n", "test_ui = sparse.csr_matrix((test['rating'], (test['user_code'], test['item_code'])))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Estimations metrics" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "estimations_df=pd.read_csv('Recommendations generated/ml-100k/Ready_Baseline_estimations.csv', header=None)\n", "estimations_df.columns=['user', 'item' ,'score']\n", "\n", "estimations_df['user_code']=[user_id_code[user] for user in estimations_df['user']]\n", "estimations_df['item_code']=[item_id_code[item] for item in estimations_df['item']]\n", "estimations=sparse.csr_matrix((estimations_df['score'], (estimations_df['user_code'], estimations_df['item_code'])), shape=test_ui.shape)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "def estimations_metrics(test_ui, estimations):\n", " result=[]\n", "\n", " RMSE=(np.sum((estimations.data-test_ui.data)**2)/estimations.nnz)**(1/2)\n", " result.append(['RMSE', RMSE])\n", "\n", " MAE=np.sum(abs(estimations.data-test_ui.data))/estimations.nnz\n", " result.append(['MAE', MAE])\n", " \n", " df_result=(pd.DataFrame(list(zip(*result))[1])).T\n", " df_result.columns=list(zip(*result))[0]\n", " return df_result" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RMSEMAE
00.9494590.752487
\n", "
" ], "text/plain": [ " RMSE MAE\n", "0 0.949459 0.752487" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# in case of error (in the laboratories) you might have to switch to the other version of pandas\n", "# try !pip3 install pandas=='1.0.3' (or pip if you use python 2) and restart the kernel\n", "\n", "estimations_metrics(test_ui, estimations)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Ranking metrics" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[663, 475, 62, ..., 472, 269, 503],\n", " [ 48, 313, 475, ..., 591, 175, 466],\n", " [351, 313, 475, ..., 591, 175, 466],\n", " ...,\n", " [259, 313, 475, ..., 11, 591, 175],\n", " [ 33, 313, 475, ..., 11, 591, 175],\n", " [ 77, 313, 475, ..., 11, 591, 175]])" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import numpy as np\n", "reco = np.loadtxt('Recommendations generated/ml-100k/Ready_Baseline_reco.csv', delimiter=',')\n", "# Let's ignore scores - they are not used in evaluation: \n", "users=reco[:,:1]\n", "items=reco[:,1::2]\n", "# Let's use inner ids instead of real ones\n", "users=np.vectorize(lambda x: user_id_code.setdefault(x, -1))(users)\n", "items=np.vectorize(lambda x: item_id_code.setdefault(x, -1))(items) # maybe items we recommend are not in test set\n", "# Let's put them into one array\n", "reco=np.concatenate((users, items), axis=1)\n", "reco" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "def ranking_metrics(test_ui, reco, super_reactions=[], topK=10):\n", " \n", " nb_items=test_ui.shape[1]\n", " relevant_users, super_relevant_users, prec, rec, F_1, F_05, prec_super, rec_super, ndcg, mAP, MRR, LAUC, HR=\\\n", " 0,0,0,0,0,0,0,0,0,0,0,0,0\n", " \n", " cg = (1.0 / np.log2(np.arange(2, topK + 2)))\n", " cg_sum = np.cumsum(cg)\n", " \n", " for (nb_user, user) in tqdm(enumerate(reco[:,0])):\n", " u_rated_items=test_ui.indices[test_ui.indptr[user]:test_ui.indptr[user+1]]\n", " nb_u_rated_items=len(u_rated_items)\n", " if nb_u_rated_items>0: # skip users with no items in test set (still possible that there will be no super items)\n", " relevant_users+=1\n", " \n", " u_super_items=u_rated_items[np.vectorize(lambda x: x in super_reactions)\\\n", " (test_ui.data[test_ui.indptr[user]:test_ui.indptr[user+1]])]\n", " # more natural seems u_super_items=[item for item in u_rated_items if test_ui[user,item] in super_reactions]\n", " # but accesing test_ui[user,item] is expensive -we should avoid doing it\n", " if len(u_super_items)>0:\n", " super_relevant_users+=1\n", " \n", " user_successes=np.zeros(topK)\n", " nb_user_successes=0\n", " user_super_successes=np.zeros(topK)\n", " nb_user_super_successes=0\n", " \n", " # evaluation\n", " for (item_position,item) in enumerate(reco[nb_user,1:topK+1]):\n", " if item in u_rated_items:\n", " user_successes[item_position]=1\n", " nb_user_successes+=1\n", " if item in u_super_items:\n", " user_super_successes[item_position]=1\n", " nb_user_super_successes+=1\n", " \n", " prec_u=nb_user_successes/topK \n", " prec+=prec_u\n", " \n", " rec_u=nb_user_successes/nb_u_rated_items\n", " rec+=rec_u\n", " \n", " F_1+=2*(prec_u*rec_u)/(prec_u+rec_u) if prec_u+rec_u>0 else 0\n", " F_05+=(0.5**2+1)*(prec_u*rec_u)/(0.5**2*prec_u+rec_u) if prec_u+rec_u>0 else 0\n", " \n", " prec_super+=nb_user_super_successes/topK\n", " rec_super+=nb_user_super_successes/max(len(u_super_items),1) # to set 0 if no super items\n", " ndcg+=np.dot(user_successes,cg)/cg_sum[min(topK, nb_u_rated_items)-1]\n", " \n", " cumsum_successes=np.cumsum(user_successes)\n", " mAP+=np.dot(cumsum_successes/np.arange(1,topK+1), user_successes)/min(topK, nb_u_rated_items)\n", " MRR+=1/(user_successes.nonzero()[0][0]+1) if user_successes.nonzero()[0].size>0 else 0\n", " LAUC+=(np.dot(cumsum_successes, 1-user_successes)+\\\n", " (nb_user_successes+nb_u_rated_items)/2*((nb_items-nb_u_rated_items)-(topK-nb_user_successes)))/\\\n", " ((nb_items-nb_u_rated_items)*nb_u_rated_items)\n", " \n", " HR+=nb_user_successes>0\n", " \n", " \n", " result=[]\n", " result.append(('precision', prec/relevant_users))\n", " result.append(('recall', rec/relevant_users))\n", " result.append(('F_1', F_1/relevant_users))\n", " result.append(('F_05', F_05/relevant_users))\n", " result.append(('precision_super', prec_super/super_relevant_users))\n", " result.append(('recall_super', rec_super/super_relevant_users))\n", " result.append(('NDCG', ndcg/relevant_users))\n", " result.append(('mAP', mAP/relevant_users))\n", " result.append(('MRR', MRR/relevant_users))\n", " result.append(('LAUC', LAUC/relevant_users))\n", " result.append(('HR', HR/relevant_users))\n", "\n", " df_result=(pd.DataFrame(list(zip(*result))[1])).T\n", " df_result.columns=list(zip(*result))[0]\n", " return df_result" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "943it [00:00, 2282.19it/s]\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
precisionrecallF_1F_05precision_superrecall_superNDCGmAPMRRLAUCHR
00.091410.0376520.046030.0612860.0796140.0564630.0959570.0431780.1981930.5155010.437964
\n", "
" ], "text/plain": [ " precision recall F_1 F_05 precision_super recall_super \\\n", "0 0.09141 0.037652 0.04603 0.061286 0.079614 0.056463 \n", "\n", " NDCG mAP MRR LAUC HR \n", "0 0.095957 0.043178 0.198193 0.515501 0.437964 " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ranking_metrics(test_ui, reco, super_reactions=[4,5], topK=10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Diversity metrics" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "def diversity_metrics(test_ui, reco, topK=10):\n", " \n", " frequencies=defaultdict(int)\n", " \n", " # let's assign 0 to all items in test set\n", " for item in list(set(test_ui.indices)):\n", " frequencies[item]=0\n", " \n", " # counting frequencies\n", " for item in reco[:,1:].flat:\n", " frequencies[item]+=1\n", " \n", " nb_reco_outside_test=frequencies[-1]\n", " del frequencies[-1]\n", " \n", " frequencies=np.array(list(frequencies.values()))\n", " \n", " nb_rec_items=len(frequencies[frequencies>0])\n", " nb_reco_inside_test=np.sum(frequencies)\n", " \n", " frequencies=frequencies/np.sum(frequencies)\n", " frequencies=np.sort(frequencies)\n", " \n", " with np.errstate(divide='ignore'): # let's put zeros put items with 0 frequency and ignore division warning\n", " log_frequencies=np.nan_to_num(np.log(frequencies), posinf=0, neginf=0)\n", " \n", " result=[]\n", " result.append(('Reco in test', nb_reco_inside_test/(nb_reco_inside_test+nb_reco_outside_test)))\n", " result.append(('Test coverage', nb_rec_items/test_ui.shape[1]))\n", " result.append(('Shannon', -np.dot(frequencies, log_frequencies)))\n", " result.append(('Gini', np.dot(frequencies, np.arange(1-len(frequencies), len(frequencies), 2))/(len(frequencies)-1)))\n", " \n", " df_result=(pd.DataFrame(list(zip(*result))[1])).T\n", " df_result.columns=list(zip(*result))[0]\n", " return df_result" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Reco in testTest coverageShannonGini
01.00.0339112.8365130.991139
\n", "
" ], "text/plain": [ " Reco in test Test coverage Shannon Gini\n", "0 1.0 0.033911 2.836513 0.991139" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# in case of errors try !pip3 install numpy==1.18.4 (or pip if you use python 2) and restart the kernel\n", "\n", "import evaluation_measures as ev\n", "import imp\n", "imp.reload(ev)\n", "\n", "x=diversity_metrics(test_ui, reco, topK=10)\n", "x" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# To be used in other notebooks" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "943it [00:00, 2668.06it/s]\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RMSEMAEprecisionrecallF_1F_05precision_superrecall_superNDCGmAPMRRLAUCHRF_2Whole_averageReco in testTest coverageShannonGini
00.9494590.7524870.091410.0376520.046030.0612860.0796140.0564630.0959570.0431780.1981930.5155010.4379640.0395490.14191.00.0339112.8365130.991139
\n", "
" ], "text/plain": [ " RMSE MAE precision recall F_1 F_05 \\\n", "0 0.949459 0.752487 0.09141 0.037652 0.04603 0.061286 \n", "\n", " precision_super recall_super NDCG mAP MRR LAUC \\\n", "0 0.079614 0.056463 0.095957 0.043178 0.198193 0.515501 \n", "\n", " HR F_2 Whole_average Reco in test Test coverage Shannon \\\n", "0 0.437964 0.039549 0.1419 1.0 0.033911 2.836513 \n", "\n", " Gini \n", "0 0.991139 " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import evaluation_measures as ev\n", "import imp\n", "imp.reload(ev)\n", "\n", "estimations_df=pd.read_csv('Recommendations generated/ml-100k/Ready_Baseline_estimations.csv', header=None)\n", "reco=np.loadtxt('Recommendations generated/ml-100k/Ready_Baseline_reco.csv', delimiter=',')\n", "\n", "ev.evaluate(test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None),\n", " estimations_df=estimations_df, \n", " reco=reco,\n", " super_reactions=[4,5])\n", "#also you can just type ev.evaluate_all(estimations_df, reco) - I put above values as default" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "943it [00:00, 3513.93it/s]\n", "943it [00:00, 5048.26it/s]\n", "943it [00:00, 4530.47it/s]\n", "943it [00:00, 5016.38it/s]\n", "943it [00:00, 3958.29it/s]\n", "943it [00:00, 4004.71it/s]\n", "943it [00:00, 4465.19it/s]\n", "943it [00:00, 4760.13it/s]\n", "943it [00:00, 4948.57it/s]\n", "943it [00:00, 3895.70it/s]\n", "943it [00:00, 4446.36it/s]\n", "943it [00:00, 5322.70it/s]\n", "943it [00:00, 4464.53it/s]\n", "943it [00:00, 5275.54it/s]\n", "943it [00:00, 5161.31it/s]\n", "943it [00:00, 2960.67it/s]\n", "943it [00:00, 4734.14it/s]\n", "943it [00:00, 3319.18it/s]\n" ] } ], "source": [ "import evaluation_measures as ev\n", "import imp\n", "imp.reload(ev)\n", "\n", "dir_path=\"Recommendations generated/ml-100k/\"\n", "super_reactions=[4,5]\n", "test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n", "\n", "df=ev.evaluate_all(test, dir_path, super_reactions)\n", "#also you can just type ev.evaluate_all() - I put above values as default" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ModelRMSEMAEprecisionrecallF_1F_05precision_superrecall_super
0Self_RP3Beta3.7029283.5277130.3226940.2160690.2121520.2475380.2452790.284983
0Self_P33.7024463.5272730.2821850.1920920.1867490.2169800.2041850.240096
0Self_TopPop2.5082582.2179090.1888650.1169190.1187320.1415840.1304720.137473
0Self_SVDBaseline3.6456663.4802460.1378580.0823980.0841510.1010630.1079400.109393
0Ready_SVD0.9525630.7501580.0944860.0462740.0513890.0656250.0826180.074150
0Self_SVD0.9148900.7179620.1029690.0423250.0520220.0693130.0935620.074994
0Ready_Baseline0.9494590.7524870.0914100.0376520.0460300.0612860.0796140.056463
0Self_KNNSurprisetask0.9462550.7452090.0834570.0328480.0412270.0554930.0747850.048890
0Self_TopRated2.5082582.2179090.0793210.0326670.0399830.0531700.0688840.048582
0Ready_SVDBiased0.9421410.7427600.0812300.0323440.0403020.0539320.0726390.051126
0Self_GlobalAvg1.1257600.9435340.0611880.0259680.0313830.0413430.0405580.032107
0Ready_Random1.5256331.2257140.0477200.0220490.0254940.0328450.0290770.025015
0Ready_I-KNN1.0303860.8130670.0260870.0069080.0105930.0160460.0211370.009522
0Ready_I-KNNBaseline0.9353270.7374240.0025450.0007550.0011050.0016020.0022530.000930
0Ready_U-KNN1.0234950.8079130.0007420.0002050.0003050.0004490.0005360.000198
0Self_BaselineIU0.9581360.7540510.0009540.0001880.0002980.0004810.0006440.000223
0Self_BaselineUI0.9675850.7627400.0009540.0001700.0002780.0004630.0006440.000189
0Self_IKNN1.0183630.8087930.0003180.0001080.0001400.0001890.0000000.000000
\n", "
" ], "text/plain": [ " Model RMSE MAE precision recall F_1 \\\n", "0 Self_RP3Beta 3.702928 3.527713 0.322694 0.216069 0.212152 \n", "0 Self_P3 3.702446 3.527273 0.282185 0.192092 0.186749 \n", "0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 0.118732 \n", "0 Self_SVDBaseline 3.645666 3.480246 0.137858 0.082398 0.084151 \n", "0 Ready_SVD 0.952563 0.750158 0.094486 0.046274 0.051389 \n", "0 Self_SVD 0.914890 0.717962 0.102969 0.042325 0.052022 \n", "0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 0.046030 \n", "0 Self_KNNSurprisetask 0.946255 0.745209 0.083457 0.032848 0.041227 \n", "0 Self_TopRated 2.508258 2.217909 0.079321 0.032667 0.039983 \n", "0 Ready_SVDBiased 0.942141 0.742760 0.081230 0.032344 0.040302 \n", "0 Self_GlobalAvg 1.125760 0.943534 0.061188 0.025968 0.031383 \n", "0 Ready_Random 1.525633 1.225714 0.047720 0.022049 0.025494 \n", "0 Ready_I-KNN 1.030386 0.813067 0.026087 0.006908 0.010593 \n", "0 Ready_I-KNNBaseline 0.935327 0.737424 0.002545 0.000755 0.001105 \n", "0 Ready_U-KNN 1.023495 0.807913 0.000742 0.000205 0.000305 \n", "0 Self_BaselineIU 0.958136 0.754051 0.000954 0.000188 0.000298 \n", "0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 0.000278 \n", "0 Self_IKNN 1.018363 0.808793 0.000318 0.000108 0.000140 \n", "\n", " F_05 precision_super recall_super \n", "0 0.247538 0.245279 0.284983 \n", "0 0.216980 0.204185 0.240096 \n", "0 0.141584 0.130472 0.137473 \n", "0 0.101063 0.107940 0.109393 \n", "0 0.065625 0.082618 0.074150 \n", "0 0.069313 0.093562 0.074994 \n", "0 0.061286 0.079614 0.056463 \n", "0 0.055493 0.074785 0.048890 \n", "0 0.053170 0.068884 0.048582 \n", "0 0.053932 0.072639 0.051126 \n", "0 0.041343 0.040558 0.032107 \n", "0 0.032845 0.029077 0.025015 \n", "0 0.016046 0.021137 0.009522 \n", "0 0.001602 0.002253 0.000930 \n", "0 0.000449 0.000536 0.000198 \n", "0 0.000481 0.000644 0.000223 \n", "0 0.000463 0.000644 0.000189 \n", "0 0.000189 0.000000 0.000000 " ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.iloc[:,:9]" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ModelNDCGmAPMRRLAUCHRF_2Whole_averageReco in testTest coverageShannonGini
0Self_RP3Beta0.3882710.2482390.6363180.6056830.9109230.2054500.3769670.9997880.1789324.5496630.950182
0Self_P30.3391140.2049050.5721570.5935440.8759280.1817020.3408031.0000000.0772013.8758920.974947
0Self_TopPop0.2146510.1117070.4009390.5555460.7656420.1127500.2496071.0000000.0389613.1590790.987317
0Self_SVDBaseline0.1644770.0829730.3423740.5380970.6383880.0798600.2057480.9998940.2792215.1590760.907220
0Ready_SVD0.1093200.0513830.2406930.5198490.4750800.0462370.1547590.9934250.2063494.4429960.952832
0Self_SVD0.1054160.0502780.1915330.5178900.4623540.0445910.1506040.8676560.1414143.9292490.971112
0Ready_Baseline0.0959570.0431780.1981930.5155010.4379640.0395490.1419001.0000000.0339112.8365130.991139
0Self_KNNSurprisetask0.0895770.0409020.1890570.5130760.4178150.0349960.1351770.8885470.1305923.6118060.978659
0Self_TopRated0.0707660.0276020.1147900.5129430.4114530.0343850.1245461.0000000.0245312.7612380.991660
0Ready_SVDBiased0.0875520.0393460.1912850.5128180.4167550.0344050.1344780.9976670.1652244.1475790.964690
0Self_GlobalAvg0.0676950.0274700.1711870.5095460.3849420.0272130.1183831.0000000.0259742.7117720.992003
0Ready_Random0.0517570.0192420.1281810.5075430.3276780.0226280.1032690.9872750.1847045.1051220.906561
0Ready_I-KNN0.0242140.0089580.0480680.4998850.1548250.0080070.0695210.4023330.4343435.1336500.877999
0Ready_I-KNNBaseline0.0034440.0013620.0117600.4967240.0212090.0008620.0453790.4828210.0598852.2325780.994487
0Ready_U-KNN0.0008450.0002740.0027440.4964410.0074230.0002350.0425330.6021210.0108232.0891860.995706
0Self_BaselineIU0.0010430.0003350.0033480.4964330.0095440.0002200.0428090.6990460.0050511.9459100.995669
0Self_BaselineUI0.0007520.0001680.0016770.4964240.0095440.0002010.0426220.6005300.0050511.8031260.996380
0Self_IKNN0.0002140.0000370.0003680.4963910.0031810.0001180.0417550.3921530.1154404.1747410.965327
\n", "
" ], "text/plain": [ " Model NDCG mAP MRR LAUC HR \\\n", "0 Self_RP3Beta 0.388271 0.248239 0.636318 0.605683 0.910923 \n", "0 Self_P3 0.339114 0.204905 0.572157 0.593544 0.875928 \n", "0 Self_TopPop 0.214651 0.111707 0.400939 0.555546 0.765642 \n", "0 Self_SVDBaseline 0.164477 0.082973 0.342374 0.538097 0.638388 \n", "0 Ready_SVD 0.109320 0.051383 0.240693 0.519849 0.475080 \n", "0 Self_SVD 0.105416 0.050278 0.191533 0.517890 0.462354 \n", "0 Ready_Baseline 0.095957 0.043178 0.198193 0.515501 0.437964 \n", "0 Self_KNNSurprisetask 0.089577 0.040902 0.189057 0.513076 0.417815 \n", "0 Self_TopRated 0.070766 0.027602 0.114790 0.512943 0.411453 \n", "0 Ready_SVDBiased 0.087552 0.039346 0.191285 0.512818 0.416755 \n", "0 Self_GlobalAvg 0.067695 0.027470 0.171187 0.509546 0.384942 \n", "0 Ready_Random 0.051757 0.019242 0.128181 0.507543 0.327678 \n", "0 Ready_I-KNN 0.024214 0.008958 0.048068 0.499885 0.154825 \n", "0 Ready_I-KNNBaseline 0.003444 0.001362 0.011760 0.496724 0.021209 \n", "0 Ready_U-KNN 0.000845 0.000274 0.002744 0.496441 0.007423 \n", "0 Self_BaselineIU 0.001043 0.000335 0.003348 0.496433 0.009544 \n", "0 Self_BaselineUI 0.000752 0.000168 0.001677 0.496424 0.009544 \n", "0 Self_IKNN 0.000214 0.000037 0.000368 0.496391 0.003181 \n", "\n", " F_2 Whole_average Reco in test Test coverage Shannon Gini \n", "0 0.205450 0.376967 0.999788 0.178932 4.549663 0.950182 \n", "0 0.181702 0.340803 1.000000 0.077201 3.875892 0.974947 \n", "0 0.112750 0.249607 1.000000 0.038961 3.159079 0.987317 \n", "0 0.079860 0.205748 0.999894 0.279221 5.159076 0.907220 \n", "0 0.046237 0.154759 0.993425 0.206349 4.442996 0.952832 \n", "0 0.044591 0.150604 0.867656 0.141414 3.929249 0.971112 \n", "0 0.039549 0.141900 1.000000 0.033911 2.836513 0.991139 \n", "0 0.034996 0.135177 0.888547 0.130592 3.611806 0.978659 \n", "0 0.034385 0.124546 1.000000 0.024531 2.761238 0.991660 \n", "0 0.034405 0.134478 0.997667 0.165224 4.147579 0.964690 \n", "0 0.027213 0.118383 1.000000 0.025974 2.711772 0.992003 \n", "0 0.022628 0.103269 0.987275 0.184704 5.105122 0.906561 \n", "0 0.008007 0.069521 0.402333 0.434343 5.133650 0.877999 \n", "0 0.000862 0.045379 0.482821 0.059885 2.232578 0.994487 \n", "0 0.000235 0.042533 0.602121 0.010823 2.089186 0.995706 \n", "0 0.000220 0.042809 0.699046 0.005051 1.945910 0.995669 \n", "0 0.000201 0.042622 0.600530 0.005051 1.803126 0.996380 \n", "0 0.000118 0.041755 0.392153 0.115440 4.174741 0.965327 " ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.iloc[:,np.append(0,np.arange(9, df.shape[1]))]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Check metrics on toy dataset" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "3it [00:00, 1024.67it/s]\n", "3it [00:00, 2922.18it/s]\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ModelRMSEMAEprecisionrecallF_1F_05precision_superrecall_superNDCGmAPMRRLAUCHRF_2Whole_averageReco in testTest coverageShannonGini
0Self_BaselineIU1.6483371.5750.4444440.8888890.5555560.4786320.3333330.750.7205500.6296300.6666670.7222221.00.6984130.6573610.7777780.81.3517840.357143
0Self_BaselineUI1.6124521.4000.4444440.8888890.5555560.4786320.3333330.750.6769070.5740740.6111110.6388891.00.6984130.6375210.8888890.81.3862940.250000
\n", "
" ], "text/plain": [ " Model RMSE MAE precision recall F_1 F_05 \\\n", "0 Self_BaselineIU 1.648337 1.575 0.444444 0.888889 0.555556 0.478632 \n", "0 Self_BaselineUI 1.612452 1.400 0.444444 0.888889 0.555556 0.478632 \n", "\n", " precision_super recall_super NDCG mAP MRR LAUC HR \\\n", "0 0.333333 0.75 0.720550 0.629630 0.666667 0.722222 1.0 \n", "0 0.333333 0.75 0.676907 0.574074 0.611111 0.638889 1.0 \n", "\n", " F_2 Whole_average Reco in test Test coverage Shannon Gini \n", "0 0.698413 0.657361 0.777778 0.8 1.351784 0.357143 \n", "0 0.698413 0.637521 0.888889 0.8 1.386294 0.250000 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Training data:\n" ] }, { "data": { "text/plain": [ "matrix([[3, 4, 0, 0, 5, 0, 0, 4],\n", " [0, 1, 2, 3, 0, 0, 0, 0],\n", " [0, 0, 0, 5, 0, 3, 4, 0]], dtype=int64)" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Test data:\n" ] }, { "data": { "text/plain": [ "matrix([[0, 0, 0, 0, 0, 0, 3, 0],\n", " [0, 0, 0, 0, 5, 0, 0, 0],\n", " [5, 0, 4, 0, 0, 0, 0, 2]], dtype=int64)" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Recommendations:\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456
00305.0204.0604.0
110403.0602.0702.0
220405.0204.0704.0
\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6\n", "0 0 30 5.0 20 4.0 60 4.0\n", "1 10 40 3.0 60 2.0 70 2.0\n", "2 20 40 5.0 20 4.0 70 4.0" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Estimations:\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
useritemest_score
00604.0
110403.0
22003.0
320204.0
420704.0
\n", "
" ], "text/plain": [ " user item est_score\n", "0 0 60 4.0\n", "1 10 40 3.0\n", "2 20 0 3.0\n", "3 20 20 4.0\n", "4 20 70 4.0" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import evaluation_measures as ev\n", "import imp\n", "import helpers\n", "imp.reload(ev)\n", "\n", "dir_path=\"Recommendations generated/toy-example/\"\n", "super_reactions=[4,5]\n", "test=pd.read_csv('./Datasets/toy-example/test.csv', sep='\\t', header=None)\n", "\n", "display(ev.evaluate_all(test, dir_path, super_reactions, topK=3))\n", "#also you can just type ev.evaluate_all() - I put above values as default\n", "\n", "toy_train_read=pd.read_csv('./Datasets/toy-example/train.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n", "toy_test_read=pd.read_csv('./Datasets/toy-example/test.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n", "reco=pd.read_csv('Recommendations generated/toy-example/Self_BaselineUI_reco.csv', header=None)\n", "estimations=pd.read_csv('Recommendations generated/toy-example/Self_BaselineUI_estimations.csv', names=['user', 'item', 'est_score'])\n", "toy_train_ui, toy_test_ui, toy_user_code_id, toy_user_id_code, \\\n", "toy_item_code_id, toy_item_id_code = helpers.data_to_csr(toy_train_read, toy_test_read)\n", "\n", "print('Training data:')\n", "display(toy_train_ui.todense())\n", "\n", "print('Test data:')\n", "display(toy_test_ui.todense())\n", "\n", "print('Recommendations:')\n", "display(reco)\n", "\n", "print('Estimations:')\n", "display(estimations)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Sample recommendations" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Here is what user rated high:\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
userratingtitlegenres
3326145Toy Story (1995)Animation, Children's, Comedy
190246145Mars Attacks! (1996)Action, Comedy, Sci-Fi, War
551246145Long Kiss Goodnight, The (1996)Action, Thriller
349786145My Best Friend's Wedding (1997)Comedy, Romance
171496144Leaving Las Vegas (1995)Drama, Romance
608606144Juror, The (1996)Drama, Thriller
244866144People vs. Larry Flynt, The (1996)Drama
338316144Independence Day (ID4) (1996)Action, Sci-Fi, War
385656144Spitfire Grill, The (1996)Drama
442016144Father of the Bride Part II (1995)Comedy
414926143Donnie Brasco (1997)Crime, Drama
663936143Postino, Il (1994)Drama, Romance
662316143Stealing Beauty (1996)Drama
645646143Once Upon a Time... When We Were Colored (1995)Drama
511256143Dragonheart (1996)Action, Adventure, Fantasy
\n", "
" ], "text/plain": [ " user rating title \\\n", "332 614 5 Toy Story (1995) \n", "19024 614 5 Mars Attacks! (1996) \n", "55124 614 5 Long Kiss Goodnight, The (1996) \n", "34978 614 5 My Best Friend's Wedding (1997) \n", "17149 614 4 Leaving Las Vegas (1995) \n", "60860 614 4 Juror, The (1996) \n", "24486 614 4 People vs. Larry Flynt, The (1996) \n", "33831 614 4 Independence Day (ID4) (1996) \n", "38565 614 4 Spitfire Grill, The (1996) \n", "44201 614 4 Father of the Bride Part II (1995) \n", "41492 614 3 Donnie Brasco (1997) \n", "66393 614 3 Postino, Il (1994) \n", "66231 614 3 Stealing Beauty (1996) \n", "64564 614 3 Once Upon a Time... When We Were Colored (1995) \n", "51125 614 3 Dragonheart (1996) \n", "\n", " genres \n", "332 Animation, Children's, Comedy \n", "19024 Action, Comedy, Sci-Fi, War \n", "55124 Action, Thriller \n", "34978 Comedy, Romance \n", "17149 Drama, Romance \n", "60860 Drama, Thriller \n", "24486 Drama \n", "33831 Action, Sci-Fi, War \n", "38565 Drama \n", "44201 Comedy \n", "41492 Crime, Drama \n", "66393 Drama, Romance \n", "66231 Drama \n", "64564 Drama \n", "51125 Action, Adventure, Fantasy " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Here is what we recommend:\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
userrec_nbtitlegenres
612614.01Great Day in Harlem, A (1994)Documentary
1554614.02Tough and Deadly (1995)Action, Drama, Thriller
2496614.03Aiqing wansui (1994)Drama
3438614.04Delta of Venus (1994)Drama
4380614.05Someone Else's America (1995)Drama
5322614.06Saint of Fort Washington, The (1993)Drama
6264614.07Celestial Clockwork (1994)Comedy
7207614.08Some Mother's Son (1996)Drama
9101614.09Maya Lin: A Strong Clear Vision (1994)Documentary
8147614.010Prefontaine (1997)Drama
\n", "
" ], "text/plain": [ " user rec_nb title \\\n", "612 614.0 1 Great Day in Harlem, A (1994) \n", "1554 614.0 2 Tough and Deadly (1995) \n", "2496 614.0 3 Aiqing wansui (1994) \n", "3438 614.0 4 Delta of Venus (1994) \n", "4380 614.0 5 Someone Else's America (1995) \n", "5322 614.0 6 Saint of Fort Washington, The (1993) \n", "6264 614.0 7 Celestial Clockwork (1994) \n", "7207 614.0 8 Some Mother's Son (1996) \n", "9101 614.0 9 Maya Lin: A Strong Clear Vision (1994) \n", "8147 614.0 10 Prefontaine (1997) \n", "\n", " genres \n", "612 Documentary \n", "1554 Action, Drama, Thriller \n", "2496 Drama \n", "3438 Drama \n", "4380 Drama \n", "5322 Drama \n", "6264 Comedy \n", "7207 Drama \n", "9101 Documentary \n", "8147 Drama " ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train=pd.read_csv('./Datasets/ml-100k/train.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n", "items=pd.read_csv('./Datasets/ml-100k/movies.csv')\n", "\n", "user=random.choice(list(set(train['user'])))\n", "\n", "train_content=pd.merge(train, items, left_on='item', right_on='id')\n", "\n", "print('Here is what user rated high:')\n", "display(train_content[train_content['user']==user][['user', 'rating', 'title', 'genres']]\\\n", " .sort_values(by='rating', ascending=False)[:15])\n", "\n", "reco = np.loadtxt('Recommendations generated/ml-100k/Self_BaselineUI_reco.csv', delimiter=',')\n", "items=pd.read_csv('./Datasets/ml-100k/movies.csv')\n", "\n", "# Let's ignore scores - they are not used in evaluation: \n", "reco_users=reco[:,:1]\n", "reco_items=reco[:,1::2]\n", "# Let's put them into one array\n", "reco=np.concatenate((reco_users, reco_items), axis=1)\n", "\n", "# Let's rebuild it user-item dataframe\n", "recommended=[]\n", "for row in reco:\n", " for rec_nb, entry in enumerate(row[1:]):\n", " recommended.append((row[0], rec_nb+1, entry))\n", "recommended=pd.DataFrame(recommended, columns=['user','rec_nb', 'item'])\n", "\n", "recommended_content=pd.merge(recommended, items, left_on='item', right_on='id')\n", "\n", "print('Here is what we recommend:')\n", "recommended_content[recommended_content['user']==user][['user', 'rec_nb', 'title', 'genres']].sort_values(by='rec_nb')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# project task 3: implement some other evaluation measure" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "# it may be your idea, modification of what we have already implemented \n", "# (for example Hit2 rate which would count as a success users whoreceived at least 2 relevant recommendations) \n", "# or something well-known\n", "# expected output: modification of evaluation_measures.py such that evaluate_all will also display your measure" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "943it [00:00, 1901.35it/s]\n", "943it [00:00, 4856.92it/s]\n", "943it [00:00, 4595.27it/s]\n", "943it [00:00, 2703.28it/s]\n", "943it [00:00, 4351.40it/s]\n", "943it [00:00, 4062.22it/s]\n", "943it [00:00, 4997.62it/s]\n", "943it [00:00, 4371.27it/s]\n", "943it [00:00, 4910.24it/s]\n", "943it [00:00, 4240.88it/s]\n", "943it [00:00, 4037.69it/s]\n", "943it [00:00, 3703.04it/s]\n", "943it [00:00, 2715.94it/s]\n", "943it [00:00, 5319.09it/s]\n", "943it [00:00, 3988.17it/s]\n", "943it [00:00, 4858.36it/s]\n", "943it [00:00, 5096.80it/s]\n", "943it [00:00, 4678.05it/s]\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ModelRMSEMAEprecisionrecallF_1F_05precision_superrecall_superNDCGmAPMRRLAUCHRF_2Whole_averageReco in testTest coverageShannonGini
0Self_RP3Beta3.7029283.5277130.3226940.2160690.2121520.2475380.2452790.2849830.3882710.2482390.6363180.6056830.9109230.2054500.3769670.9997880.1789324.5496630.950182
0Self_P33.7024463.5272730.2821850.1920920.1867490.2169800.2041850.2400960.3391140.2049050.5721570.5935440.8759280.1817020.3408031.0000000.0772013.8758920.974947
0Self_TopPop2.5082582.2179090.1888650.1169190.1187320.1415840.1304720.1374730.2146510.1117070.4009390.5555460.7656420.1127500.2496071.0000000.0389613.1590790.987317
0Self_SVDBaseline3.6456663.4802460.1378580.0823980.0841510.1010630.1079400.1093930.1644770.0829730.3423740.5380970.6383880.0798600.2057480.9998940.2792215.1590760.907220
0Ready_SVD0.9525630.7501580.0944860.0462740.0513890.0656250.0826180.0741500.1093200.0513830.2406930.5198490.4750800.0462370.1547590.9934250.2063494.4429960.952832
0Self_SVD0.9148900.7179620.1029690.0423250.0520220.0693130.0935620.0749940.1054160.0502780.1915330.5178900.4623540.0445910.1506040.8676560.1414143.9292490.971112
0Ready_Baseline0.9494590.7524870.0914100.0376520.0460300.0612860.0796140.0564630.0959570.0431780.1981930.5155010.4379640.0395490.1419001.0000000.0339112.8365130.991139
0Self_KNNSurprisetask0.9462550.7452090.0834570.0328480.0412270.0554930.0747850.0488900.0895770.0409020.1890570.5130760.4178150.0349960.1351770.8885470.1305923.6118060.978659
0Self_TopRated2.5082582.2179090.0793210.0326670.0399830.0531700.0688840.0485820.0707660.0276020.1147900.5129430.4114530.0343850.1245461.0000000.0245312.7612380.991660
0Ready_SVDBiased0.9421410.7427600.0812300.0323440.0403020.0539320.0726390.0511260.0875520.0393460.1912850.5128180.4167550.0344050.1344780.9976670.1652244.1475790.964690
0Self_GlobalAvg1.1257600.9435340.0611880.0259680.0313830.0413430.0405580.0321070.0676950.0274700.1711870.5095460.3849420.0272130.1183831.0000000.0259742.7117720.992003
0Ready_Random1.5256331.2257140.0477200.0220490.0254940.0328450.0290770.0250150.0517570.0192420.1281810.5075430.3276780.0226280.1032690.9872750.1847045.1051220.906561
0Ready_I-KNN1.0303860.8130670.0260870.0069080.0105930.0160460.0211370.0095220.0242140.0089580.0480680.4998850.1548250.0080070.0695210.4023330.4343435.1336500.877999
0Ready_I-KNNBaseline0.9353270.7374240.0025450.0007550.0011050.0016020.0022530.0009300.0034440.0013620.0117600.4967240.0212090.0008620.0453790.4828210.0598852.2325780.994487
0Ready_U-KNN1.0234950.8079130.0007420.0002050.0003050.0004490.0005360.0001980.0008450.0002740.0027440.4964410.0074230.0002350.0425330.6021210.0108232.0891860.995706
0Self_BaselineIU0.9581360.7540510.0009540.0001880.0002980.0004810.0006440.0002230.0010430.0003350.0033480.4964330.0095440.0002200.0428090.6990460.0050511.9459100.995669
0Self_BaselineUI0.9675850.7627400.0009540.0001700.0002780.0004630.0006440.0001890.0007520.0001680.0016770.4964240.0095440.0002010.0426220.6005300.0050511.8031260.996380
0Self_IKNN1.0183630.8087930.0003180.0001080.0001400.0001890.0000000.0000000.0002140.0000370.0003680.4963910.0031810.0001180.0417550.3921530.1154404.1747410.965327
\n", "
" ], "text/plain": [ " Model RMSE MAE precision recall F_1 \\\n", "0 Self_RP3Beta 3.702928 3.527713 0.322694 0.216069 0.212152 \n", "0 Self_P3 3.702446 3.527273 0.282185 0.192092 0.186749 \n", "0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 0.118732 \n", "0 Self_SVDBaseline 3.645666 3.480246 0.137858 0.082398 0.084151 \n", "0 Ready_SVD 0.952563 0.750158 0.094486 0.046274 0.051389 \n", "0 Self_SVD 0.914890 0.717962 0.102969 0.042325 0.052022 \n", "0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 0.046030 \n", "0 Self_KNNSurprisetask 0.946255 0.745209 0.083457 0.032848 0.041227 \n", "0 Self_TopRated 2.508258 2.217909 0.079321 0.032667 0.039983 \n", "0 Ready_SVDBiased 0.942141 0.742760 0.081230 0.032344 0.040302 \n", "0 Self_GlobalAvg 1.125760 0.943534 0.061188 0.025968 0.031383 \n", "0 Ready_Random 1.525633 1.225714 0.047720 0.022049 0.025494 \n", "0 Ready_I-KNN 1.030386 0.813067 0.026087 0.006908 0.010593 \n", "0 Ready_I-KNNBaseline 0.935327 0.737424 0.002545 0.000755 0.001105 \n", "0 Ready_U-KNN 1.023495 0.807913 0.000742 0.000205 0.000305 \n", "0 Self_BaselineIU 0.958136 0.754051 0.000954 0.000188 0.000298 \n", "0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 0.000278 \n", "0 Self_IKNN 1.018363 0.808793 0.000318 0.000108 0.000140 \n", "\n", " F_05 precision_super recall_super NDCG mAP MRR \\\n", "0 0.247538 0.245279 0.284983 0.388271 0.248239 0.636318 \n", "0 0.216980 0.204185 0.240096 0.339114 0.204905 0.572157 \n", "0 0.141584 0.130472 0.137473 0.214651 0.111707 0.400939 \n", "0 0.101063 0.107940 0.109393 0.164477 0.082973 0.342374 \n", "0 0.065625 0.082618 0.074150 0.109320 0.051383 0.240693 \n", "0 0.069313 0.093562 0.074994 0.105416 0.050278 0.191533 \n", "0 0.061286 0.079614 0.056463 0.095957 0.043178 0.198193 \n", "0 0.055493 0.074785 0.048890 0.089577 0.040902 0.189057 \n", "0 0.053170 0.068884 0.048582 0.070766 0.027602 0.114790 \n", "0 0.053932 0.072639 0.051126 0.087552 0.039346 0.191285 \n", "0 0.041343 0.040558 0.032107 0.067695 0.027470 0.171187 \n", "0 0.032845 0.029077 0.025015 0.051757 0.019242 0.128181 \n", "0 0.016046 0.021137 0.009522 0.024214 0.008958 0.048068 \n", "0 0.001602 0.002253 0.000930 0.003444 0.001362 0.011760 \n", "0 0.000449 0.000536 0.000198 0.000845 0.000274 0.002744 \n", "0 0.000481 0.000644 0.000223 0.001043 0.000335 0.003348 \n", "0 0.000463 0.000644 0.000189 0.000752 0.000168 0.001677 \n", "0 0.000189 0.000000 0.000000 0.000214 0.000037 0.000368 \n", "\n", " LAUC HR F_2 Whole_average Reco in test Test coverage \\\n", "0 0.605683 0.910923 0.205450 0.376967 0.999788 0.178932 \n", "0 0.593544 0.875928 0.181702 0.340803 1.000000 0.077201 \n", "0 0.555546 0.765642 0.112750 0.249607 1.000000 0.038961 \n", "0 0.538097 0.638388 0.079860 0.205748 0.999894 0.279221 \n", "0 0.519849 0.475080 0.046237 0.154759 0.993425 0.206349 \n", "0 0.517890 0.462354 0.044591 0.150604 0.867656 0.141414 \n", "0 0.515501 0.437964 0.039549 0.141900 1.000000 0.033911 \n", "0 0.513076 0.417815 0.034996 0.135177 0.888547 0.130592 \n", "0 0.512943 0.411453 0.034385 0.124546 1.000000 0.024531 \n", "0 0.512818 0.416755 0.034405 0.134478 0.997667 0.165224 \n", "0 0.509546 0.384942 0.027213 0.118383 1.000000 0.025974 \n", "0 0.507543 0.327678 0.022628 0.103269 0.987275 0.184704 \n", "0 0.499885 0.154825 0.008007 0.069521 0.402333 0.434343 \n", "0 0.496724 0.021209 0.000862 0.045379 0.482821 0.059885 \n", "0 0.496441 0.007423 0.000235 0.042533 0.602121 0.010823 \n", "0 0.496433 0.009544 0.000220 0.042809 0.699046 0.005051 \n", "0 0.496424 0.009544 0.000201 0.042622 0.600530 0.005051 \n", "0 0.496391 0.003181 0.000118 0.041755 0.392153 0.115440 \n", "\n", " Shannon Gini \n", "0 4.549663 0.950182 \n", "0 3.875892 0.974947 \n", "0 3.159079 0.987317 \n", "0 5.159076 0.907220 \n", "0 4.442996 0.952832 \n", "0 3.929249 0.971112 \n", "0 2.836513 0.991139 \n", "0 3.611806 0.978659 \n", "0 2.761238 0.991660 \n", "0 4.147579 0.964690 \n", "0 2.711772 0.992003 \n", "0 5.105122 0.906561 \n", "0 5.133650 0.877999 \n", "0 2.232578 0.994487 \n", "0 2.089186 0.995706 \n", "0 1.945910 0.995669 \n", "0 1.803126 0.996380 \n", "0 4.174741 0.965327 " ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dir_path=\"Recommendations generated/ml-100k/\"\n", "super_reactions=[4,5]\n", "test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n", "\n", "ev.evaluate_all(test, dir_path, super_reactions)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.9" } }, "nbformat": 4, "nbformat_minor": 4 }