{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "### Prepare test set" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "slideshow": { "slide_type": "-" } }, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import scipy.sparse as sparse\n", "from collections import defaultdict\n", "from itertools import chain\n", "import random\n", "from tqdm import tqdm\n", "\n", "# In evaluation we do not load train set - it is not needed\n", "test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n", "test.columns=['user', 'item', 'rating', 'timestamp']\n", "\n", "test['user_code'] = test['user'].astype(\"category\").cat.codes\n", "test['item_code'] = test['item'].astype(\"category\").cat.codes\n", "\n", "user_code_id = dict(enumerate(test['user'].astype(\"category\").cat.categories))\n", "user_id_code = dict((v, k) for k, v in user_code_id.items())\n", "item_code_id = dict(enumerate(test['item'].astype(\"category\").cat.categories))\n", "item_id_code = dict((v, k) for k, v in item_code_id.items())\n", "\n", "test_ui = sparse.csr_matrix((test['rating'], (test['user_code'], test['item_code'])))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Estimations metrics" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "estimations_df=pd.read_csv('Recommendations generated/ml-100k/Ready_Baseline_estimations.csv', header=None)\n", "estimations_df.columns=['user', 'item' ,'score']\n", "\n", "estimations_df['user_code']=[user_id_code[user] for user in estimations_df['user']]\n", "estimations_df['item_code']=[item_id_code[item] for item in estimations_df['item']]\n", "estimations=sparse.csr_matrix((estimations_df['score'], (estimations_df['user_code'], estimations_df['item_code'])), shape=test_ui.shape)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "def estimations_metrics(test_ui, estimations):\n", " result=[]\n", "\n", " RMSE=(np.sum((estimations.data-test_ui.data)**2)/estimations.nnz)**(1/2)\n", " result.append(['RMSE', RMSE])\n", "\n", " MAE=np.sum(abs(estimations.data-test_ui.data))/estimations.nnz\n", " result.append(['MAE', MAE])\n", " \n", " df_result=(pd.DataFrame(list(zip(*result))[1])).T\n", " df_result.columns=list(zip(*result))[0]\n", " return df_result" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
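{ "cell_type": "markdown", "metadata": {}, "source": [ "The `estimations_metrics` function above computes RMSE and MAE directly on the `.data` arrays of the two sparse matrices, which relies on both matrices storing the same (user, item) entries in the same order. Below is a minimal sketch of the same two formulas on a tiny made-up vector of ratings and predictions (illustrative numbers only, not taken from ml-100k)." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Minimal sketch of the RMSE/MAE formulas used in estimations_metrics,\n", "# on a tiny made-up example (values are illustrative only)\n", "import numpy as np\n", "\n", "true_ratings = np.array([4, 3, 5, 2])       # hypothetical ground-truth ratings\n", "predicted = np.array([3.5, 3.0, 4.0, 2.5])  # hypothetical estimated scores\n", "\n", "errors = predicted - true_ratings\n", "rmse = np.sqrt(np.mean(errors**2))\n", "mae = np.mean(np.abs(errors))\n", "print(rmse, mae)" ] },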
" ], "text/plain": [ " RMSE MAE\n", "0 0.949459 0.752487" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# in case of error (in the laboratories) you might have to switch to the other version of pandas\n", "# try !pip3 install pandas=='1.0.3' (or pip if you use python 2) and restart the kernel\n", "\n", "estimations_metrics(test_ui, estimations)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Ranking metrics" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[663, 475, 62, ..., 472, 269, 503],\n", " [ 48, 313, 475, ..., 591, 175, 466],\n", " [351, 313, 475, ..., 591, 175, 466],\n", " ...,\n", " [259, 313, 475, ..., 11, 591, 175],\n", " [ 33, 313, 475, ..., 11, 591, 175],\n", " [ 77, 313, 475, ..., 11, 591, 175]])" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import numpy as np\n", "reco = np.loadtxt('Recommendations generated/ml-100k/Ready_Baseline_reco.csv', delimiter=',')\n", "# Let's ignore scores - they are not used in evaluation: \n", "users=reco[:,:1]\n", "items=reco[:,1::2]\n", "# Let's use inner ids instead of real ones\n", "users=np.vectorize(lambda x: user_id_code.setdefault(x, -1))(users)\n", "items=np.vectorize(lambda x: item_id_code.setdefault(x, -1))(items) # maybe items we recommend are not in test set\n", "# Let's put them into one array\n", "reco=np.concatenate((users, items), axis=1)\n", "reco" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "def ranking_metrics(test_ui, reco, super_reactions=[], topK=10):\n", " \n", " nb_items=test_ui.shape[1]\n", " relevant_users, super_relevant_users, prec, rec, F_1, F_05, prec_super, rec_super, ndcg, mAP, MRR, LAUC, HR=\\\n", " 0,0,0,0,0,0,0,0,0,0,0,0,0\n", " \n", " cg = (1.0 / np.log2(np.arange(2, topK + 2)))\n", " cg_sum = np.cumsum(cg)\n", " \n", " for (nb_user, user) in tqdm(enumerate(reco[:,0])):\n", " u_rated_items=test_ui.indices[test_ui.indptr[user]:test_ui.indptr[user+1]]\n", " nb_u_rated_items=len(u_rated_items)\n", " if nb_u_rated_items>0: # skip users with no items in test set (still possible that there will be no super items)\n", " relevant_users+=1\n", " \n", " u_super_items=u_rated_items[np.vectorize(lambda x: x in super_reactions)\\\n", " (test_ui.data[test_ui.indptr[user]:test_ui.indptr[user+1]])]\n", " # more natural seems u_super_items=[item for item in u_rated_items if test_ui[user,item] in super_reactions]\n", " # but accesing test_ui[user,item] is expensive -we should avoid doing it\n", " if len(u_super_items)>0:\n", " super_relevant_users+=1\n", " \n", " user_successes=np.zeros(topK)\n", " nb_user_successes=0\n", " user_super_successes=np.zeros(topK)\n", " nb_user_super_successes=0\n", " \n", " # evaluation\n", " for (item_position,item) in enumerate(reco[nb_user,1:topK+1]):\n", " if item in u_rated_items:\n", " user_successes[item_position]=1\n", " nb_user_successes+=1\n", " if item in u_super_items:\n", " user_super_successes[item_position]=1\n", " nb_user_super_successes+=1\n", " \n", " prec_u=nb_user_successes/topK \n", " prec+=prec_u\n", " \n", " rec_u=nb_user_successes/nb_u_rated_items\n", " rec+=rec_u\n", " \n", " F_1+=2*(prec_u*rec_u)/(prec_u+rec_u) if prec_u+rec_u>0 else 0\n", " F_05+=(0.5**2+1)*(prec_u*rec_u)/(0.5**2*prec_u+rec_u) if prec_u+rec_u>0 else 0\n", " \n", " prec_super+=nb_user_super_successes/topK\n", " rec_super+=nb_user_super_successes/max(len(u_super_items),1) # to set 0 
if no super items\n", " ndcg+=np.dot(user_successes,cg)/cg_sum[min(topK, nb_u_rated_items)-1]\n", " \n", " cumsum_successes=np.cumsum(user_successes)\n", " mAP+=np.dot(cumsum_successes/np.arange(1,topK+1), user_successes)/min(topK, nb_u_rated_items)\n", " MRR+=1/(user_successes.nonzero()[0][0]+1) if user_successes.nonzero()[0].size>0 else 0\n", " LAUC+=(np.dot(cumsum_successes, 1-user_successes)+\\\n", " (nb_user_successes+nb_u_rated_items)/2*((nb_items-nb_u_rated_items)-(topK-nb_user_successes)))/\\\n", " ((nb_items-nb_u_rated_items)*nb_u_rated_items)\n", " \n", " HR+=nb_user_successes>0\n", " \n", " \n", " result=[]\n", " result.append(('precision', prec/relevant_users))\n", " result.append(('recall', rec/relevant_users))\n", " result.append(('F_1', F_1/relevant_users))\n", " result.append(('F_05', F_05/relevant_users))\n", " result.append(('precision_super', prec_super/super_relevant_users))\n", " result.append(('recall_super', rec_super/super_relevant_users))\n", " result.append(('NDCG', ndcg/relevant_users))\n", " result.append(('mAP', mAP/relevant_users))\n", " result.append(('MRR', MRR/relevant_users))\n", " result.append(('LAUC', LAUC/relevant_users))\n", " result.append(('HR', HR/relevant_users))\n", "\n", " df_result=(pd.DataFrame(list(zip(*result))[1])).T\n", " df_result.columns=list(zip(*result))[0]\n", " return df_result" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "943it [00:00, 7666.87it/s]\n" ] }, { "data": { "text/html": [ "
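{ "cell_type": "markdown", "metadata": {}, "source": [ "To make the loop in `ranking_metrics` above easier to follow, here is a minimal sketch of precision@K, recall@K and NDCG@K for a single hypothetical user; the recommended items and the relevant set are made up for illustration." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Per-user quantities accumulated in ranking_metrics, shown for one hypothetical user and K=5\n", "import numpy as np\n", "\n", "K = 5\n", "recommended = [7, 3, 9, 1, 4]   # hypothetical top-K item ids for one user\n", "relevant = {3, 4, 8}            # hypothetical items this user rated in the test set\n", "\n", "hits = np.array([1 if item in relevant else 0 for item in recommended])\n", "\n", "precision_at_k = hits.sum() / K\n", "recall_at_k = hits.sum() / len(relevant)\n", "\n", "# binary-relevance DCG normalised by the ideal DCG (same idea as cg and cg_sum above)\n", "discounts = 1.0 / np.log2(np.arange(2, K + 2))\n", "dcg = np.dot(hits, discounts)\n", "idcg = discounts[:min(K, len(relevant))].sum()\n", "ndcg_at_k = dcg / idcg\n", "\n", "print(precision_at_k, recall_at_k, ndcg_at_k)" ] },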
" ], "text/plain": [ " precision recall F_1 F_05 precision_super recall_super \\\n", "0 0.09141 0.037652 0.04603 0.061286 0.079614 0.056463 \n", "\n", " NDCG mAP MRR LAUC HR \n", "0 0.095957 0.043178 0.198193 0.515501 0.437964 " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ranking_metrics(test_ui, reco, super_reactions=[4,5], topK=10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Diversity metrics" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "def diversity_metrics(test_ui, reco, topK=10):\n", " \n", " frequencies=defaultdict(int)\n", " \n", " # let's assign 0 to all items in test set\n", " for item in list(set(test_ui.indices)):\n", " frequencies[item]=0\n", " \n", " # counting frequencies\n", " for item in reco[:,1:].flat:\n", " frequencies[item]+=1\n", " \n", " nb_reco_outside_test=frequencies[-1]\n", " del frequencies[-1]\n", " \n", " frequencies=np.array(list(frequencies.values()))\n", " \n", " nb_rec_items=len(frequencies[frequencies>0])\n", " nb_reco_inside_test=np.sum(frequencies)\n", " \n", " frequencies=frequencies/np.sum(frequencies)\n", " frequencies=np.sort(frequencies)\n", " \n", " with np.errstate(divide='ignore'): # let's put zeros put items with 0 frequency and ignore division warning\n", " log_frequencies=np.nan_to_num(np.log(frequencies), posinf=0, neginf=0)\n", " \n", " result=[]\n", " result.append(('Reco in test', nb_reco_inside_test/(nb_reco_inside_test+nb_reco_outside_test)))\n", " result.append(('Test coverage', nb_rec_items/test_ui.shape[1]))\n", " result.append(('Shannon', -np.dot(frequencies, log_frequencies)))\n", " result.append(('Gini', np.dot(frequencies, np.arange(1-len(frequencies), len(frequencies), 2))/(len(frequencies)-1)))\n", " \n", " df_result=(pd.DataFrame(list(zip(*result))[1])).T\n", " df_result.columns=list(zip(*result))[0]\n", " return df_result" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
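{ "cell_type": "markdown", "metadata": {}, "source": [ "Both Shannon entropy and the Gini index in `diversity_metrics` are functions of how often each item occurs across all recommendation lists. Below is a minimal sketch on a small made-up frequency vector (not the actual recommendation counts)." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Shannon entropy and Gini index of a made-up item-frequency vector\n", "import numpy as np\n", "\n", "counts = np.array([50, 30, 15, 5])   # hypothetical number of times each item was recommended\n", "p = counts / counts.sum()            # empirical distribution over items\n", "\n", "shannon = -np.sum(p * np.log(p))     # higher = recommendations spread more evenly\n", "\n", "p_sorted = np.sort(p)\n", "n = len(p_sorted)\n", "# same weighting as in diversity_metrics: 1-n, 3-n, ..., n-1 over ascending frequencies\n", "gini = np.dot(p_sorted, np.arange(1 - n, n, 2)) / (n - 1)\n", "\n", "print(shannon, gini)" ] },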
" ], "text/plain": [ " Reco in test Test coverage Shannon Gini\n", "0 1.0 0.033911 2.836513 0.991139" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# in case of errors try !pip3 install numpy==1.18.4 (or pip if you use python 2) and restart the kernel\n", "\n", "import evaluation_measures as ev\n", "import imp\n", "imp.reload(ev)\n", "\n", "x=diversity_metrics(test_ui, reco, topK=10)\n", "x" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# To be used in other notebooks" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "943it [00:00, 7370.69it/s]\n" ] }, { "data": { "text/html": [ "
" ], "text/plain": [ " RMSE MAE precision recall F_1 F_05 \\\n", "0 0.949459 0.752487 0.09141 0.037652 0.04603 0.061286 \n", "\n", " precision_super recall_super NDCG mAP MRR LAUC \\\n", "0 0.079614 0.056463 0.095957 0.043178 0.198193 0.515501 \n", "\n", " HR Reco in test Test coverage Shannon Gini \n", "0 0.437964 1.0 0.033911 2.836513 0.991139 " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import evaluation_measures as ev\n", "import imp\n", "imp.reload(ev)\n", "\n", "estimations_df=pd.read_csv('Recommendations generated/ml-100k/Ready_Baseline_estimations.csv', header=None)\n", "reco=np.loadtxt('Recommendations generated/ml-100k/Ready_Baseline_reco.csv', delimiter=',')\n", "\n", "ev.evaluate(test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None),\n", " estimations_df=estimations_df, \n", " reco=reco,\n", " super_reactions=[4,5])\n", "#also you can just type ev.evaluate_all(estimations_df, reco) - I put above values as default" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "943it [00:00, 7772.74it/s]\n", "943it [00:00, 5607.69it/s]\n", "943it [00:00, 4737.64it/s]\n", "943it [00:00, 4986.41it/s]\n", "943it [00:00, 3513.77it/s]\n" ] } ], "source": [ "import evaluation_measures as ev\n", "import imp\n", "imp.reload(ev)\n", "\n", "dir_path=\"Recommendations generated/ml-100k/\"\n", "super_reactions=[4,5]\n", "test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n", "\n", "df=ev.evaluate_all(test, dir_path, super_reactions)\n", "#also you can just type ev.evaluate_all() - I put above values as default" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
" ], "text/plain": [ " Model RMSE MAE precision recall F_1 \\\n", "0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 0.118732 \n", "0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 0.046030 \n", "0 Self_GlobalAvg 1.125760 0.943534 0.061188 0.025968 0.031383 \n", "0 Ready_Random 1.531724 1.230384 0.049417 0.022558 0.025490 \n", "0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 0.000278 \n", "\n", " F_05 precision_super recall_super \n", "0 0.141584 0.130472 0.137473 \n", "0 0.061286 0.079614 0.056463 \n", "0 0.041343 0.040558 0.032107 \n", "0 0.033242 0.030365 0.022626 \n", "0 0.000463 0.000644 0.000189 " ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.iloc[:,:9]" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
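{ "cell_type": "markdown", "metadata": {}, "source": [ "Since `df` returned by `ev.evaluate_all` holds one row per model, it can be reordered by whichever metric matters most for the comparison; the metric and columns chosen below are only an example." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Optional: rank the evaluated models by a single metric, e.g. NDCG\n", "df.sort_values('NDCG', ascending=False)[['Model', 'NDCG', 'recall', 'Reco in test']]" ] },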
" ], "text/plain": [ " Model NDCG mAP MRR LAUC HR \\\n", "0 Self_TopPop 0.214651 0.111707 0.400939 0.555546 0.765642 \n", "0 Ready_Baseline 0.095957 0.043178 0.198193 0.515501 0.437964 \n", "0 Self_GlobalAvg 0.067695 0.027470 0.171187 0.509546 0.384942 \n", "0 Ready_Random 0.054166 0.021656 0.128378 0.507802 0.325557 \n", "0 Self_BaselineUI 0.000752 0.000168 0.001677 0.496424 0.009544 \n", "\n", " Reco in test Test coverage Shannon Gini \n", "0 1.000000 0.038961 3.159079 0.987317 \n", "0 1.000000 0.033911 2.836513 0.991139 \n", "0 1.000000 0.025974 2.711772 0.992003 \n", "0 0.988865 0.190476 5.100033 0.907724 \n", "0 0.600530 0.005051 1.803126 0.996380 " ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.iloc[:,np.append(0,np.arange(9, df.shape[1]))]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Check metrics on toy dataset" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "3it [00:00, 1941.81it/s]\n" ] }, { "data": { "text/html": [ "
" ], "text/plain": [ " Model RMSE MAE precision recall F_1 F_05 \\\n", "0 Self_BaselineUI 1.612452 1.4 0.444444 0.888889 0.555556 0.478632 \n", "\n", " precision_super recall_super NDCG mAP MRR LAUC HR \\\n", "0 0.333333 0.75 0.676907 0.574074 0.611111 0.638889 1.0 \n", "\n", " Reco in test Test coverage Shannon Gini \n", "0 0.888889 0.8 1.386294 0.25 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Training data:\n" ] }, { "data": { "text/plain": [ "matrix([[3, 4, 0, 0, 5, 0, 0, 4],\n", " [0, 1, 2, 3, 0, 0, 0, 0],\n", " [0, 0, 0, 5, 0, 3, 4, 0]])" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Test data:\n" ] }, { "data": { "text/plain": [ "matrix([[0, 0, 0, 0, 0, 0, 3, 0],\n", " [0, 0, 0, 0, 5, 0, 0, 0],\n", " [5, 0, 4, 0, 0, 0, 0, 2]])" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Recommendations:\n" ] }, { "data": { "text/html": [ "
" ], "text/plain": [ " 0 1 2 3 4 5 6\n", "0 0 30 5.0 20 4.0 60 4.0\n", "1 10 40 3.0 60 2.0 70 2.0\n", "2 20 40 5.0 20 4.0 70 4.0" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Estimations:\n" ] }, { "data": { "text/html": [ "
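{ "cell_type": "markdown", "metadata": {}, "source": [ "With matrices this small the ranking numbers can be checked by hand. The sketch below recomputes precision@3 and recall@3 straight from the toy recommendations and test ratings, assuming the matrix columns correspond to item ids 0, 10, ..., 70; the result matches the reported 0.444444 and 0.888889." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Hand check of precision@3 and recall@3 for the toy example\n", "# (relevant items per user read off the test matrix, columns taken as item ids 0, 10, ..., 70)\n", "test_items = {0: {60}, 10: {40}, 20: {0, 20, 70}}                    # test-set items per user\n", "reco_items = {0: [30, 20, 60], 10: [40, 60, 70], 20: [40, 20, 70]}   # top-3 recommendations per user\n", "\n", "precisions, recalls = [], []\n", "for user, relevant in test_items.items():\n", "    hits = sum(item in relevant for item in reco_items[user])\n", "    precisions.append(hits / 3)\n", "    recalls.append(hits / len(relevant))\n", "\n", "print(sum(precisions) / len(precisions))   # 4/9 = 0.4444..., as in the table above\n", "print(sum(recalls) / len(recalls))         # 8/9 = 0.8888..., as in the table above" ] },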
" ], "text/plain": [ " user item est_score\n", "0 0 60 4.0\n", "1 10 40 3.0\n", "2 20 0 3.0\n", "3 20 20 4.0\n", "4 20 70 4.0" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import evaluation_measures as ev\n", "import imp\n", "import helpers\n", "imp.reload(ev)\n", "\n", "dir_path=\"Recommendations generated/toy-example/\"\n", "super_reactions=[4,5]\n", "test=pd.read_csv('./Datasets/toy-example/test.csv', sep='\\t', header=None)\n", "\n", "display(ev.evaluate_all(test, dir_path, super_reactions, topK=3))\n", "#also you can just type ev.evaluate_all() - I put above values as default\n", "\n", "toy_train_read=pd.read_csv('./Datasets/toy-example/train.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n", "toy_test_read=pd.read_csv('./Datasets/toy-example/test.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n", "reco=pd.read_csv('Recommendations generated/toy-example/Self_BaselineUI_reco.csv', header=None)\n", "estimations=pd.read_csv('Recommendations generated/toy-example/Self_BaselineUI_estimations.csv', names=['user', 'item', 'est_score'])\n", "toy_train_ui, toy_test_ui, toy_user_code_id, toy_user_id_code, \\\n", "toy_item_code_id, toy_item_id_code = helpers.data_to_csr(toy_train_read, toy_test_read)\n", "\n", "print('Training data:')\n", "display(toy_train_ui.todense())\n", "\n", "print('Test data:')\n", "display(toy_test_ui.todense())\n", "\n", "print('Recommendations:')\n", "display(reco)\n", "\n", "print('Estimations:')\n", "display(estimations)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Sample recommendations" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Here is what user rated high:\n" ] }, { "data": { "text/html": [ "
" ], "text/plain": [ " user rating title \\\n", "2985 789 5 Star Wars (1977) \n", "25980 789 5 Dead Man Walking (1995) \n", "9357 789 5 Last Supper, The (1995) \n", "17306 789 5 Leaving Las Vegas (1995) \n", "36474 789 5 Swingers (1996) \n", "65139 789 4 Welcome to the Dollhouse (1995) \n", "61975 789 4 Private Parts (1997) \n", "56522 789 4 Waiting for Guffman (1996) \n", "41414 789 4 Donnie Brasco (1997) \n", "36617 789 4 Lone Star (1996) \n", "24501 789 4 People vs. Larry Flynt, The (1996) \n", "20210 789 4 Return of the Jedi (1983) \n", "8230 789 3 Beautiful Girls (1996) \n", "19781 789 3 Liar Liar (1997) \n", "39387 789 3 Sleepers (1996) \n", "\n", " genres \n", "2985 Action, Adventure, Romance, Sci-Fi, War \n", "25980 Drama \n", "9357 Drama, Thriller \n", "17306 Drama, Romance \n", "36474 Comedy, Drama \n", "65139 Comedy, Drama \n", "61975 Comedy, Drama \n", "56522 Comedy \n", "41414 Crime, Drama \n", "36617 Drama, Mystery \n", "24501 Drama \n", "20210 Action, Adventure, Romance, Sci-Fi, War \n", "8230 Drama \n", "19781 Comedy \n", "39387 Crime, Drama " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Here is what we recommend:\n" ] }, { "data": { "text/html": [ "
" ], "text/plain": [ " user rec_nb title \\\n", "787 789.0 1 Great Day in Harlem, A (1994) \n", "1729 789.0 2 Tough and Deadly (1995) \n", "2671 789.0 3 Aiqing wansui (1994) \n", "3613 789.0 4 Delta of Venus (1994) \n", "4555 789.0 5 Someone Else's America (1995) \n", "5497 789.0 6 Saint of Fort Washington, The (1993) \n", "6439 789.0 7 Celestial Clockwork (1994) \n", "7380 789.0 8 Some Mother's Son (1996) \n", "9276 789.0 9 Maya Lin: A Strong Clear Vision (1994) \n", "8322 789.0 10 Prefontaine (1997) \n", "\n", " genres \n", "787 Documentary \n", "1729 Action, Drama, Thriller \n", "2671 Drama \n", "3613 Drama \n", "4555 Drama \n", "5497 Drama \n", "6439 Comedy \n", "7380 Drama \n", "9276 Documentary \n", "8322 Drama " ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train=pd.read_csv('./Datasets/ml-100k/train.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n", "items=pd.read_csv('./Datasets/ml-100k/movies.csv')\n", "\n", "user=random.choice(list(set(train['user'])))\n", "\n", "train_content=pd.merge(train, items, left_on='item', right_on='id')\n", "\n", "print('Here is what user rated high:')\n", "display(train_content[train_content['user']==user][['user', 'rating', 'title', 'genres']]\\\n", " .sort_values(by='rating', ascending=False)[:15])\n", "\n", "reco = np.loadtxt('Recommendations generated/ml-100k/Self_BaselineUI_reco.csv', delimiter=',')\n", "items=pd.read_csv('./Datasets/ml-100k/movies.csv')\n", "\n", "# Let's ignore scores - they are not used in evaluation: \n", "reco_users=reco[:,:1]\n", "reco_items=reco[:,1::2]\n", "# Let's put them into one array\n", "reco=np.concatenate((reco_users, reco_items), axis=1)\n", "\n", "# Let's rebuild it user-item dataframe\n", "recommended=[]\n", "for row in reco:\n", " for rec_nb, entry in enumerate(row[1:]):\n", " recommended.append((row[0], rec_nb+1, entry))\n", "recommended=pd.DataFrame(recommended, columns=['user','rec_nb', 'item'])\n", "\n", "recommended_content=pd.merge(recommended, items, left_on='item', right_on='id')\n", "\n", "print('Here is what we recommend:')\n", "recommended_content[recommended_content['user']==user][['user', 'rec_nb', 'title', 'genres']].sort_values(by='rec_nb')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# project task 3: implement some other evaluation measure" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "# it may be your idea, modification of what we have already implemented \n", "# (for example Hit2 rate which would count as a success users whoreceived at least 2 relevant recommendations) \n", "# or something well-known\n", "# expected output: modification of evaluation_measures.py such that evaluate_all will also display your measure" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "943it [00:00, 4479.94it/s]\n", "943it [00:00, 4036.40it/s]\n", "943it [00:00, 4598.99it/s]\n", "943it [00:00, 5170.18it/s]\n", "943it [00:00, 4778.23it/s]\n" ] }, { "data": { "text/html": [ "
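{ "cell_type": "markdown", "metadata": {}, "source": [ "As a starting point for project task 3 above, here is a hedged sketch of one possible extra measure: a 'Hit-2' rate that counts a user as a success only when at least two of the top-K recommendations are relevant. The function name and structure are illustrative; the actual task asks for the measure to be added inside `evaluation_measures.py` so that `evaluate_all` also reports it." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Sketch of a possible extra measure: Hit-2 rate\n", "# (a user counts as a hit only if at least 2 of their top-K recommendations are relevant).\n", "# Standalone illustration - integrating it means adding an analogous counter in ranking_metrics.\n", "\n", "def hit2_rate(test_ui, reco, topK=10):\n", "    # expects reco with inner user/item codes, as built in the Ranking metrics section\n", "    hits2, relevant_users = 0, 0\n", "    for nb_user, user in enumerate(reco[:, 0]):\n", "        u_rated_items = test_ui.indices[test_ui.indptr[user]:test_ui.indptr[user + 1]]\n", "        if len(u_rated_items) == 0:\n", "            continue   # same convention as ranking_metrics: skip users with nothing in the test set\n", "        relevant_users += 1\n", "        nb_successes = sum(item in u_rated_items for item in reco[nb_user, 1:topK + 1])\n", "        hits2 += nb_successes >= 2\n", "    return hits2 / relevant_users\n", "\n", "# hit2_rate(test_ui, reco, topK=10)  # with reco mapped to inner codes as in the Ranking metrics section" ] },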
" ], "text/plain": [ " Model RMSE MAE precision recall F_1 \\\n", "0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 0.118732 \n", "0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 0.046030 \n", "0 Self_GlobalAvg 1.125760 0.943534 0.061188 0.025968 0.031383 \n", "0 Ready_Random 1.531724 1.230384 0.049417 0.022558 0.025490 \n", "0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 0.000278 \n", "\n", " F_05 precision_super recall_super NDCG mAP MRR \\\n", "0 0.141584 0.130472 0.137473 0.214651 0.111707 0.400939 \n", "0 0.061286 0.079614 0.056463 0.095957 0.043178 0.198193 \n", "0 0.041343 0.040558 0.032107 0.067695 0.027470 0.171187 \n", "0 0.033242 0.030365 0.022626 0.054166 0.021656 0.128378 \n", "0 0.000463 0.000644 0.000189 0.000752 0.000168 0.001677 \n", "\n", " LAUC HR Reco in test Test coverage Shannon Gini \n", "0 0.555546 0.765642 1.000000 0.038961 3.159079 0.987317 \n", "0 0.515501 0.437964 1.000000 0.033911 2.836513 0.991139 \n", "0 0.509546 0.384942 1.000000 0.025974 2.711772 0.992003 \n", "0 0.507802 0.325557 0.988865 0.190476 5.100033 0.907724 \n", "0 0.496424 0.009544 0.600530 0.005051 1.803126 0.996380 " ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dir_path=\"Recommendations generated/ml-100k/\"\n", "super_reactions=[4,5]\n", "test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n", "\n", "ev.evaluate_all(test, dir_path, super_reactions)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.9" } }, "nbformat": 4, "nbformat_minor": 4 }