{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "### Prepare test set" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "slideshow": { "slide_type": "-" } }, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import scipy.sparse as sparse\n", "from collections import defaultdict\n", "from itertools import chain\n", "import random\n", "from tqdm import tqdm\n", "\n", "# In evaluation we do not load train set - it is not needed\n", "test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n", "test.columns=['user', 'item', 'rating', 'timestamp']\n", "\n", "test['user_code'] = test['user'].astype(\"category\").cat.codes\n", "test['item_code'] = test['item'].astype(\"category\").cat.codes\n", "\n", "user_code_id = dict(enumerate(test['user'].astype(\"category\").cat.categories))\n", "user_id_code = dict((v, k) for k, v in user_code_id.items())\n", "item_code_id = dict(enumerate(test['item'].astype(\"category\").cat.categories))\n", "item_id_code = dict((v, k) for k, v in item_code_id.items())\n", "\n", "test_ui = sparse.csr_matrix((test['rating'], (test['user_code'], test['item_code'])))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Estimations metrics" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "estimations_df=pd.read_csv('Recommendations generated/ml-100k/Ready_Baseline_estimations.csv', header=None)\n", "estimations_df.columns=['user', 'item' ,'score']\n", "\n", "estimations_df['user_code']=[user_id_code[user] for user in estimations_df['user']]\n", "estimations_df['item_code']=[item_id_code[item] for item in estimations_df['item']]\n", "estimations=sparse.csr_matrix((estimations_df['score'], (estimations_df['user_code'], estimations_df['item_code'])), shape=test_ui.shape)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "def estimations_metrics(test_ui, estimations):\n", " result=[]\n", "\n", " RMSE=(np.sum((estimations.data-test_ui.data)**2)/estimations.nnz)**(1/2)\n", " result.append(['RMSE', RMSE])\n", "\n", " MAE=np.sum(abs(estimations.data-test_ui.data))/estimations.nnz\n", " result.append(['MAE', MAE])\n", " \n", " df_result=(pd.DataFrame(list(zip(*result))[1])).T\n", " df_result.columns=list(zip(*result))[0]\n", " return df_result" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RMSEMAE
00.9494590.752487
\n", "
" ], "text/plain": [ " RMSE MAE\n", "0 0.949459 0.752487" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# in case of error (in the laboratories) you might have to switch to the other version of pandas\n", "# try !pip3 install pandas=='1.0.3' (or pip if you use python 2) and restart the kernel\n", "\n", "estimations_metrics(test_ui, estimations)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Ranking metrics" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[663, 475, 62, ..., 472, 269, 503],\n", " [ 48, 313, 475, ..., 591, 175, 466],\n", " [351, 313, 475, ..., 591, 175, 466],\n", " ...,\n", " [259, 313, 475, ..., 11, 591, 175],\n", " [ 33, 313, 475, ..., 11, 591, 175],\n", " [ 77, 313, 475, ..., 11, 591, 175]])" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import numpy as np\n", "reco = np.loadtxt('Recommendations generated/ml-100k/Ready_Baseline_reco.csv', delimiter=',')\n", "# Let's ignore scores - they are not used in evaluation: \n", "users=reco[:,:1]\n", "items=reco[:,1::2]\n", "# Let's use inner ids instead of real ones\n", "users=np.vectorize(lambda x: user_id_code.setdefault(x, -1))(users)\n", "items=np.vectorize(lambda x: item_id_code.setdefault(x, -1))(items) # maybe items we recommend are not in test set\n", "# Let's put them into one array\n", "reco=np.concatenate((users, items), axis=1)\n", "reco" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "def ranking_metrics(test_ui, reco, super_reactions=[], topK=10):\n", " \n", " nb_items=test_ui.shape[1]\n", " relevant_users, super_relevant_users, prec, rec, F_1, F_05, prec_super, rec_super, ndcg, mAP, MRR, LAUC, HR=\\\n", " 0,0,0,0,0,0,0,0,0,0,0,0,0\n", " \n", " cg = (1.0 / np.log2(np.arange(2, topK + 2)))\n", " cg_sum = np.cumsum(cg)\n", " \n", " for (nb_user, user) in tqdm(enumerate(reco[:,0])):\n", " u_rated_items=test_ui.indices[test_ui.indptr[user]:test_ui.indptr[user+1]]\n", " nb_u_rated_items=len(u_rated_items)\n", " if nb_u_rated_items>0: # skip users with no items in test set (still possible that there will be no super items)\n", " relevant_users+=1\n", " \n", " u_super_items=u_rated_items[np.vectorize(lambda x: x in super_reactions)\\\n", " (test_ui.data[test_ui.indptr[user]:test_ui.indptr[user+1]])]\n", " # more natural seems u_super_items=[item for item in u_rated_items if test_ui[user,item] in super_reactions]\n", " # but accesing test_ui[user,item] is expensive -we should avoid doing it\n", " if len(u_super_items)>0:\n", " super_relevant_users+=1\n", " \n", " user_successes=np.zeros(topK)\n", " nb_user_successes=0\n", " user_super_successes=np.zeros(topK)\n", " nb_user_super_successes=0\n", " \n", " # evaluation\n", " for (item_position,item) in enumerate(reco[nb_user,1:topK+1]):\n", " if item in u_rated_items:\n", " user_successes[item_position]=1\n", " nb_user_successes+=1\n", " if item in u_super_items:\n", " user_super_successes[item_position]=1\n", " nb_user_super_successes+=1\n", " \n", " prec_u=nb_user_successes/topK \n", " prec+=prec_u\n", " \n", " rec_u=nb_user_successes/nb_u_rated_items\n", " rec+=rec_u\n", " \n", " F_1+=2*(prec_u*rec_u)/(prec_u+rec_u) if prec_u+rec_u>0 else 0\n", " F_05+=(0.5**2+1)*(prec_u*rec_u)/(0.5**2*prec_u+rec_u) if prec_u+rec_u>0 else 0\n", " \n", " prec_super+=nb_user_super_successes/topK\n", " rec_super+=nb_user_super_successes/max(len(u_super_items),1) # to set 0 
if no super items\n", " ndcg+=np.dot(user_successes,cg)/cg_sum[min(topK, nb_u_rated_items)-1]\n", " \n", " cumsum_successes=np.cumsum(user_successes)\n", " mAP+=np.dot(cumsum_successes/np.arange(1,topK+1), user_successes)/min(topK, nb_u_rated_items)\n", " MRR+=1/(user_successes.nonzero()[0][0]+1) if user_successes.nonzero()[0].size>0 else 0\n", " LAUC+=(np.dot(cumsum_successes, 1-user_successes)+\\\n", " (nb_user_successes+nb_u_rated_items)/2*((nb_items-nb_u_rated_items)-(topK-nb_user_successes)))/\\\n", " ((nb_items-nb_u_rated_items)*nb_u_rated_items)\n", " \n", " HR+=nb_user_successes>0\n", " \n", " \n", " result=[]\n", " result.append(('precision', prec/relevant_users))\n", " result.append(('recall', rec/relevant_users))\n", " result.append(('F_1', F_1/relevant_users))\n", " result.append(('F_05', F_05/relevant_users))\n", " result.append(('precision_super', prec_super/super_relevant_users))\n", " result.append(('recall_super', rec_super/super_relevant_users))\n", " result.append(('NDCG', ndcg/relevant_users))\n", " result.append(('mAP', mAP/relevant_users))\n", " result.append(('MRR', MRR/relevant_users))\n", " result.append(('LAUC', LAUC/relevant_users))\n", " result.append(('HR', HR/relevant_users))\n", "\n", " df_result=(pd.DataFrame(list(zip(*result))[1])).T\n", " df_result.columns=list(zip(*result))[0]\n", " return df_result" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "943it [00:00, 9409.37it/s]\n" ] }, { "data": { "text/html": [ "
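{ "cell_type": "markdown", "metadata": {}, "source": [ "A small added sanity check of `ranking_metrics` (not part of the original notebook): a single user whose only relevant item is ranked first, so the expected values are easy to verify by hand." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# added sanity check: one user, five items, only item 2 is relevant (rating 5)\n", "toy_test_ui = sparse.csr_matrix((np.array([5]), (np.array([0]), np.array([2]))), shape=(1, 5))\n", "toy_reco = np.array([[0, 2, 0, 1]]) # row = [user code, then 3 recommended item codes]\n", "# expected: precision=1/3, recall=1, NDCG=1, MRR=1, HR=1\n", "ranking_metrics(toy_test_ui, toy_reco, super_reactions=[4, 5], topK=3)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "943it [00:00, 9409.37it/s]\n" ] }, { "data": { "text/html": [ "<div>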
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
precisionrecallF_1F_05precision_superrecall_superNDCGmAPMRRLAUCHR
00.091410.0376520.046030.0612860.0796140.0564630.0959570.0431780.1981930.5155010.437964
\n", "
" ], "text/plain": [ " precision recall F_1 F_05 precision_super recall_super \\\n", "0 0.09141 0.037652 0.04603 0.061286 0.079614 0.056463 \n", "\n", " NDCG mAP MRR LAUC HR \n", "0 0.095957 0.043178 0.198193 0.515501 0.437964 " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ranking_metrics(test_ui, reco, super_reactions=[4,5], topK=10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Diversity metrics" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "def diversity_metrics(test_ui, reco, topK=10):\n", " \n", " frequencies=defaultdict(int)\n", " \n", " # let's assign 0 to all items in test set\n", " for item in list(set(test_ui.indices)):\n", " frequencies[item]=0\n", " \n", " # counting frequencies\n", " for item in reco[:,1:].flat:\n", " frequencies[item]+=1\n", " \n", " nb_reco_outside_test=frequencies[-1]\n", " del frequencies[-1]\n", " \n", " frequencies=np.array(list(frequencies.values()))\n", " \n", " nb_rec_items=len(frequencies[frequencies>0])\n", " nb_reco_inside_test=np.sum(frequencies)\n", " \n", " frequencies=frequencies/np.sum(frequencies)\n", " frequencies=np.sort(frequencies)\n", " \n", " with np.errstate(divide='ignore'): # let's put zeros put items with 0 frequency and ignore division warning\n", " log_frequencies=np.nan_to_num(np.log(frequencies), posinf=0, neginf=0)\n", " \n", " result=[]\n", " result.append(('Reco in test', nb_reco_inside_test/(nb_reco_inside_test+nb_reco_outside_test)))\n", " result.append(('Test coverage', nb_rec_items/test_ui.shape[1]))\n", " result.append(('Shannon', -np.dot(frequencies, log_frequencies)))\n", " result.append(('Gini', np.dot(frequencies, np.arange(1-len(frequencies), len(frequencies), 2))/(len(frequencies)-1)))\n", " \n", " df_result=(pd.DataFrame(list(zip(*result))[1])).T\n", " df_result.columns=list(zip(*result))[0]\n", " return df_result" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Reco in testTest coverageShannonGini
01.00.0339112.8365130.991139
\n", "
" ], "text/plain": [ " Reco in test Test coverage Shannon Gini\n", "0 1.0 0.033911 2.836513 0.991139" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# in case of errors try !pip3 install numpy==1.18.4 (or pip if you use python 2) and restart the kernel\n", "\n", "import evaluation_measures as ev\n", "import imp\n", "imp.reload(ev)\n", "\n", "x=diversity_metrics(test_ui, reco, topK=10)\n", "x" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# To be used in other notebooks" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "943it [00:00, 8290.29it/s]\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RMSEMAEprecisionrecallF_1F_05precision_superrecall_superNDCGmAPMRRLAUCHRReco in testTest coverageShannonGini
00.9494590.7524870.091410.0376520.046030.0612860.0796140.0564630.0959570.0431780.1981930.5155010.4379641.00.0339112.8365130.991139
\n", "
" ], "text/plain": [ " RMSE MAE precision recall F_1 F_05 \\\n", "0 0.949459 0.752487 0.09141 0.037652 0.04603 0.061286 \n", "\n", " precision_super recall_super NDCG mAP MRR LAUC \\\n", "0 0.079614 0.056463 0.095957 0.043178 0.198193 0.515501 \n", "\n", " HR Reco in test Test coverage Shannon Gini \n", "0 0.437964 1.0 0.033911 2.836513 0.991139 " ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import evaluation_measures as ev\n", "import imp\n", "imp.reload(ev)\n", "\n", "estimations_df=pd.read_csv('Recommendations generated/ml-100k/Ready_Baseline_estimations.csv', header=None)\n", "reco=np.loadtxt('Recommendations generated/ml-100k/Ready_Baseline_reco.csv', delimiter=',')\n", "\n", "ev.evaluate(test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None),\n", " estimations_df=estimations_df, \n", " reco=reco,\n", " super_reactions=[4,5])\n", "#also you can just type ev.evaluate_all(estimations_df, reco) - I put above values as default" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "943it [00:00, 9608.49it/s]\n", "943it [00:00, 9837.12it/s]\n", "943it [00:00, 10292.46it/s]\n", "943it [00:00, 9906.94it/s]\n", "943it [00:00, 9162.09it/s]\n" ] } ], "source": [ "import evaluation_measures as ev\n", "import imp\n", "imp.reload(ev)\n", "\n", "dir_path=\"Recommendations generated/ml-100k/\"\n", "super_reactions=[4,5]\n", "test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n", "\n", "df=ev.evaluate_all(test, dir_path, super_reactions)\n", "#also you can just type ev.evaluate_all() - I put above values as default" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ModelRMSEMAEprecisionrecallF_1F_05precision_superrecall_super
0Self_TopPop2.5082582.2179090.1888650.1169190.1187320.1415840.1304720.137473
0Ready_Baseline0.9494590.7524870.0914100.0376520.0460300.0612860.0796140.056463
0Self_GlobalAvg1.1257600.9435340.0611880.0259680.0313830.0413430.0405580.032107
0Ready_Random1.5189641.2221590.0465540.0206030.0236790.0312160.0289700.021179
0Self_BaselineUI0.9675850.7627400.0009540.0001700.0002780.0004630.0006440.000189
\n", "
" ], "text/plain": [ " Model RMSE MAE precision recall F_1 \\\n", "0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 0.118732 \n", "0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 0.046030 \n", "0 Self_GlobalAvg 1.125760 0.943534 0.061188 0.025968 0.031383 \n", "0 Ready_Random 1.518964 1.222159 0.046554 0.020603 0.023679 \n", "0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 0.000278 \n", "\n", " F_05 precision_super recall_super \n", "0 0.141584 0.130472 0.137473 \n", "0 0.061286 0.079614 0.056463 \n", "0 0.041343 0.040558 0.032107 \n", "0 0.031216 0.028970 0.021179 \n", "0 0.000463 0.000644 0.000189 " ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.iloc[:,:9]" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ModelNDCGmAPMRRLAUCHRReco in testTest coverageShannonGini
0Self_TopPop0.2146510.1117070.4009390.5555460.7656421.0000000.0389613.1590790.987317
0Ready_Baseline0.0959570.0431780.1981930.5155010.4379641.0000000.0339112.8365130.991139
0Self_GlobalAvg0.0676950.0274700.1711870.5095460.3849421.0000000.0259742.7117720.992003
0Ready_Random0.0504890.0191850.1238560.5068120.3223750.9878050.1847045.1031720.906873
0Self_BaselineUI0.0007520.0001680.0016770.4964240.0095440.6005300.0050511.8031260.996380
\n", "
" ], "text/plain": [ " Model NDCG mAP MRR LAUC HR \\\n", "0 Self_TopPop 0.214651 0.111707 0.400939 0.555546 0.765642 \n", "0 Ready_Baseline 0.095957 0.043178 0.198193 0.515501 0.437964 \n", "0 Self_GlobalAvg 0.067695 0.027470 0.171187 0.509546 0.384942 \n", "0 Ready_Random 0.050489 0.019185 0.123856 0.506812 0.322375 \n", "0 Self_BaselineUI 0.000752 0.000168 0.001677 0.496424 0.009544 \n", "\n", " Reco in test Test coverage Shannon Gini \n", "0 1.000000 0.038961 3.159079 0.987317 \n", "0 1.000000 0.033911 2.836513 0.991139 \n", "0 1.000000 0.025974 2.711772 0.992003 \n", "0 0.987805 0.184704 5.103172 0.906873 \n", "0 0.600530 0.005051 1.803126 0.996380 " ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.iloc[:,np.append(0,np.arange(9, df.shape[1]))]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Check metrics on toy dataset" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "3it [00:00, 1651.52it/s]\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ModelRMSEMAEprecisionrecallF_1F_05precision_superrecall_superNDCGmAPMRRLAUCHRReco in testTest coverageShannonGini
0Self_BaselineUI1.6124521.40.4444440.8888890.5555560.4786320.3333330.750.6769070.5740740.6111110.6388891.00.8888890.81.3862940.25
\n", "
" ], "text/plain": [ " Model RMSE MAE precision recall F_1 F_05 \\\n", "0 Self_BaselineUI 1.612452 1.4 0.444444 0.888889 0.555556 0.478632 \n", "\n", " precision_super recall_super NDCG mAP MRR LAUC HR \\\n", "0 0.333333 0.75 0.676907 0.574074 0.611111 0.638889 1.0 \n", "\n", " Reco in test Test coverage Shannon Gini \n", "0 0.888889 0.8 1.386294 0.25 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Training data:\n" ] }, { "data": { "text/plain": [ "matrix([[3, 4, 0, 0, 5, 0, 0, 4],\n", " [0, 1, 2, 3, 0, 0, 0, 0],\n", " [0, 0, 0, 5, 0, 3, 4, 0]], dtype=int64)" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Test data:\n" ] }, { "data": { "text/plain": [ "matrix([[0, 0, 0, 0, 0, 0, 3, 0],\n", " [0, 0, 0, 0, 5, 0, 0, 0],\n", " [5, 0, 4, 0, 0, 0, 0, 2]], dtype=int64)" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Recommendations:\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456
00305.0204.0604.0
110403.0602.0702.0
220405.0204.0704.0
\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6\n", "0 0 30 5.0 20 4.0 60 4.0\n", "1 10 40 3.0 60 2.0 70 2.0\n", "2 20 40 5.0 20 4.0 70 4.0" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Estimations:\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
useritemest_score
00604.0
110403.0
22003.0
320204.0
420704.0
\n", "
" ], "text/plain": [ " user item est_score\n", "0 0 60 4.0\n", "1 10 40 3.0\n", "2 20 0 3.0\n", "3 20 20 4.0\n", "4 20 70 4.0" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import evaluation_measures as ev\n", "import imp\n", "import helpers\n", "imp.reload(ev)\n", "\n", "dir_path=\"Recommendations generated/toy-example/\"\n", "super_reactions=[4,5]\n", "test=pd.read_csv('./Datasets/toy-example/test.csv', sep='\\t', header=None)\n", "\n", "display(ev.evaluate_all(test, dir_path, super_reactions, topK=3))\n", "#also you can just type ev.evaluate_all() - I put above values as default\n", "\n", "toy_train_read=pd.read_csv('./Datasets/toy-example/train.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n", "toy_test_read=pd.read_csv('./Datasets/toy-example/test.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n", "reco=pd.read_csv('Recommendations generated/toy-example/Self_BaselineUI_reco.csv', header=None)\n", "estimations=pd.read_csv('Recommendations generated/toy-example/Self_BaselineUI_estimations.csv', names=['user', 'item', 'est_score'])\n", "toy_train_ui, toy_test_ui, toy_user_code_id, toy_user_id_code, \\\n", "toy_item_code_id, toy_item_id_code = helpers.data_to_csr(toy_train_read, toy_test_read)\n", "\n", "print('Training data:')\n", "display(toy_train_ui.todense())\n", "\n", "print('Test data:')\n", "display(toy_test_ui.todense())\n", "\n", "print('Recommendations:')\n", "display(reco)\n", "\n", "print('Estimations:')\n", "display(estimations)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Sample recommendations" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Here is what user rated high:\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
userratingtitlegenres
247541055Gattaca (1997)Drama, Sci-Fi, Thriller
370731055Contact (1997)Drama, Sci-Fi
405921055Titanic (1997)Action, Drama, Romance
460321055L.A. Confidential (1997)Crime, Film-Noir, Mystery, Thriller
179161054English Patient, The (1996)Drama, Romance, War
237671054Chasing Amy (1997)Drama, Romance
470031054Cop Land (1997)Crime, Drama, Mystery
488911054Lost Highway (1997)Mystery
673551054Good Will Hunting (1997)Drama
541211053Boogie Nights (1997)Drama
554391053Seven Years in Tibet (1997)Drama, War
696871053Wag the Dog (1997)Comedy, Drama
73211052Saint, The (1997)Action, Romance, Thriller
450561052Tomorrow Never Dies (1997)Action, Romance, Thriller
640521052Alien: Resurrection (1997)Action, Horror, Sci-Fi
\n", "
" ], "text/plain": [ " user rating title \\\n", "24754 105 5 Gattaca (1997) \n", "37073 105 5 Contact (1997) \n", "40592 105 5 Titanic (1997) \n", "46032 105 5 L.A. Confidential (1997) \n", "17916 105 4 English Patient, The (1996) \n", "23767 105 4 Chasing Amy (1997) \n", "47003 105 4 Cop Land (1997) \n", "48891 105 4 Lost Highway (1997) \n", "67355 105 4 Good Will Hunting (1997) \n", "54121 105 3 Boogie Nights (1997) \n", "55439 105 3 Seven Years in Tibet (1997) \n", "69687 105 3 Wag the Dog (1997) \n", "7321 105 2 Saint, The (1997) \n", "45056 105 2 Tomorrow Never Dies (1997) \n", "64052 105 2 Alien: Resurrection (1997) \n", "\n", " genres \n", "24754 Drama, Sci-Fi, Thriller \n", "37073 Drama, Sci-Fi \n", "40592 Action, Drama, Romance \n", "46032 Crime, Film-Noir, Mystery, Thriller \n", "17916 Drama, Romance, War \n", "23767 Drama, Romance \n", "47003 Crime, Drama, Mystery \n", "48891 Mystery \n", "67355 Drama \n", "54121 Drama \n", "55439 Drama, War \n", "69687 Comedy, Drama \n", "7321 Action, Romance, Thriller \n", "45056 Action, Romance, Thriller \n", "64052 Action, Horror, Sci-Fi " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Here is what we recommend:\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
userrec_nbtitlegenres
103105.01Great Day in Harlem, A (1994)Documentary
1046105.02Tough and Deadly (1995)Action, Drama, Thriller
1988105.03Aiqing wansui (1994)Drama
2930105.04Delta of Venus (1994)Drama
3872105.05Someone Else's America (1995)Drama
4814105.06Saint of Fort Washington, The (1993)Drama
5755105.07Celestial Clockwork (1994)Comedy
6698105.08Some Mother's Son (1996)Drama
8592105.09Maya Lin: A Strong Clear Vision (1994)Documentary
7638105.010Prefontaine (1997)Drama
\n", "
" ], "text/plain": [ " user rec_nb title \\\n", "103 105.0 1 Great Day in Harlem, A (1994) \n", "1046 105.0 2 Tough and Deadly (1995) \n", "1988 105.0 3 Aiqing wansui (1994) \n", "2930 105.0 4 Delta of Venus (1994) \n", "3872 105.0 5 Someone Else's America (1995) \n", "4814 105.0 6 Saint of Fort Washington, The (1993) \n", "5755 105.0 7 Celestial Clockwork (1994) \n", "6698 105.0 8 Some Mother's Son (1996) \n", "8592 105.0 9 Maya Lin: A Strong Clear Vision (1994) \n", "7638 105.0 10 Prefontaine (1997) \n", "\n", " genres \n", "103 Documentary \n", "1046 Action, Drama, Thriller \n", "1988 Drama \n", "2930 Drama \n", "3872 Drama \n", "4814 Drama \n", "5755 Comedy \n", "6698 Drama \n", "8592 Documentary \n", "7638 Drama " ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train=pd.read_csv('./Datasets/ml-100k/train.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n", "items=pd.read_csv('./Datasets/ml-100k/movies.csv')\n", "\n", "user=random.choice(list(set(train['user'])))\n", "\n", "train_content=pd.merge(train, items, left_on='item', right_on='id')\n", "\n", "print('Here is what user rated high:')\n", "display(train_content[train_content['user']==user][['user', 'rating', 'title', 'genres']]\\\n", " .sort_values(by='rating', ascending=False)[:15])\n", "\n", "reco = np.loadtxt('Recommendations generated/ml-100k/Self_BaselineUI_reco.csv', delimiter=',')\n", "items=pd.read_csv('./Datasets/ml-100k/movies.csv')\n", "\n", "# Let's ignore scores - they are not used in evaluation: \n", "reco_users=reco[:,:1]\n", "reco_items=reco[:,1::2]\n", "# Let's put them into one array\n", "reco=np.concatenate((reco_users, reco_items), axis=1)\n", "\n", "# Let's rebuild it user-item dataframe\n", "recommended=[]\n", "for row in reco:\n", " for rec_nb, entry in enumerate(row[1:]):\n", " recommended.append((row[0], rec_nb+1, entry))\n", "recommended=pd.DataFrame(recommended, columns=['user','rec_nb', 'item'])\n", "\n", "recommended_content=pd.merge(recommended, items, left_on='item', right_on='id')\n", "\n", "print('Here is what we recommend:')\n", "recommended_content[recommended_content['user']==user][['user', 'rec_nb', 'title', 'genres']].sort_values(by='rec_nb')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# project task 3: implement some other evaluation measure" ] }, { "cell_type": "code", "execution_count": 71, "metadata": {}, "outputs": [], "source": [ "# it may be your idea, modification of what we have already implemented \n", "# (for example Hit2 rate which would count as a success users whoreceived at least 2 relevant recommendations) \n", "# or something well-known\n", "# expected output: modification of evaluation_measures.py such that evaluate_all will also display your measure" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "943it [00:00, 9023.32it/s]\n", "943it [00:00, 9004.98it/s]\n", "943it [00:00, 8532.40it/s]\n", "943it [00:00, 8974.06it/s]\n", "943it [00:00, 8818.01it/s]\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ModelRMSEMAEprecisionrecallF_1F_05precision_superrecall_superNDCGmAPMRRLAUCHRReco in testTest coverageShannonGini
0Self_TopPop2.5082582.2179090.1888650.1169190.1187320.1415840.1304720.1374730.2146510.1117070.4009390.5555460.7656421.0000000.0389613.1590790.987317
0Ready_Baseline0.9494590.7524870.0914100.0376520.0460300.0612860.0796140.0564630.0959570.0431780.1981930.5155010.4379641.0000000.0339112.8365130.991139
0Self_GlobalAvg1.1257600.9435340.0611880.0259680.0313830.0413430.0405580.0321070.0676950.0274700.1711870.5095460.3849421.0000000.0259742.7117720.992003
0Ready_Random1.5189641.2221590.0465540.0206030.0236790.0312160.0289700.0211790.0504890.0191850.1238560.5068120.3223750.9878050.1847045.1031720.906873
0Self_BaselineUI0.9675850.7627400.0009540.0001700.0002780.0004630.0006440.0001890.0007520.0001680.0016770.4964240.0095440.6005300.0050511.8031260.996380
\n", "
" ], "text/plain": [ " Model RMSE MAE precision recall F_1 \\\n", "0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 0.118732 \n", "0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 0.046030 \n", "0 Self_GlobalAvg 1.125760 0.943534 0.061188 0.025968 0.031383 \n", "0 Ready_Random 1.518964 1.222159 0.046554 0.020603 0.023679 \n", "0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 0.000278 \n", "\n", " F_05 precision_super recall_super NDCG mAP MRR \\\n", "0 0.141584 0.130472 0.137473 0.214651 0.111707 0.400939 \n", "0 0.061286 0.079614 0.056463 0.095957 0.043178 0.198193 \n", "0 0.041343 0.040558 0.032107 0.067695 0.027470 0.171187 \n", "0 0.031216 0.028970 0.021179 0.050489 0.019185 0.123856 \n", "0 0.000463 0.000644 0.000189 0.000752 0.000168 0.001677 \n", "\n", " LAUC HR Reco in test Test coverage Shannon Gini \n", "0 0.555546 0.765642 1.000000 0.038961 3.159079 0.987317 \n", "0 0.515501 0.437964 1.000000 0.033911 2.836513 0.991139 \n", "0 0.509546 0.384942 1.000000 0.025974 2.711772 0.992003 \n", "0 0.506812 0.322375 0.987805 0.184704 5.103172 0.906873 \n", "0 0.496424 0.009544 0.600530 0.005051 1.803126 0.996380 " ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dir_path=\"Recommendations generated/ml-100k/\"\n", "super_reactions=[4,5]\n", "test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n", "\n", "ev.evaluate_all(test, dir_path, super_reactions)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.9" } }, "nbformat": 4, "nbformat_minor": 4 }