workshops_recommender_systems/P2. Evaluation.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Prepare test set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "slideshow": {
     "slide_type": "-"
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import scipy.sparse as sparse\n",
    "from collections import defaultdict\n",
    "from itertools import chain\n",
    "import random\n",
    "from tqdm import tqdm\n",
    "\n",
    "# The train set is not needed for evaluation, so only the test set is loaded.\n",
    "test = pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n",
    "test.columns = ['user', 'item', 'rating', 'timestamp']\n",
    "\n",
    "# Categorical views of the raw ids; their integer codes serve as inner (matrix) indices.\n",
    "user_categorical = test['user'].astype(\"category\")\n",
    "item_categorical = test['item'].astype(\"category\")\n",
    "test['user_code'] = user_categorical.cat.codes\n",
    "test['item_code'] = item_categorical.cat.codes\n",
    "\n",
    "# Lookup tables in both directions: inner code -> original id and original id -> inner code.\n",
    "user_code_id = dict(enumerate(user_categorical.cat.categories))\n",
    "user_id_code = {original_id: code for code, original_id in user_code_id.items()}\n",
    "item_code_id = dict(enumerate(item_categorical.cat.categories))\n",
    "item_id_code = {original_id: code for code, original_id in item_code_id.items()}\n",
    "\n",
    "# Sparse user x item matrix of test ratings, addressed by the inner codes.\n",
    "test_ui = sparse.csr_matrix((test['rating'], (test['user_code'], test['item_code'])))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Estimations metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Estimated ratings, one (user, item, score) row per estimation, keyed by original ids.\n",
    "estimations_df = pd.read_csv('Recommendations generated/ml-100k/Ready_Baseline_estimations.csv', header=None)\n",
    "estimations_df.columns = ['user', 'item', 'score']\n",
    "\n",
    "# Translate original ids into the inner codes used by test_ui (raises KeyError for an id missing from the test set).\n",
    "estimations_df['user_code'] = [user_id_code[user_id] for user_id in estimations_df['user']]\n",
    "estimations_df['item_code'] = [item_id_code[item_id] for item_id in estimations_df['item']]\n",
    "\n",
    "# Build the matrix with the shape of test_ui so both can be compared entry by entry.\n",
    "estimation_coords = (estimations_df['user_code'], estimations_df['item_code'])\n",
    "estimations = sparse.csr_matrix((estimations_df['score'], estimation_coords), shape=test_ui.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def estimations_metrics(test_ui, estimations):\n",
    "    \"\"\"Compute RMSE and MAE of estimated ratings against the test ratings.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    test_ui : scipy.sparse CSR matrix\n",
    "        Ground-truth ratings (users x items).\n",
    "    estimations : scipy.sparse CSR matrix\n",
    "        Estimated ratings. It must store exactly the same (user, item) entries\n",
    "        as `test_ui`, because the errors are computed on the aligned `.data` arrays.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        A single row (index 0) with the columns 'RMSE' and 'MAE'.\n",
    "        Both values are NaN when the matrices store no entries.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If the two matrices do not store the same entries in the same order.\n",
    "    \"\"\"\n",
    "    # Subtracting the raw .data arrays is only meaningful when both matrices store\n",
    "    # the same entries in the same order; otherwise ratings of different\n",
    "    # (user, item) pairs would be compared without any error being raised.\n",
    "    if not (np.array_equal(estimations.indptr, test_ui.indptr)\n",
    "            and np.array_equal(estimations.indices, test_ui.indices)):\n",
    "        raise ValueError('estimations and test_ui must store the same (user, item) entries')\n",
    "\n",
    "    nb_estimations = estimations.nnz\n",
    "    if nb_estimations == 0:\n",
    "        # nothing to compare - report NaN instead of dividing by zero\n",
    "        RMSE = MAE = float('nan')\n",
    "    else:\n",
    "        errors = estimations.data - test_ui.data\n",
    "        RMSE = np.sqrt(np.sum(errors**2) / nb_estimations)\n",
    "        MAE = np.sum(np.abs(errors)) / nb_estimations\n",
    "\n",
    "    return pd.DataFrame([[RMSE, MAE]], columns=['RMSE', 'MAE'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>RMSE</th>\n",
       "      <th>MAE</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.949459</td>\n",
       "      <td>0.752487</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       RMSE       MAE\n",
       "0  0.949459  0.752487"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Troubleshooting: if this cell raises an error (e.g. on the laboratory machines), you may need a different pandas version.\n",
    "# Try `!pip3 install pandas=='1.0.3'` (or `pip`, depending on how Python is installed) and restart the kernel.\n",
    "\n",
    "# RMSE and MAE of the estimations loaded above, measured on the test set\n",
    "estimations_metrics(test_ui, estimations)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Ranking metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[663, 475,  62, ..., 472, 269, 503],\n",
       "       [ 48, 313, 475, ..., 591, 175, 466],\n",
       "       [351, 313, 475, ..., 591, 175, 466],\n",
       "       ...,\n",
       "       [259, 313, 475, ...,  11, 591, 175],\n",
       "       [ 33, 313, 475, ...,  11, 591, 175],\n",
       "       [ 77, 313, 475, ...,  11, 591, 175]])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Judging by the slicing below, each row of the file is: user, item_1, score_1, item_2, score_2, ...\n",
    "reco = np.loadtxt('Recommendations generated/ml-100k/Ready_Baseline_reco.csv', delimiter=',')\n",
    "# Let's ignore scores - they are not used in evaluation:\n",
    "users = reco[:, :1]\n",
    "items = reco[:, 1::2]\n",
    "# Let's use inner ids instead of real ones.\n",
    "# dict.get (not dict.setdefault) is used on purpose: unknown ids are mapped to -1\n",
    "# without being inserted into the lookup tables, which other cells rely on.\n",
    "users = np.vectorize(lambda x: user_id_code.get(x, -1))(users)\n",
    "items = np.vectorize(lambda x: item_id_code.get(x, -1))(items)  # maybe items we recommend are not in test set\n",
    "# Let's put them into one array\n",
    "reco = np.concatenate((users, items), axis=1)\n",
    "reco"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def ranking_metrics(test_ui, reco, super_reactions=[], topK=10):\n",
    "    \"\"\"Compute top-K ranking metrics averaged over the users that have test items.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    test_ui : scipy.sparse CSR matrix\n",
    "        Test ratings (users x items), addressed by inner codes.\n",
    "    reco : numpy.ndarray\n",
    "        One row per user: column 0 holds the user's inner code, the following\n",
    "        columns hold the recommended item codes in ranking order.\n",
    "    super_reactions : iterable, optional\n",
    "        Rating values treated as 'super' reactions. The default list is never mutated.\n",
    "    topK : int, optional\n",
    "        Number of leading recommendations evaluated for every user.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        A single row with the columns precision, recall, F_1, F_05, precision_super,\n",
    "        recall_super, NDCG, mAP, MRR, LAUC and HR. The '_super' metrics are averaged over\n",
    "        users having at least one super item, the others over users having at least one\n",
    "        test item; an average over zero users is reported as 0.0.\n",
    "    \"\"\"\n",
    "    nb_items = test_ui.shape[1]\n",
    "    # np.isin needs a sequence - a set would be treated as one single object\n",
    "    super_values = list(super_reactions)\n",
    "    relevant_users, super_relevant_users = 0, 0\n",
    "    prec, rec, F_1, F_05, prec_super, rec_super, ndcg, mAP, MRR, LAUC, HR = (0.0,) * 11\n",
    "\n",
    "    # discount of every ranking position and its running sum (used to normalise the DCG)\n",
    "    cg = (1.0 / np.log2(np.arange(2, topK + 2)))\n",
    "    cg_sum = np.cumsum(cg)\n",
    "\n",
    "    for (nb_user, user) in tqdm(enumerate(reco[:, 0])):\n",
    "        user = int(user)\n",
    "        if user < 0:  # negative code: the user is not present in the test set\n",
    "            continue\n",
    "        row_start, row_end = test_ui.indptr[user], test_ui.indptr[user + 1]\n",
    "        u_rated_items = test_ui.indices[row_start:row_end]\n",
    "        nb_u_rated_items = len(u_rated_items)\n",
    "        if nb_u_rated_items == 0:  # skip users with no items in test set\n",
    "            continue\n",
    "        relevant_users += 1\n",
    "\n",
    "        # Super items are selected straight from the stored ratings of the row;\n",
    "        # accessing test_ui[user, item] for every item would be expensive.\n",
    "        u_super_items = u_rated_items[np.isin(test_ui.data[row_start:row_end], super_values)]\n",
    "        if len(u_super_items) > 0:\n",
    "            super_relevant_users += 1\n",
    "\n",
    "        # evaluation: 1 at every ranking position holding an item rated by the user\n",
    "        top_items = reco[nb_user, 1:topK + 1]\n",
    "        user_successes = np.zeros(topK)\n",
    "        user_successes[:len(top_items)] = np.isin(top_items, u_rated_items)\n",
    "        nb_user_successes = int(user_successes.sum())\n",
    "        nb_user_super_successes = int(np.isin(top_items, u_super_items).sum())\n",
    "\n",
    "        prec_u = nb_user_successes / topK\n",
    "        prec += prec_u\n",
    "\n",
    "        rec_u = nb_user_successes / nb_u_rated_items\n",
    "        rec += rec_u\n",
    "\n",
    "        if prec_u + rec_u > 0:\n",
    "            F_1 += 2 * (prec_u * rec_u) / (prec_u + rec_u)\n",
    "            F_05 += (0.5**2 + 1) * (prec_u * rec_u) / (0.5**2 * prec_u + rec_u)\n",
    "\n",
    "        prec_super += nb_user_super_successes / topK\n",
    "        rec_super += nb_user_super_successes / max(len(u_super_items), 1)  # to set 0 if no super items\n",
    "        ndcg += np.dot(user_successes, cg) / cg_sum[min(topK, nb_u_rated_items) - 1]\n",
    "\n",
    "        cumsum_successes = np.cumsum(user_successes)\n",
    "        mAP += np.dot(cumsum_successes / np.arange(1, topK + 1), user_successes) / min(topK, nb_u_rated_items)\n",
    "        hit_positions = user_successes.nonzero()[0]\n",
    "        MRR += 1 / (hit_positions[0] + 1) if hit_positions.size > 0 else 0\n",
    "        LAUC += ((np.dot(cumsum_successes, 1 - user_successes)\n",
    "                  + (nb_user_successes + nb_u_rated_items) / 2\n",
    "                  * ((nb_items - nb_u_rated_items) - (topK - nb_user_successes)))\n",
    "                 / ((nb_items - nb_u_rated_items) * nb_u_rated_items))\n",
    "\n",
    "        HR += nb_user_successes > 0\n",
    "\n",
    "    def _mean(total, nb_users):\n",
    "        # average over users; 0.0 when no user qualified (avoids ZeroDivisionError)\n",
    "        return total / nb_users if nb_users > 0 else 0.0\n",
    "\n",
    "    result = [\n",
    "        ('precision', _mean(prec, relevant_users)),\n",
    "        ('recall', _mean(rec, relevant_users)),\n",
    "        ('F_1', _mean(F_1, relevant_users)),\n",
    "        ('F_05', _mean(F_05, relevant_users)),\n",
    "        ('precision_super', _mean(prec_super, super_relevant_users)),\n",
    "        ('recall_super', _mean(rec_super, super_relevant_users)),\n",
    "        ('NDCG', _mean(ndcg, relevant_users)),\n",
    "        ('mAP', _mean(mAP, relevant_users)),\n",
    "        ('MRR', _mean(MRR, relevant_users)),\n",
    "        ('LAUC', _mean(LAUC, relevant_users)),\n",
    "        ('HR', _mean(HR, relevant_users)),\n",
    "    ]\n",
    "\n",
    "    names, values = zip(*result)\n",
    "    return pd.DataFrame([values], columns=list(names))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "943it [00:00, 11673.30it/s]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>precision</th>\n",
       "      <th>recall</th>\n",
       "      <th>F_1</th>\n",
       "      <th>F_05</th>\n",
       "      <th>precision_super</th>\n",
       "      <th>recall_super</th>\n",
       "      <th>NDCG</th>\n",
       "      <th>mAP</th>\n",
       "      <th>MRR</th>\n",
       "      <th>LAUC</th>\n",
       "      <th>HR</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.09141</td>\n",
       "      <td>0.037652</td>\n",
       "      <td>0.04603</td>\n",
       "      <td>0.061286</td>\n",
       "      <td>0.079614</td>\n",
       "      <td>0.056463</td>\n",
       "      <td>0.095957</td>\n",
       "      <td>0.043178</td>\n",
       "      <td>0.198193</td>\n",
       "      <td>0.515501</td>\n",
       "      <td>0.437964</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   precision    recall      F_1      F_05  precision_super  recall_super  \\\n",
       "0    0.09141  0.037652  0.04603  0.061286         0.079614      0.056463   \n",
       "\n",
       "       NDCG       mAP       MRR      LAUC        HR  \n",
       "0  0.095957  0.043178  0.198193  0.515501  0.437964  "
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Ranking metrics of the recommendations loaded above; ratings 4 and 5 count as 'super' reactions\n",
    "ranking_metrics(test_ui, reco, super_reactions=[4,5], topK=10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Diversity metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def diversity_metrics(test_ui, reco, topK=10):\n",
    "    \"\"\"Compute diversity statistics of the top-K recommendations.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    test_ui : scipy.sparse CSR matrix\n",
    "        Test ratings (users x items); its stored column indices define the items of the test set.\n",
    "    reco : numpy.ndarray\n",
    "        One row per user: column 0 holds the user, the following columns hold the\n",
    "        recommended item codes, with -1 standing for an item outside the test set.\n",
    "    topK : int, optional\n",
    "        Number of leading recommendations of every user that are counted.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        A single row with the columns 'Reco in test', 'Test coverage', 'Shannon' and 'Gini'.\n",
    "        Ratios whose denominator is zero are reported as 0.0.\n",
    "    \"\"\"\n",
    "    frequencies = defaultdict(int)\n",
    "\n",
    "    # let's assign 0 to all items in test set\n",
    "    for item in np.unique(test_ui.indices).tolist():\n",
    "        frequencies[item] = 0\n",
    "\n",
    "    # counting frequencies - only the first topK recommendations of every user\n",
    "    for item in reco[:, 1:topK + 1].ravel().tolist():\n",
    "        frequencies[item] += 1\n",
    "\n",
    "    # code -1 gathers all recommendations of items outside the test set\n",
    "    nb_reco_outside_test = frequencies.pop(-1, 0)\n",
    "\n",
    "    frequencies = np.array(list(frequencies.values()), dtype=float)\n",
    "\n",
    "    nb_rec_items = int(np.count_nonzero(frequencies))\n",
    "    nb_reco_inside_test = frequencies.sum()\n",
    "    nb_reco = nb_reco_inside_test + nb_reco_outside_test\n",
    "\n",
    "    if nb_reco_inside_test > 0:  # keep all-zero frequencies as they are instead of producing NaN\n",
    "        frequencies = frequencies / nb_reco_inside_test\n",
    "    frequencies = np.sort(frequencies)\n",
    "\n",
    "    # log of the positive frequencies only; items with 0 frequency get 0 and so add nothing to the entropy\n",
    "    log_frequencies = np.zeros_like(frequencies)\n",
    "    positive = frequencies > 0\n",
    "    log_frequencies[positive] = np.log(frequencies[positive])\n",
    "\n",
    "    nb_frequencies = len(frequencies)\n",
    "    if nb_frequencies > 1:\n",
    "        gini = np.dot(frequencies, np.arange(1 - nb_frequencies, nb_frequencies, 2)) / (nb_frequencies - 1)\n",
    "    else:\n",
    "        gini = 0.0\n",
    "\n",
    "    result = [\n",
    "        ('Reco in test', nb_reco_inside_test / nb_reco if nb_reco > 0 else 0.0),\n",
    "        ('Test coverage', nb_rec_items / test_ui.shape[1] if test_ui.shape[1] > 0 else 0.0),\n",
    "        ('Shannon', -np.dot(frequencies, log_frequencies)),\n",
    "        ('Gini', gini),\n",
    "    ]\n",
    "\n",
    "    names, values = zip(*result)\n",
    "    return pd.DataFrame([values], columns=list(names))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Reco in test</th>\n",
       "      <th>Test coverage</th>\n",
       "      <th>Shannon</th>\n",
       "      <th>Gini</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.033911</td>\n",
       "      <td>2.836513</td>\n",
       "      <td>0.991139</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Reco in test  Test coverage   Shannon      Gini\n",
       "0           1.0       0.033911  2.836513  0.991139"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# in case of errors try !pip3 install numpy==1.18.4 (or pip if you use python 2) and restart the kernel\n",
    "\n",
    "import evaluation_measures as ev\n",
    "import importlib\n",
    "# importlib.reload replaces imp.reload: the imp module is deprecated and was removed in Python 3.12\n",
    "importlib.reload(ev)\n",
    "\n",
    "x=diversity_metrics(test_ui, reco, topK=10)\n",
    "x"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# To be used in other notebooks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "943it [00:00, 10629.42it/s]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>RMSE</th>\n",
       "      <th>MAE</th>\n",
       "      <th>precision</th>\n",
       "      <th>recall</th>\n",
       "      <th>F_1</th>\n",
       "      <th>F_05</th>\n",
       "      <th>precision_super</th>\n",
       "      <th>recall_super</th>\n",
       "      <th>NDCG</th>\n",
       "      <th>mAP</th>\n",
       "      <th>MRR</th>\n",
       "      <th>LAUC</th>\n",
       "      <th>HR</th>\n",
       "      <th>Reco in test</th>\n",
       "      <th>Test coverage</th>\n",
       "      <th>Shannon</th>\n",
       "      <th>Gini</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.949459</td>\n",
       "      <td>0.752487</td>\n",
       "      <td>0.09141</td>\n",
       "      <td>0.037652</td>\n",
       "      <td>0.04603</td>\n",
       "      <td>0.061286</td>\n",
       "      <td>0.079614</td>\n",
       "      <td>0.056463</td>\n",
       "      <td>0.095957</td>\n",
       "      <td>0.043178</td>\n",
       "      <td>0.198193</td>\n",
       "      <td>0.515501</td>\n",
       "      <td>0.437964</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.033911</td>\n",
       "      <td>2.836513</td>\n",
       "      <td>0.991139</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       RMSE       MAE  precision    recall      F_1      F_05  \\\n",
       "0  0.949459  0.752487    0.09141  0.037652  0.04603  0.061286   \n",
       "\n",
       "   precision_super  recall_super      NDCG       mAP       MRR      LAUC  \\\n",
       "0         0.079614      0.056463  0.095957  0.043178  0.198193  0.515501   \n",
       "\n",
       "         HR  Reco in test  Test coverage   Shannon      Gini  \n",
       "0  0.437964           1.0       0.033911  2.836513  0.991139  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import evaluation_measures as ev\n",
    "import importlib\n",
    "# importlib.reload replaces imp.reload: the imp module is deprecated and was removed in Python 3.12\n",
    "importlib.reload(ev)\n",
    "\n",
    "# NOTE: estimations_df and reco are re-read here with the original (raw) ids,\n",
    "# replacing the variables of the same name defined in the cells above\n",
    "estimations_df=pd.read_csv('Recommendations generated/ml-100k/Ready_Baseline_estimations.csv', header=None)\n",
    "reco=np.loadtxt('Recommendations generated/ml-100k/Ready_Baseline_reco.csv', delimiter=',')\n",
    "\n",
    "ev.evaluate(test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None),\n",
    "            estimations_df=estimations_df, \n",
    "            reco=reco,\n",
    "            super_reactions=[4,5])\n",
    "# The test set and super_reactions used above are said to be the defaults, so presumably\n",
    "# ev.evaluate(estimations_df=estimations_df, reco=reco) is enough - verify against evaluation_measures.py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "943it [00:00, 11391.79it/s]\n",
      "943it [00:00, 10740.34it/s]\n",
      "943it [00:00, 11967.55it/s]\n",
      "943it [00:00, 12152.51it/s]\n",
      "943it [00:00, 12581.48it/s]\n",
      "943it [00:00, 12607.27it/s]\n",
      "943it [00:00, 12116.29it/s]\n"
     ]
    }
   ],
   "source": [
    "import evaluation_measures as ev\n",
    "import importlib\n",
    "# importlib.reload replaces imp.reload: the imp module is deprecated and was removed in Python 3.12\n",
    "importlib.reload(ev)\n",
    "\n",
    "dir_path=\"Recommendations generated/ml-100k/\"\n",
    "super_reactions=[4,5]\n",
    "test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n",
    "\n",
    "# one row of metrics per model found in dir_path (see the tables displayed below)\n",
    "df=ev.evaluate_all(test, dir_path, super_reactions)\n",
    "#also you can just type ev.evaluate_all() - I put above values as default"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Model</th>\n",
       "      <th>RMSE</th>\n",
       "      <th>MAE</th>\n",
       "      <th>precision</th>\n",
       "      <th>recall</th>\n",
       "      <th>F_1</th>\n",
       "      <th>F_05</th>\n",
       "      <th>precision_super</th>\n",
       "      <th>recall_super</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Self_TopPop</td>\n",
       "      <td>2.508258</td>\n",
       "      <td>2.217909</td>\n",
       "      <td>0.188865</td>\n",
       "      <td>0.116919</td>\n",
       "      <td>0.118732</td>\n",
       "      <td>0.141584</td>\n",
       "      <td>0.130472</td>\n",
       "      <td>0.137473</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Ready_Baseline</td>\n",
       "      <td>0.949459</td>\n",
       "      <td>0.752487</td>\n",
       "      <td>0.091410</td>\n",
       "      <td>0.037652</td>\n",
       "      <td>0.046030</td>\n",
       "      <td>0.061286</td>\n",
       "      <td>0.079614</td>\n",
       "      <td>0.056463</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Self_GlobalAvg</td>\n",
       "      <td>1.125760</td>\n",
       "      <td>0.943534</td>\n",
       "      <td>0.061188</td>\n",
       "      <td>0.025968</td>\n",
       "      <td>0.031383</td>\n",
       "      <td>0.041343</td>\n",
       "      <td>0.040558</td>\n",
       "      <td>0.032107</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Self_TopRated</td>\n",
       "      <td>1.244026</td>\n",
       "      <td>1.000400</td>\n",
       "      <td>0.061188</td>\n",
       "      <td>0.025968</td>\n",
       "      <td>0.031383</td>\n",
       "      <td>0.041343</td>\n",
       "      <td>0.040558</td>\n",
       "      <td>0.032107</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Ready_Random</td>\n",
       "      <td>1.518962</td>\n",
       "      <td>1.216659</td>\n",
       "      <td>0.046978</td>\n",
       "      <td>0.019905</td>\n",
       "      <td>0.023281</td>\n",
       "      <td>0.030951</td>\n",
       "      <td>0.029399</td>\n",
       "      <td>0.019829</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Self_BaselineIU</td>\n",
       "      <td>0.958136</td>\n",
       "      <td>0.754051</td>\n",
       "      <td>0.000954</td>\n",
       "      <td>0.000188</td>\n",
       "      <td>0.000298</td>\n",
       "      <td>0.000481</td>\n",
       "      <td>0.000644</td>\n",
       "      <td>0.000223</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Self_BaselineUI</td>\n",
       "      <td>0.967585</td>\n",
       "      <td>0.762740</td>\n",
       "      <td>0.000954</td>\n",
       "      <td>0.000170</td>\n",
       "      <td>0.000278</td>\n",
       "      <td>0.000463</td>\n",
       "      <td>0.000644</td>\n",
       "      <td>0.000189</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             Model      RMSE       MAE  precision    recall       F_1  \\\n",
       "0      Self_TopPop  2.508258  2.217909   0.188865  0.116919  0.118732   \n",
       "0   Ready_Baseline  0.949459  0.752487   0.091410  0.037652  0.046030   \n",
       "0   Self_GlobalAvg  1.125760  0.943534   0.061188  0.025968  0.031383   \n",
       "0    Self_TopRated  1.244026  1.000400   0.061188  0.025968  0.031383   \n",
       "0     Ready_Random  1.518962  1.216659   0.046978  0.019905  0.023281   \n",
       "0  Self_BaselineIU  0.958136  0.754051   0.000954  0.000188  0.000298   \n",
       "0  Self_BaselineUI  0.967585  0.762740   0.000954  0.000170  0.000278   \n",
       "\n",
       "       F_05  precision_super  recall_super  \n",
       "0  0.141584         0.130472      0.137473  \n",
       "0  0.061286         0.079614      0.056463  \n",
       "0  0.041343         0.040558      0.032107  \n",
       "0  0.041343         0.040558      0.032107  \n",
       "0  0.030951         0.029399      0.019829  \n",
       "0  0.000481         0.000644      0.000223  \n",
       "0  0.000463         0.000644      0.000189  "
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# First part of the comparison table: columns 0-8 (Model, RMSE, MAE and the precision/recall family)\n",
    "df.iloc[:,:9]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Model</th>\n",
       "      <th>NDCG</th>\n",
       "      <th>mAP</th>\n",
       "      <th>MRR</th>\n",
       "      <th>LAUC</th>\n",
       "      <th>HR</th>\n",
       "      <th>Reco in test</th>\n",
       "      <th>Test coverage</th>\n",
       "      <th>Shannon</th>\n",
       "      <th>Gini</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Self_TopPop</td>\n",
       "      <td>0.214651</td>\n",
       "      <td>0.111707</td>\n",
       "      <td>0.400939</td>\n",
       "      <td>0.555546</td>\n",
       "      <td>0.765642</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.038961</td>\n",
       "      <td>3.159079</td>\n",
       "      <td>0.987317</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Ready_Baseline</td>\n",
       "      <td>0.095957</td>\n",
       "      <td>0.043178</td>\n",
       "      <td>0.198193</td>\n",
       "      <td>0.515501</td>\n",
       "      <td>0.437964</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.033911</td>\n",
       "      <td>2.836513</td>\n",
       "      <td>0.991139</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Self_GlobalAvg</td>\n",
       "      <td>0.067695</td>\n",
       "      <td>0.027470</td>\n",
       "      <td>0.171187</td>\n",
       "      <td>0.509546</td>\n",
       "      <td>0.384942</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.025974</td>\n",
       "      <td>2.711772</td>\n",
       "      <td>0.992003</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Self_TopRated</td>\n",
       "      <td>0.067695</td>\n",
       "      <td>0.027470</td>\n",
       "      <td>0.171187</td>\n",
       "      <td>0.509546</td>\n",
       "      <td>0.384942</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.025974</td>\n",
       "      <td>2.711772</td>\n",
       "      <td>0.992003</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Ready_Random</td>\n",
       "      <td>0.052066</td>\n",
       "      <td>0.021101</td>\n",
       "      <td>0.129283</td>\n",
       "      <td>0.506463</td>\n",
       "      <td>0.314952</td>\n",
       "      <td>0.987911</td>\n",
       "      <td>0.184704</td>\n",
       "      <td>5.110269</td>\n",
       "      <td>0.905724</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Self_BaselineIU</td>\n",
       "      <td>0.001043</td>\n",
       "      <td>0.000335</td>\n",
       "      <td>0.003348</td>\n",
       "      <td>0.496433</td>\n",
       "      <td>0.009544</td>\n",
       "      <td>0.699046</td>\n",
       "      <td>0.005051</td>\n",
       "      <td>1.945910</td>\n",
       "      <td>0.995669</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Self_BaselineUI</td>\n",
       "      <td>0.000752</td>\n",
       "      <td>0.000168</td>\n",
       "      <td>0.001677</td>\n",
       "      <td>0.496424</td>\n",
       "      <td>0.009544</td>\n",
       "      <td>0.600530</td>\n",
       "      <td>0.005051</td>\n",
       "      <td>1.803126</td>\n",
       "      <td>0.996380</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             Model      NDCG       mAP       MRR      LAUC        HR  \\\n",
       "0      Self_TopPop  0.214651  0.111707  0.400939  0.555546  0.765642   \n",
       "0   Ready_Baseline  0.095957  0.043178  0.198193  0.515501  0.437964   \n",
       "0   Self_GlobalAvg  0.067695  0.027470  0.171187  0.509546  0.384942   \n",
       "0    Self_TopRated  0.067695  0.027470  0.171187  0.509546  0.384942   \n",
       "0     Ready_Random  0.052066  0.021101  0.129283  0.506463  0.314952   \n",
       "0  Self_BaselineIU  0.001043  0.000335  0.003348  0.496433  0.009544   \n",
       "0  Self_BaselineUI  0.000752  0.000168  0.001677  0.496424  0.009544   \n",
       "\n",
       "   Reco in test  Test coverage   Shannon      Gini  \n",
       "0      1.000000       0.038961  3.159079  0.987317  \n",
       "0      1.000000       0.033911  2.836513  0.991139  \n",
       "0      1.000000       0.025974  2.711772  0.992003  \n",
       "0      1.000000       0.025974  2.711772  0.992003  \n",
       "0      0.987911       0.184704  5.110269  0.905724  \n",
       "0      0.699046       0.005051  1.945910  0.995669  \n",
       "0      0.600530       0.005051  1.803126  0.996380  "
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Second part of the comparison table: the Model column (0) followed by every column from position 9 onwards\n",
    "df.iloc[:,np.append(0,np.arange(9, df.shape[1]))]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Check metrics on toy dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3it [00:00, 3005.95it/s]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Model</th>\n",
       "      <th>RMSE</th>\n",
       "      <th>MAE</th>\n",
       "      <th>precision</th>\n",
       "      <th>recall</th>\n",
       "      <th>F_1</th>\n",
       "      <th>F_05</th>\n",
       "      <th>precision_super</th>\n",
       "      <th>recall_super</th>\n",
       "      <th>NDCG</th>\n",
       "      <th>mAP</th>\n",
       "      <th>MRR</th>\n",
       "      <th>LAUC</th>\n",
       "      <th>HR</th>\n",
       "      <th>Reco in test</th>\n",
       "      <th>Test coverage</th>\n",
       "      <th>Shannon</th>\n",
       "      <th>Gini</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Self_BaselineUI</td>\n",
       "      <td>1.612452</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.444444</td>\n",
       "      <td>0.888889</td>\n",
       "      <td>0.555556</td>\n",
       "      <td>0.478632</td>\n",
       "      <td>0.333333</td>\n",
       "      <td>0.75</td>\n",
       "      <td>0.676907</td>\n",
       "      <td>0.574074</td>\n",
       "      <td>0.611111</td>\n",
       "      <td>0.638889</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.888889</td>\n",
       "      <td>0.8</td>\n",
       "      <td>1.386294</td>\n",
       "      <td>0.25</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             Model      RMSE  MAE  precision    recall       F_1      F_05  \\\n",
       "0  Self_BaselineUI  1.612452  1.4   0.444444  0.888889  0.555556  0.478632   \n",
       "\n",
       "   precision_super  recall_super      NDCG       mAP       MRR      LAUC   HR  \\\n",
       "0         0.333333          0.75  0.676907  0.574074  0.611111  0.638889  1.0   \n",
       "\n",
       "   Reco in test  Test coverage   Shannon  Gini  \n",
       "0      0.888889            0.8  1.386294  0.25  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Training data:\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "matrix([[3, 4, 0, 0, 5, 0, 0, 4],\n",
       "        [0, 1, 2, 3, 0, 0, 0, 0],\n",
       "        [0, 0, 0, 5, 0, 3, 4, 0]], dtype=int64)"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test data:\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "matrix([[0, 0, 0, 0, 0, 0, 3, 0],\n",
       "        [0, 0, 0, 0, 5, 0, 0, 0],\n",
       "        [5, 0, 4, 0, 0, 0, 0, 2]], dtype=int64)"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Recommendations:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>30</td>\n",
       "      <td>5.0</td>\n",
       "      <td>20</td>\n",
       "      <td>4.0</td>\n",
       "      <td>60</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10</td>\n",
       "      <td>40</td>\n",
       "      <td>3.0</td>\n",
       "      <td>60</td>\n",
       "      <td>2.0</td>\n",
       "      <td>70</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>20</td>\n",
       "      <td>40</td>\n",
       "      <td>5.0</td>\n",
       "      <td>20</td>\n",
       "      <td>4.0</td>\n",
       "      <td>70</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    0   1    2   3    4   5    6\n",
       "0   0  30  5.0  20  4.0  60  4.0\n",
       "1  10  40  3.0  60  2.0  70  2.0\n",
       "2  20  40  5.0  20  4.0  70  4.0"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Estimations:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user</th>\n",
       "      <th>item</th>\n",
       "      <th>est_score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>60</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10</td>\n",
       "      <td>40</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>20</td>\n",
       "      <td>0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>20</td>\n",
       "      <td>70</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   user  item  est_score\n",
       "0     0    60        4.0\n",
       "1    10    40        3.0\n",
       "2    20     0        3.0\n",
       "3    20    20        4.0\n",
       "4    20    70        4.0"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import evaluation_measures as ev\n",
    "import imp\n",
    "import helpers\n",
    "imp.reload(ev)\n",
    "\n",
    "dir_path=\"Recommendations generated/toy-example/\"\n",
    "super_reactions=[4,5]\n",
    "test=pd.read_csv('./Datasets/toy-example/test.csv', sep='\\t', header=None)\n",
    "\n",
    "display(ev.evaluate_all(test, dir_path, super_reactions, topK=3))\n",
    "#also you can just type ev.evaluate_all() - I put above values as default\n",
    "\n",
    "toy_train_read=pd.read_csv('./Datasets/toy-example/train.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n",
    "toy_test_read=pd.read_csv('./Datasets/toy-example/test.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n",
    "reco=pd.read_csv('Recommendations generated/toy-example/Self_BaselineUI_reco.csv', header=None)\n",
    "estimations=pd.read_csv('Recommendations generated/toy-example/Self_BaselineUI_estimations.csv', names=['user', 'item', 'est_score'])\n",
    "toy_train_ui, toy_test_ui, toy_user_code_id, toy_user_id_code, \\\n",
    "toy_item_code_id, toy_item_id_code = helpers.data_to_csr(toy_train_read, toy_test_read)\n",
    "\n",
    "print('Training data:')\n",
    "display(toy_train_ui.todense())\n",
    "\n",
    "print('Test data:')\n",
    "display(toy_test_ui.todense())\n",
    "\n",
    "print('Recommendations:')\n",
    "display(reco)\n",
    "\n",
    "print('Estimations:')\n",
    "display(estimations)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Sample recommendations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Here is what user rated high:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user</th>\n",
       "      <th>rating</th>\n",
       "      <th>title</th>\n",
       "      <th>genres</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>24578</th>\n",
       "      <td>163</td>\n",
       "      <td>4</td>\n",
       "      <td>One Flew Over the Cuckoo's Nest (1975)</td>\n",
       "      <td>Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12760</th>\n",
       "      <td>163</td>\n",
       "      <td>4</td>\n",
       "      <td>Dances with Wolves (1990)</td>\n",
       "      <td>Adventure, Drama, Western</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43866</th>\n",
       "      <td>163</td>\n",
       "      <td>4</td>\n",
       "      <td>Pulp Fiction (1994)</td>\n",
       "      <td>Crime, Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37331</th>\n",
       "      <td>163</td>\n",
       "      <td>4</td>\n",
       "      <td>Contact (1997)</td>\n",
       "      <td>Drama, Sci-Fi</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36057</th>\n",
       "      <td>163</td>\n",
       "      <td>4</td>\n",
       "      <td>Silence of the Lambs, The (1991)</td>\n",
       "      <td>Drama, Thriller</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31838</th>\n",
       "      <td>163</td>\n",
       "      <td>4</td>\n",
       "      <td>Shawshank Redemption, The (1994)</td>\n",
       "      <td>Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>67376</th>\n",
       "      <td>163</td>\n",
       "      <td>4</td>\n",
       "      <td>Good Will Hunting (1997)</td>\n",
       "      <td>Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21714</th>\n",
       "      <td>163</td>\n",
       "      <td>3</td>\n",
       "      <td>In &amp; Out (1997)</td>\n",
       "      <td>Comedy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22153</th>\n",
       "      <td>163</td>\n",
       "      <td>3</td>\n",
       "      <td>Groundhog Day (1993)</td>\n",
       "      <td>Comedy, Romance</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19155</th>\n",
       "      <td>163</td>\n",
       "      <td>3</td>\n",
       "      <td>Scream (1996)</td>\n",
       "      <td>Horror, Thriller</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26457</th>\n",
       "      <td>163</td>\n",
       "      <td>3</td>\n",
       "      <td>Air Force One (1997)</td>\n",
       "      <td>Action, Thriller</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17924</th>\n",
       "      <td>163</td>\n",
       "      <td>3</td>\n",
       "      <td>English Patient, The (1996)</td>\n",
       "      <td>Drama, Romance, War</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16410</th>\n",
       "      <td>163</td>\n",
       "      <td>3</td>\n",
       "      <td>Apollo 13 (1995)</td>\n",
       "      <td>Action, Drama, Thriller</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38849</th>\n",
       "      <td>163</td>\n",
       "      <td>3</td>\n",
       "      <td>Full Monty, The (1997)</td>\n",
       "      <td>Comedy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3992</th>\n",
       "      <td>163</td>\n",
       "      <td>3</td>\n",
       "      <td>G.I. Jane (1997)</td>\n",
       "      <td>Action, Drama, War</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       user  rating                                   title  \\\n",
       "24578   163       4  One Flew Over the Cuckoo's Nest (1975)   \n",
       "12760   163       4               Dances with Wolves (1990)   \n",
       "43866   163       4                     Pulp Fiction (1994)   \n",
       "37331   163       4                          Contact (1997)   \n",
       "36057   163       4        Silence of the Lambs, The (1991)   \n",
       "31838   163       4        Shawshank Redemption, The (1994)   \n",
       "67376   163       4                Good Will Hunting (1997)   \n",
       "21714   163       3                         In & Out (1997)   \n",
       "22153   163       3                    Groundhog Day (1993)   \n",
       "19155   163       3                           Scream (1996)   \n",
       "26457   163       3                    Air Force One (1997)   \n",
       "17924   163       3             English Patient, The (1996)   \n",
       "16410   163       3                        Apollo 13 (1995)   \n",
       "38849   163       3                  Full Monty, The (1997)   \n",
       "3992    163       3                        G.I. Jane (1997)   \n",
       "\n",
       "                          genres  \n",
       "24578                      Drama  \n",
       "12760  Adventure, Drama, Western  \n",
       "43866               Crime, Drama  \n",
       "37331              Drama, Sci-Fi  \n",
       "36057            Drama, Thriller  \n",
       "31838                      Drama  \n",
       "67376                      Drama  \n",
       "21714                     Comedy  \n",
       "22153            Comedy, Romance  \n",
       "19155           Horror, Thriller  \n",
       "26457           Action, Thriller  \n",
       "17924        Drama, Romance, War  \n",
       "16410    Action, Drama, Thriller  \n",
       "38849                     Comedy  \n",
       "3992          Action, Drama, War  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Here is what we recommend:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user</th>\n",
       "      <th>rec_nb</th>\n",
       "      <th>title</th>\n",
       "      <th>genres</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>161</th>\n",
       "      <td>163.0</td>\n",
       "      <td>1</td>\n",
       "      <td>Great Day in Harlem, A (1994)</td>\n",
       "      <td>Documentary</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1104</th>\n",
       "      <td>163.0</td>\n",
       "      <td>2</td>\n",
       "      <td>Tough and Deadly (1995)</td>\n",
       "      <td>Action, Drama, Thriller</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2046</th>\n",
       "      <td>163.0</td>\n",
       "      <td>3</td>\n",
       "      <td>Aiqing wansui (1994)</td>\n",
       "      <td>Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2988</th>\n",
       "      <td>163.0</td>\n",
       "      <td>4</td>\n",
       "      <td>Delta of Venus (1994)</td>\n",
       "      <td>Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3930</th>\n",
       "      <td>163.0</td>\n",
       "      <td>5</td>\n",
       "      <td>Someone Else's America (1995)</td>\n",
       "      <td>Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4872</th>\n",
       "      <td>163.0</td>\n",
       "      <td>6</td>\n",
       "      <td>Saint of Fort Washington, The (1993)</td>\n",
       "      <td>Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5813</th>\n",
       "      <td>163.0</td>\n",
       "      <td>7</td>\n",
       "      <td>Celestial Clockwork (1994)</td>\n",
       "      <td>Comedy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6756</th>\n",
       "      <td>163.0</td>\n",
       "      <td>8</td>\n",
       "      <td>Some Mother's Son (1996)</td>\n",
       "      <td>Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8650</th>\n",
       "      <td>163.0</td>\n",
       "      <td>9</td>\n",
       "      <td>Maya Lin: A Strong Clear Vision (1994)</td>\n",
       "      <td>Documentary</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7696</th>\n",
       "      <td>163.0</td>\n",
       "      <td>10</td>\n",
       "      <td>Prefontaine (1997)</td>\n",
       "      <td>Drama</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       user  rec_nb                                   title  \\\n",
       "161   163.0       1           Great Day in Harlem, A (1994)   \n",
       "1104  163.0       2                 Tough and Deadly (1995)   \n",
       "2046  163.0       3                    Aiqing wansui (1994)   \n",
       "2988  163.0       4                   Delta of Venus (1994)   \n",
       "3930  163.0       5           Someone Else's America (1995)   \n",
       "4872  163.0       6    Saint of Fort Washington, The (1993)   \n",
       "5813  163.0       7              Celestial Clockwork (1994)   \n",
       "6756  163.0       8                Some Mother's Son (1996)   \n",
       "8650  163.0       9  Maya Lin: A Strong Clear Vision (1994)   \n",
       "7696  163.0      10                      Prefontaine (1997)   \n",
       "\n",
       "                       genres  \n",
       "161               Documentary  \n",
       "1104  Action, Drama, Thriller  \n",
       "2046                    Drama  \n",
       "2988                    Drama  \n",
       "3930                    Drama  \n",
       "4872                    Drama  \n",
       "5813                   Comedy  \n",
       "6756                    Drama  \n",
       "8650              Documentary  \n",
       "7696                    Drama  "
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train=pd.read_csv('./Datasets/ml-100k/train.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n",
    "items=pd.read_csv('./Datasets/ml-100k/movies.csv')\n",
    "\n",
    "user=random.choice(list(set(train['user'])))\n",
    "\n",
    "train_content=pd.merge(train, items, left_on='item', right_on='id')\n",
    "\n",
    "print('Here is what user rated high:')\n",
    "display(train_content[train_content['user']==user][['user', 'rating', 'title', 'genres']]\\\n",
    "        .sort_values(by='rating', ascending=False)[:15])\n",
    "\n",
    "reco = np.loadtxt('Recommendations generated/ml-100k/Self_BaselineUI_reco.csv', delimiter=',')\n",
    "items=pd.read_csv('./Datasets/ml-100k/movies.csv')\n",
    "\n",
    "# Let's ignore scores - they are not used in evaluation: \n",
    "reco_users=reco[:,:1]\n",
    "reco_items=reco[:,1::2]\n",
    "# Let's put them into one array\n",
    "reco=np.concatenate((reco_users, reco_items), axis=1)\n",
    "\n",
    "# Let's rebuild it user-item dataframe\n",
    "recommended=[]\n",
    "for row in reco:\n",
    "    for rec_nb, entry in enumerate(row[1:]):\n",
    "        recommended.append((row[0], rec_nb+1, entry))\n",
    "recommended=pd.DataFrame(recommended, columns=['user','rec_nb', 'item'])\n",
    "\n",
    "recommended_content=pd.merge(recommended, items, left_on='item', right_on='id')\n",
    "\n",
    "print('Here is what we recommend:')\n",
    "recommended_content[recommended_content['user']==user][['user', 'rec_nb', 'title', 'genres']].sort_values(by='rec_nb')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# project task 3: implement some other evaluation measure"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# it may be your own idea, a modification of what we have already implemented \n",
    "# (for example Hit2 rate which would count as a success users who received at least 2 relevant recommendations) \n",
    "# or something well-known\n",
    "# expected output: modification of evaluation_measures.py such that evaluate_all will also display your measure"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "943it [00:00, 11530.77it/s]\n",
      "943it [00:00, 11386.61it/s]\n",
      "943it [00:00, 12285.65it/s]\n",
      "943it [00:00, 11963.28it/s]\n",
      "943it [00:00, 12440.68it/s]\n",
      "943it [00:00, 11968.68it/s]\n",
      "943it [00:00, 11530.37it/s]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Model</th>\n",
       "      <th>RMSE</th>\n",
       "      <th>MAE</th>\n",
       "      <th>precision</th>\n",
       "      <th>recall</th>\n",
       "      <th>F_1</th>\n",
       "      <th>F_05</th>\n",
       "      <th>precision_super</th>\n",
       "      <th>recall_super</th>\n",
       "      <th>NDCG</th>\n",
       "      <th>mAP</th>\n",
       "      <th>MRR</th>\n",
       "      <th>LAUC</th>\n",
       "      <th>HR</th>\n",
       "      <th>Reco in test</th>\n",
       "      <th>Test coverage</th>\n",
       "      <th>Shannon</th>\n",
       "      <th>Gini</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Self_TopPop</td>\n",
       "      <td>2.508258</td>\n",
       "      <td>2.217909</td>\n",
       "      <td>0.188865</td>\n",
       "      <td>0.116919</td>\n",
       "      <td>0.118732</td>\n",
       "      <td>0.141584</td>\n",
       "      <td>0.130472</td>\n",
       "      <td>0.137473</td>\n",
       "      <td>0.214651</td>\n",
       "      <td>0.111707</td>\n",
       "      <td>0.400939</td>\n",
       "      <td>0.555546</td>\n",
       "      <td>0.765642</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.038961</td>\n",
       "      <td>3.159079</td>\n",
       "      <td>0.987317</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Ready_Baseline</td>\n",
       "      <td>0.949459</td>\n",
       "      <td>0.752487</td>\n",
       "      <td>0.091410</td>\n",
       "      <td>0.037652</td>\n",
       "      <td>0.046030</td>\n",
       "      <td>0.061286</td>\n",
       "      <td>0.079614</td>\n",
       "      <td>0.056463</td>\n",
       "      <td>0.095957</td>\n",
       "      <td>0.043178</td>\n",
       "      <td>0.198193</td>\n",
       "      <td>0.515501</td>\n",
       "      <td>0.437964</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.033911</td>\n",
       "      <td>2.836513</td>\n",
       "      <td>0.991139</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Self_GlobalAvg</td>\n",
       "      <td>1.125760</td>\n",
       "      <td>0.943534</td>\n",
       "      <td>0.061188</td>\n",
       "      <td>0.025968</td>\n",
       "      <td>0.031383</td>\n",
       "      <td>0.041343</td>\n",
       "      <td>0.040558</td>\n",
       "      <td>0.032107</td>\n",
       "      <td>0.067695</td>\n",
       "      <td>0.027470</td>\n",
       "      <td>0.171187</td>\n",
       "      <td>0.509546</td>\n",
       "      <td>0.384942</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.025974</td>\n",
       "      <td>2.711772</td>\n",
       "      <td>0.992003</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Self_TopRated</td>\n",
       "      <td>1.244026</td>\n",
       "      <td>1.000400</td>\n",
       "      <td>0.061188</td>\n",
       "      <td>0.025968</td>\n",
       "      <td>0.031383</td>\n",
       "      <td>0.041343</td>\n",
       "      <td>0.040558</td>\n",
       "      <td>0.032107</td>\n",
       "      <td>0.067695</td>\n",
       "      <td>0.027470</td>\n",
       "      <td>0.171187</td>\n",
       "      <td>0.509546</td>\n",
       "      <td>0.384942</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.025974</td>\n",
       "      <td>2.711772</td>\n",
       "      <td>0.992003</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Ready_Random</td>\n",
       "      <td>1.518962</td>\n",
       "      <td>1.216659</td>\n",
       "      <td>0.046978</td>\n",
       "      <td>0.019905</td>\n",
       "      <td>0.023281</td>\n",
       "      <td>0.030951</td>\n",
       "      <td>0.029399</td>\n",
       "      <td>0.019829</td>\n",
       "      <td>0.052066</td>\n",
       "      <td>0.021101</td>\n",
       "      <td>0.129283</td>\n",
       "      <td>0.506463</td>\n",
       "      <td>0.314952</td>\n",
       "      <td>0.987911</td>\n",
       "      <td>0.184704</td>\n",
       "      <td>5.110269</td>\n",
       "      <td>0.905724</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Self_BaselineIU</td>\n",
       "      <td>0.958136</td>\n",
       "      <td>0.754051</td>\n",
       "      <td>0.000954</td>\n",
       "      <td>0.000188</td>\n",
       "      <td>0.000298</td>\n",
       "      <td>0.000481</td>\n",
       "      <td>0.000644</td>\n",
       "      <td>0.000223</td>\n",
       "      <td>0.001043</td>\n",
       "      <td>0.000335</td>\n",
       "      <td>0.003348</td>\n",
       "      <td>0.496433</td>\n",
       "      <td>0.009544</td>\n",
       "      <td>0.699046</td>\n",
       "      <td>0.005051</td>\n",
       "      <td>1.945910</td>\n",
       "      <td>0.995669</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Self_BaselineUI</td>\n",
       "      <td>0.967585</td>\n",
       "      <td>0.762740</td>\n",
       "      <td>0.000954</td>\n",
       "      <td>0.000170</td>\n",
       "      <td>0.000278</td>\n",
       "      <td>0.000463</td>\n",
       "      <td>0.000644</td>\n",
       "      <td>0.000189</td>\n",
       "      <td>0.000752</td>\n",
       "      <td>0.000168</td>\n",
       "      <td>0.001677</td>\n",
       "      <td>0.496424</td>\n",
       "      <td>0.009544</td>\n",
       "      <td>0.600530</td>\n",
       "      <td>0.005051</td>\n",
       "      <td>1.803126</td>\n",
       "      <td>0.996380</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             Model      RMSE       MAE  precision    recall       F_1  \\\n",
       "0      Self_TopPop  2.508258  2.217909   0.188865  0.116919  0.118732   \n",
       "0   Ready_Baseline  0.949459  0.752487   0.091410  0.037652  0.046030   \n",
       "0   Self_GlobalAvg  1.125760  0.943534   0.061188  0.025968  0.031383   \n",
       "0    Self_TopRated  1.244026  1.000400   0.061188  0.025968  0.031383   \n",
       "0     Ready_Random  1.518962  1.216659   0.046978  0.019905  0.023281   \n",
       "0  Self_BaselineIU  0.958136  0.754051   0.000954  0.000188  0.000298   \n",
       "0  Self_BaselineUI  0.967585  0.762740   0.000954  0.000170  0.000278   \n",
       "\n",
       "       F_05  precision_super  recall_super      NDCG       mAP       MRR  \\\n",
       "0  0.141584         0.130472      0.137473  0.214651  0.111707  0.400939   \n",
       "0  0.061286         0.079614      0.056463  0.095957  0.043178  0.198193   \n",
       "0  0.041343         0.040558      0.032107  0.067695  0.027470  0.171187   \n",
       "0  0.041343         0.040558      0.032107  0.067695  0.027470  0.171187   \n",
       "0  0.030951         0.029399      0.019829  0.052066  0.021101  0.129283   \n",
       "0  0.000481         0.000644      0.000223  0.001043  0.000335  0.003348   \n",
       "0  0.000463         0.000644      0.000189  0.000752  0.000168  0.001677   \n",
       "\n",
       "       LAUC        HR  Reco in test  Test coverage   Shannon      Gini  \n",
       "0  0.555546  0.765642      1.000000       0.038961  3.159079  0.987317  \n",
       "0  0.515501  0.437964      1.000000       0.033911  2.836513  0.991139  \n",
       "0  0.509546  0.384942      1.000000       0.025974  2.711772  0.992003  \n",
       "0  0.509546  0.384942      1.000000       0.025974  2.711772  0.992003  \n",
       "0  0.506463  0.314952      0.987911       0.184704  5.110269  0.905724  \n",
       "0  0.496433  0.009544      0.699046       0.005051  1.945910  0.995669  \n",
       "0  0.496424  0.009544      0.600530       0.005051  1.803126  0.996380  "
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dir_path=\"Recommendations generated/ml-100k/\"\n",
    "super_reactions=[4,5]\n",
    "test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n",
    "\n",
    "ev.evaluate_all(test, dir_path, super_reactions)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}