introduction_to_recommender.../P2. Evaluation.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Prepare test set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "slideshow": {
     "slide_type": "-"
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import scipy.sparse as sparse\n",
    "from collections import defaultdict\n",
    "from itertools import chain\n",
    "import random\n",
    "from tqdm import tqdm\n",
    "\n",
    "# In evaluation we do not load train set - it is not needed\n",
    "test = pd.read_csv(\"./Datasets/ml-100k/test.csv\", sep=\"\\t\", header=None)\n",
    "test.columns = [\"user\", \"item\", \"rating\", \"timestamp\"]\n",
    "\n",
    "test[\"user_code\"] = test[\"user\"].astype(\"category\").cat.codes\n",
    "test[\"item_code\"] = test[\"item\"].astype(\"category\").cat.codes\n",
    "\n",
    "user_code_id = dict(enumerate(test[\"user\"].astype(\"category\").cat.categories))\n",
    "user_id_code = dict((v, k) for k, v in user_code_id.items())\n",
    "item_code_id = dict(enumerate(test[\"item\"].astype(\"category\").cat.categories))\n",
    "item_id_code = dict((v, k) for k, v in item_code_id.items())\n",
    "\n",
    "test_ui = sparse.csr_matrix((test[\"rating\"], (test[\"user_code\"], test[\"item_code\"])))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Estimations metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "estimations_df = pd.read_csv(\n",
    "    \"Recommendations generated/ml-100k/Ready_Baseline_estimations.csv\", header=None\n",
    ")\n",
    "estimations_df.columns = [\"user\", \"item\", \"score\"]\n",
    "\n",
    "estimations_df[\"user_code\"] = [user_id_code[user] for user in estimations_df[\"user\"]]\n",
    "estimations_df[\"item_code\"] = [item_id_code[item] for item in estimations_df[\"item\"]]\n",
    "estimations = sparse.csr_matrix(\n",
    "    (\n",
    "        estimations_df[\"score\"],\n",
    "        (estimations_df[\"user_code\"], estimations_df[\"item_code\"]),\n",
    "    ),\n",
    "    shape=test_ui.shape,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def estimations_metrics(test_ui, estimations):\n",
    "    result = []\n",
    "\n",
    "    RMSE = (np.sum((estimations.data - test_ui.data) ** 2) / estimations.nnz) ** (1 / 2)\n",
    "    result.append([\"RMSE\", RMSE])\n",
    "\n",
    "    MAE = np.sum(abs(estimations.data - test_ui.data)) / estimations.nnz\n",
    "    result.append([\"MAE\", MAE])\n",
    "\n",
    "    df_result = (pd.DataFrame(list(zip(*result))[1])).T\n",
    "    df_result.columns = list(zip(*result))[0]\n",
    "    return df_result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>RMSE</th>\n",
       "      <th>MAE</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.949459</td>\n",
       "      <td>0.752487</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       RMSE       MAE\n",
       "0  0.949459  0.752487"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# in case of error (in the laboratories) you might have to switch to the other version of pandas\n",
    "# try !pip3 install pandas=='1.0.3' (or pip if you use python 2) and restart the kernel\n",
    "\n",
    "estimations_metrics(test_ui, estimations)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Ranking metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[663, 475,  62, ..., 472, 269, 503],\n",
       "       [ 48, 313, 475, ..., 591, 175, 466],\n",
       "       [351, 313, 475, ..., 591, 175, 466],\n",
       "       ...,\n",
       "       [259, 313, 475, ...,  11, 591, 175],\n",
       "       [ 33, 313, 475, ...,  11, 591, 175],\n",
       "       [ 77, 313, 475, ...,  11, 591, 175]])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "reco = np.loadtxt(\n",
    "    \"Recommendations generated/ml-100k/Ready_Baseline_reco.csv\", delimiter=\",\"\n",
    ")\n",
    "# Let's ignore scores - they are not used in evaluation:\n",
    "users = reco[:, :1]\n",
    "items = reco[:, 1::2]\n",
    "# Let's use inner ids instead of real ones\n",
    "users = np.vectorize(lambda x: user_id_code.setdefault(x, -1))(users)\n",
    "items = np.vectorize(lambda x: item_id_code.setdefault(x, -1))(items)\n",
    "reco = np.concatenate((users, items), axis=1)\n",
    "reco"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def ranking_metrics(test_ui, reco, super_reactions=[], topK=10):\n",
    "\n",
    "    nb_items = test_ui.shape[1]\n",
    "    (\n",
    "        relevant_users,\n",
    "        super_relevant_users,\n",
    "        prec,\n",
    "        rec,\n",
    "        F_1,\n",
    "        F_05,\n",
    "        prec_super,\n",
    "        rec_super,\n",
    "        ndcg,\n",
    "        mAP,\n",
    "        MRR,\n",
    "        LAUC,\n",
    "        HR,\n",
    "    ) = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)\n",
    "\n",
    "    cg = 1.0 / np.log2(np.arange(2, topK + 2))\n",
    "    cg_sum = np.cumsum(cg)\n",
    "\n",
    "    for (nb_user, user) in tqdm(enumerate(reco[:, 0])):\n",
    "        u_rated_items = test_ui.indices[test_ui.indptr[user] : test_ui.indptr[user + 1]]\n",
    "        nb_u_rated_items = len(u_rated_items)\n",
    "        if (\n",
    "            nb_u_rated_items > 0\n",
    "        ):  # skip users with no items in test set (still possible that there will be no super items)\n",
    "            relevant_users += 1\n",
    "\n",
    "            u_super_items = u_rated_items[\n",
    "                np.vectorize(lambda x: x in super_reactions)(\n",
    "                    test_ui.data[test_ui.indptr[user] : test_ui.indptr[user + 1]]\n",
    "                )\n",
    "            ]\n",
    "            # more natural seems u_super_items=[item for item in u_rated_items if test_ui[user,item] in super_reactions]\n",
    "            # but accesing test_ui[user,item] is expensive -we should avoid doing it\n",
    "            if len(u_super_items) > 0:\n",
    "                super_relevant_users += 1\n",
    "\n",
    "            user_successes = np.zeros(topK)\n",
    "            nb_user_successes = 0\n",
    "            user_super_successes = np.zeros(topK)\n",
    "            nb_user_super_successes = 0\n",
    "\n",
    "            # evaluation\n",
    "            for (item_position, item) in enumerate(reco[nb_user, 1 : topK + 1]):\n",
    "                if item in u_rated_items:\n",
    "                    user_successes[item_position] = 1\n",
    "                    nb_user_successes += 1\n",
    "                    if item in u_super_items:\n",
    "                        user_super_successes[item_position] = 1\n",
    "                        nb_user_super_successes += 1\n",
    "\n",
    "            prec_u = nb_user_successes / topK\n",
    "            prec += prec_u\n",
    "\n",
    "            rec_u = nb_user_successes / nb_u_rated_items\n",
    "            rec += rec_u\n",
    "\n",
    "            F_1 += 2 * (prec_u * rec_u) / (prec_u + rec_u) if prec_u + rec_u > 0 else 0\n",
    "            F_05 += (\n",
    "                (0.5 ** 2 + 1) * (prec_u * rec_u) / (0.5 ** 2 * prec_u + rec_u)\n",
    "                if prec_u + rec_u > 0\n",
    "                else 0\n",
    "            )\n",
    "\n",
    "            prec_super += nb_user_super_successes / topK\n",
    "            rec_super += nb_user_super_successes / max(\n",
    "                len(u_super_items), 1\n",
    "            )  # to set 0 if no super items\n",
    "            ndcg += np.dot(user_successes, cg) / cg_sum[min(topK, nb_u_rated_items) - 1]\n",
    "\n",
    "            cumsum_successes = np.cumsum(user_successes)\n",
    "            mAP += np.dot(\n",
    "                cumsum_successes / np.arange(1, topK + 1), user_successes\n",
    "            ) / min(topK, nb_u_rated_items)\n",
    "            MRR += (\n",
    "                1 / (user_successes.nonzero()[0][0] + 1)\n",
    "                if user_successes.nonzero()[0].size > 0\n",
    "                else 0\n",
    "            )\n",
    "            LAUC += (\n",
    "                np.dot(cumsum_successes, 1 - user_successes)\n",
    "                + (nb_user_successes + nb_u_rated_items)\n",
    "                / 2\n",
    "                * ((nb_items - nb_u_rated_items) - (topK - nb_user_successes))\n",
    "            ) / ((nb_items - nb_u_rated_items) * nb_u_rated_items)\n",
    "\n",
    "            HR += nb_user_successes > 0\n",
    "\n",
    "    result = []\n",
    "    result.append((\"precision\", prec / relevant_users))\n",
    "    result.append((\"recall\", rec / relevant_users))\n",
    "    result.append((\"F_1\", F_1 / relevant_users))\n",
    "    result.append((\"F_05\", F_05 / relevant_users))\n",
    "    result.append((\"precision_super\", prec_super / super_relevant_users))\n",
    "    result.append((\"recall_super\", rec_super / super_relevant_users))\n",
    "    result.append((\"NDCG\", ndcg / relevant_users))\n",
    "    result.append((\"mAP\", mAP / relevant_users))\n",
    "    result.append((\"MRR\", MRR / relevant_users))\n",
    "    result.append((\"LAUC\", LAUC / relevant_users))\n",
    "    result.append((\"HR\", HR / relevant_users))\n",
    "\n",
    "    df_result = (pd.DataFrame(list(zip(*result))[1])).T\n",
    "    df_result.columns = list(zip(*result))[0]\n",
    "    return df_result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "943it [00:00, 9434.06it/s]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>precision</th>\n",
       "      <th>recall</th>\n",
       "      <th>F_1</th>\n",
       "      <th>F_05</th>\n",
       "      <th>precision_super</th>\n",
       "      <th>recall_super</th>\n",
       "      <th>NDCG</th>\n",
       "      <th>mAP</th>\n",
       "      <th>MRR</th>\n",
       "      <th>LAUC</th>\n",
       "      <th>HR</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.09141</td>\n",
       "      <td>0.037652</td>\n",
       "      <td>0.04603</td>\n",
       "      <td>0.061286</td>\n",
       "      <td>0.079614</td>\n",
       "      <td>0.056463</td>\n",
       "      <td>0.095957</td>\n",
       "      <td>0.043178</td>\n",
       "      <td>0.198193</td>\n",
       "      <td>0.515501</td>\n",
       "      <td>0.437964</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   precision    recall      F_1      F_05  precision_super  recall_super  \\\n",
       "0    0.09141  0.037652  0.04603  0.061286         0.079614      0.056463   \n",
       "\n",
       "       NDCG       mAP       MRR      LAUC        HR  \n",
       "0  0.095957  0.043178  0.198193  0.515501  0.437964  "
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ranking_metrics(test_ui, reco, super_reactions=[4, 5], topK=10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Diversity metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def diversity_metrics(test_ui, reco, topK=10):\n",
    "\n",
    "    frequencies = defaultdict(int)\n",
    "\n",
    "    # let's assign 0 to all items in test set\n",
    "    for item in list(set(test_ui.indices)):\n",
    "        frequencies[item] = 0\n",
    "\n",
    "    # counting frequencies\n",
    "    for item in reco[:, 1:].flat:\n",
    "        frequencies[item] += 1\n",
    "\n",
    "    nb_reco_outside_test = frequencies[-1]\n",
    "    del frequencies[-1]\n",
    "\n",
    "    frequencies = np.array(list(frequencies.values()))\n",
    "\n",
    "    nb_rec_items = len(frequencies[frequencies > 0])\n",
    "    nb_reco_inside_test = np.sum(frequencies)\n",
    "\n",
    "    frequencies = frequencies / np.sum(frequencies)\n",
    "    frequencies = np.sort(frequencies)\n",
    "\n",
    "    with np.errstate(\n",
    "        divide=\"ignore\"\n",
    "    ):  # let's put zeros put items with 0 frequency and ignore division warning\n",
    "        log_frequencies = np.nan_to_num(np.log(frequencies), posinf=0, neginf=0)\n",
    "\n",
    "    result = []\n",
    "    result.append(\n",
    "        (\n",
    "            \"Reco in test\",\n",
    "            nb_reco_inside_test / (nb_reco_inside_test + nb_reco_outside_test),\n",
    "        )\n",
    "    )\n",
    "    result.append((\"Test coverage\", nb_rec_items / test_ui.shape[1]))\n",
    "    result.append((\"Shannon\", -np.dot(frequencies, log_frequencies)))\n",
    "    result.append(\n",
    "        (\n",
    "            \"Gini\",\n",
    "            np.dot(frequencies, np.arange(1 - len(frequencies), len(frequencies), 2))\n",
    "            / (len(frequencies) - 1),\n",
    "        )\n",
    "    )\n",
    "\n",
    "    df_result = (pd.DataFrame(list(zip(*result))[1])).T\n",
    "    df_result.columns = list(zip(*result))[0]\n",
    "    return df_result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Reco in test</th>\n",
       "      <th>Test coverage</th>\n",
       "      <th>Shannon</th>\n",
       "      <th>Gini</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.033911</td>\n",
       "      <td>2.836513</td>\n",
       "      <td>0.991139</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Reco in test  Test coverage   Shannon      Gini\n",
       "0           1.0       0.033911  2.836513  0.991139"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# in case of errors try !pip3 install numpy==1.18.4 (or pip if you use python 2) and restart the kernel\n",
    "\n",
    "x = diversity_metrics(test_ui, reco, topK=10)\n",
    "x"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# To be used in other notebooks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "943it [00:00, 12952.59it/s]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>RMSE</th>\n",
       "      <th>MAE</th>\n",
       "      <th>precision</th>\n",
       "      <th>recall</th>\n",
       "      <th>F_1</th>\n",
       "      <th>F_05</th>\n",
       "      <th>precision_super</th>\n",
       "      <th>recall_super</th>\n",
       "      <th>NDCG</th>\n",
       "      <th>mAP</th>\n",
       "      <th>MRR</th>\n",
       "      <th>LAUC</th>\n",
       "      <th>HR</th>\n",
       "      <th>HitRate2</th>\n",
       "      <th>HitRate3</th>\n",
       "      <th>Reco in test</th>\n",
       "      <th>Test coverage</th>\n",
       "      <th>Shannon</th>\n",
       "      <th>Gini</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.949459</td>\n",
       "      <td>0.752487</td>\n",
       "      <td>0.09141</td>\n",
       "      <td>0.037652</td>\n",
       "      <td>0.04603</td>\n",
       "      <td>0.061286</td>\n",
       "      <td>0.079614</td>\n",
       "      <td>0.056463</td>\n",
       "      <td>0.095957</td>\n",
       "      <td>0.043178</td>\n",
       "      <td>0.198193</td>\n",
       "      <td>0.515501</td>\n",
       "      <td>0.437964</td>\n",
       "      <td>0.239661</td>\n",
       "      <td>0.126193</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.033911</td>\n",
       "      <td>2.836513</td>\n",
       "      <td>0.991139</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       RMSE       MAE  precision    recall      F_1      F_05  \\\n",
       "0  0.949459  0.752487    0.09141  0.037652  0.04603  0.061286   \n",
       "\n",
       "   precision_super  recall_super      NDCG       mAP       MRR      LAUC  \\\n",
       "0         0.079614      0.056463  0.095957  0.043178  0.198193  0.515501   \n",
       "\n",
       "         HR  HitRate2  HitRate3  Reco in test  Test coverage   Shannon  \\\n",
       "0  0.437964  0.239661  0.126193           1.0       0.033911  2.836513   \n",
       "\n",
       "       Gini  \n",
       "0  0.991139  "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import evaluation_measures as ev\n",
    "\n",
    "estimations_df = pd.read_csv(\n",
    "    \"Recommendations generated/ml-100k/Ready_Baseline_estimations.csv\", header=None\n",
    ")\n",
    "reco = np.loadtxt(\n",
    "    \"Recommendations generated/ml-100k/Ready_Baseline_reco.csv\", delimiter=\",\"\n",
    ")\n",
    "\n",
    "ev.evaluate(\n",
    "    test=pd.read_csv(\"./Datasets/ml-100k/test.csv\", sep=\"\\t\", header=None),\n",
    "    estimations_df=estimations_df,\n",
    "    reco=reco,\n",
    "    super_reactions=[4, 5],\n",
    ")\n",
    "# also you can just type ev.evaluate_all(estimations_df, reco) - I put above values as default"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "943it [00:00, 13130.52it/s]\n",
      "943it [00:00, 12777.31it/s]\n",
      "943it [00:00, 13513.65it/s]\n",
      "943it [00:00, 13323.06it/s]\n",
      "943it [00:00, 13507.69it/s]\n",
      "943it [00:00, 13697.48it/s]\n"
     ]
    }
   ],
   "source": [
    "dir_path = \"Recommendations generated/ml-100k/\"\n",
    "super_reactions = [4, 5]\n",
    "test = pd.read_csv(\"./Datasets/ml-100k/test.csv\", sep=\"\\t\", header=None)\n",
    "\n",
    "df = ev.evaluate_all(test, dir_path, super_reactions)\n",
    "# also you can just type ev.evaluate_all() - I put above values as default"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Model</th>\n",
       "      <th>RMSE</th>\n",
       "      <th>MAE</th>\n",
       "      <th>precision</th>\n",
       "      <th>recall</th>\n",
       "      <th>F_1</th>\n",
       "      <th>F_05</th>\n",
       "      <th>precision_super</th>\n",
       "      <th>recall_super</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Self_TopPop</td>\n",
       "      <td>2.508258</td>\n",
       "      <td>2.217909</td>\n",
       "      <td>0.188865</td>\n",
       "      <td>0.116919</td>\n",
       "      <td>0.118732</td>\n",
       "      <td>0.141584</td>\n",
       "      <td>0.130472</td>\n",
       "      <td>0.137473</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Ready_Baseline</td>\n",
       "      <td>0.949459</td>\n",
       "      <td>0.752487</td>\n",
       "      <td>0.091410</td>\n",
       "      <td>0.037652</td>\n",
       "      <td>0.046030</td>\n",
       "      <td>0.061286</td>\n",
       "      <td>0.079614</td>\n",
       "      <td>0.056463</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Ready_Random</td>\n",
       "      <td>1.516512</td>\n",
       "      <td>1.217214</td>\n",
       "      <td>0.045599</td>\n",
       "      <td>0.021001</td>\n",
       "      <td>0.024136</td>\n",
       "      <td>0.031226</td>\n",
       "      <td>0.028541</td>\n",
       "      <td>0.022057</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Self_TopRated</td>\n",
       "      <td>1.030712</td>\n",
       "      <td>0.820904</td>\n",
       "      <td>0.000954</td>\n",
       "      <td>0.000188</td>\n",
       "      <td>0.000298</td>\n",
       "      <td>0.000481</td>\n",
       "      <td>0.000644</td>\n",
       "      <td>0.000223</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Self_BaselineUI</td>\n",
       "      <td>0.967585</td>\n",
       "      <td>0.762740</td>\n",
       "      <td>0.000954</td>\n",
       "      <td>0.000170</td>\n",
       "      <td>0.000278</td>\n",
       "      <td>0.000463</td>\n",
       "      <td>0.000644</td>\n",
       "      <td>0.000189</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Self_IKNN</td>\n",
       "      <td>1.018363</td>\n",
       "      <td>0.808793</td>\n",
       "      <td>0.000318</td>\n",
       "      <td>0.000108</td>\n",
       "      <td>0.000140</td>\n",
       "      <td>0.000189</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             Model      RMSE       MAE  precision    recall       F_1  \\\n",
       "0      Self_TopPop  2.508258  2.217909   0.188865  0.116919  0.118732   \n",
       "0   Ready_Baseline  0.949459  0.752487   0.091410  0.037652  0.046030   \n",
       "0     Ready_Random  1.516512  1.217214   0.045599  0.021001  0.024136   \n",
       "0    Self_TopRated  1.030712  0.820904   0.000954  0.000188  0.000298   \n",
       "0  Self_BaselineUI  0.967585  0.762740   0.000954  0.000170  0.000278   \n",
       "0        Self_IKNN  1.018363  0.808793   0.000318  0.000108  0.000140   \n",
       "\n",
       "       F_05  precision_super  recall_super  \n",
       "0  0.141584         0.130472      0.137473  \n",
       "0  0.061286         0.079614      0.056463  \n",
       "0  0.031226         0.028541      0.022057  \n",
       "0  0.000481         0.000644      0.000223  \n",
       "0  0.000463         0.000644      0.000189  \n",
       "0  0.000189         0.000000      0.000000  "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.iloc[:, :9]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Model</th>\n",
       "      <th>NDCG</th>\n",
       "      <th>mAP</th>\n",
       "      <th>MRR</th>\n",
       "      <th>LAUC</th>\n",
       "      <th>HR</th>\n",
       "      <th>HitRate2</th>\n",
       "      <th>HitRate3</th>\n",
       "      <th>Reco in test</th>\n",
       "      <th>Test coverage</th>\n",
       "      <th>Shannon</th>\n",
       "      <th>Gini</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Self_TopPop</td>\n",
       "      <td>0.214651</td>\n",
       "      <td>0.111707</td>\n",
       "      <td>0.400939</td>\n",
       "      <td>0.555546</td>\n",
       "      <td>0.765642</td>\n",
       "      <td>0.492047</td>\n",
       "      <td>0.290562</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.038961</td>\n",
       "      <td>3.159079</td>\n",
       "      <td>0.987317</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Ready_Baseline</td>\n",
       "      <td>0.095957</td>\n",
       "      <td>0.043178</td>\n",
       "      <td>0.198193</td>\n",
       "      <td>0.515501</td>\n",
       "      <td>0.437964</td>\n",
       "      <td>0.239661</td>\n",
       "      <td>0.126193</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.033911</td>\n",
       "      <td>2.836513</td>\n",
       "      <td>0.991139</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Ready_Random</td>\n",
       "      <td>0.050154</td>\n",
       "      <td>0.019000</td>\n",
       "      <td>0.125089</td>\n",
       "      <td>0.507013</td>\n",
       "      <td>0.327678</td>\n",
       "      <td>0.093319</td>\n",
       "      <td>0.026511</td>\n",
       "      <td>0.988017</td>\n",
       "      <td>0.192641</td>\n",
       "      <td>5.141246</td>\n",
       "      <td>0.903763</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Self_TopRated</td>\n",
       "      <td>0.001043</td>\n",
       "      <td>0.000335</td>\n",
       "      <td>0.003348</td>\n",
       "      <td>0.496433</td>\n",
       "      <td>0.009544</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.699046</td>\n",
       "      <td>0.005051</td>\n",
       "      <td>1.945910</td>\n",
       "      <td>0.995669</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Self_BaselineUI</td>\n",
       "      <td>0.000752</td>\n",
       "      <td>0.000168</td>\n",
       "      <td>0.001677</td>\n",
       "      <td>0.496424</td>\n",
       "      <td>0.009544</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.600530</td>\n",
       "      <td>0.005051</td>\n",
       "      <td>1.803126</td>\n",
       "      <td>0.996380</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Self_IKNN</td>\n",
       "      <td>0.000214</td>\n",
       "      <td>0.000037</td>\n",
       "      <td>0.000368</td>\n",
       "      <td>0.496391</td>\n",
       "      <td>0.003181</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.392153</td>\n",
       "      <td>0.115440</td>\n",
       "      <td>4.174741</td>\n",
       "      <td>0.965327</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             Model      NDCG       mAP       MRR      LAUC        HR  \\\n",
       "0      Self_TopPop  0.214651  0.111707  0.400939  0.555546  0.765642   \n",
       "0   Ready_Baseline  0.095957  0.043178  0.198193  0.515501  0.437964   \n",
       "0     Ready_Random  0.050154  0.019000  0.125089  0.507013  0.327678   \n",
       "0    Self_TopRated  0.001043  0.000335  0.003348  0.496433  0.009544   \n",
       "0  Self_BaselineUI  0.000752  0.000168  0.001677  0.496424  0.009544   \n",
       "0        Self_IKNN  0.000214  0.000037  0.000368  0.496391  0.003181   \n",
       "\n",
       "   HitRate2  HitRate3  Reco in test  Test coverage   Shannon      Gini  \n",
       "0  0.492047  0.290562      1.000000       0.038961  3.159079  0.987317  \n",
       "0  0.239661  0.126193      1.000000       0.033911  2.836513  0.991139  \n",
       "0  0.093319  0.026511      0.988017       0.192641  5.141246  0.903763  \n",
       "0  0.000000  0.000000      0.699046       0.005051  1.945910  0.995669  \n",
       "0  0.000000  0.000000      0.600530       0.005051  1.803126  0.996380  \n",
       "0  0.000000  0.000000      0.392153       0.115440  4.174741  0.965327  "
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.iloc[:, np.append(0, np.arange(9, df.shape[1]))]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Check metrics on toy dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3it [00:00, ?it/s]\n",
      "3it [00:00, ?it/s]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Model</th>\n",
       "      <th>RMSE</th>\n",
       "      <th>MAE</th>\n",
       "      <th>precision</th>\n",
       "      <th>recall</th>\n",
       "      <th>F_1</th>\n",
       "      <th>F_05</th>\n",
       "      <th>precision_super</th>\n",
       "      <th>recall_super</th>\n",
       "      <th>NDCG</th>\n",
       "      <th>mAP</th>\n",
       "      <th>MRR</th>\n",
       "      <th>LAUC</th>\n",
       "      <th>HR</th>\n",
       "      <th>HitRate2</th>\n",
       "      <th>HitRate3</th>\n",
       "      <th>Reco in test</th>\n",
       "      <th>Test coverage</th>\n",
       "      <th>Shannon</th>\n",
       "      <th>Gini</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Self_BaselineUI</td>\n",
       "      <td>1.612452</td>\n",
       "      <td>1.400</td>\n",
       "      <td>0.444444</td>\n",
       "      <td>0.888889</td>\n",
       "      <td>0.555556</td>\n",
       "      <td>0.478632</td>\n",
       "      <td>0.333333</td>\n",
       "      <td>0.75</td>\n",
       "      <td>0.676907</td>\n",
       "      <td>0.574074</td>\n",
       "      <td>0.611111</td>\n",
       "      <td>0.638889</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.333333</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.888889</td>\n",
       "      <td>0.8</td>\n",
       "      <td>1.386294</td>\n",
       "      <td>0.250000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Self_BaselineIU</td>\n",
       "      <td>1.648337</td>\n",
       "      <td>1.575</td>\n",
       "      <td>0.444444</td>\n",
       "      <td>0.888889</td>\n",
       "      <td>0.555556</td>\n",
       "      <td>0.478632</td>\n",
       "      <td>0.333333</td>\n",
       "      <td>0.75</td>\n",
       "      <td>0.720550</td>\n",
       "      <td>0.629630</td>\n",
       "      <td>0.666667</td>\n",
       "      <td>0.722222</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.333333</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.777778</td>\n",
       "      <td>0.8</td>\n",
       "      <td>1.351784</td>\n",
       "      <td>0.357143</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             Model      RMSE    MAE  precision    recall       F_1      F_05  \\\n",
       "0  Self_BaselineUI  1.612452  1.400   0.444444  0.888889  0.555556  0.478632   \n",
       "0  Self_BaselineIU  1.648337  1.575   0.444444  0.888889  0.555556  0.478632   \n",
       "\n",
       "   precision_super  recall_super      NDCG       mAP       MRR      LAUC   HR  \\\n",
       "0         0.333333          0.75  0.676907  0.574074  0.611111  0.638889  1.0   \n",
       "0         0.333333          0.75  0.720550  0.629630  0.666667  0.722222  1.0   \n",
       "\n",
       "   HitRate2  HitRate3  Reco in test  Test coverage   Shannon      Gini  \n",
       "0  0.333333       0.0      0.888889            0.8  1.386294  0.250000  \n",
       "0  0.333333       0.0      0.777778            0.8  1.351784  0.357143  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Training data:\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "matrix([[3, 4, 0, 0, 5, 0, 0, 4],\n",
       "        [0, 1, 2, 3, 0, 0, 0, 0],\n",
       "        [0, 0, 0, 5, 0, 3, 4, 0]], dtype=int64)"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test data:\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "matrix([[0, 0, 0, 0, 0, 0, 3, 0],\n",
       "        [0, 0, 0, 0, 5, 0, 0, 0],\n",
       "        [5, 0, 4, 0, 0, 0, 0, 2]], dtype=int64)"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Recommendations:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>30</td>\n",
       "      <td>5.0</td>\n",
       "      <td>20</td>\n",
       "      <td>4.0</td>\n",
       "      <td>60</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10</td>\n",
       "      <td>40</td>\n",
       "      <td>3.0</td>\n",
       "      <td>60</td>\n",
       "      <td>2.0</td>\n",
       "      <td>70</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>20</td>\n",
       "      <td>40</td>\n",
       "      <td>5.0</td>\n",
       "      <td>20</td>\n",
       "      <td>4.0</td>\n",
       "      <td>70</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    0   1    2   3    4   5    6\n",
       "0   0  30  5.0  20  4.0  60  4.0\n",
       "1  10  40  3.0  60  2.0  70  2.0\n",
       "2  20  40  5.0  20  4.0  70  4.0"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Estimations:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user</th>\n",
       "      <th>item</th>\n",
       "      <th>est_score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>60</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10</td>\n",
       "      <td>40</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>20</td>\n",
       "      <td>0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>20</td>\n",
       "      <td>70</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   user  item  est_score\n",
       "0     0    60        4.0\n",
       "1    10    40        3.0\n",
       "2    20     0        3.0\n",
       "3    20    20        4.0\n",
       "4    20    70        4.0"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import helpers\n",
    "\n",
    "dir_path = \"Recommendations generated/toy-example/\"\n",
    "super_reactions = [4, 5]\n",
    "test = pd.read_csv(\"./Datasets/toy-example/test.csv\", sep=\"\\t\", header=None)\n",
    "\n",
    "display(ev.evaluate_all(test, dir_path, super_reactions, topK=3))\n",
    "# also you can just type ev.evaluate_all() - I put above values as default\n",
    "\n",
    "toy_train_read = pd.read_csv(\n",
    "    \"./Datasets/toy-example/train.csv\",\n",
    "    sep=\"\\t\",\n",
    "    header=None,\n",
    "    names=[\"user\", \"item\", \"rating\", \"timestamp\"],\n",
    ")\n",
    "toy_test_read = pd.read_csv(\n",
    "    \"./Datasets/toy-example/test.csv\",\n",
    "    sep=\"\\t\",\n",
    "    header=None,\n",
    "    names=[\"user\", \"item\", \"rating\", \"timestamp\"],\n",
    ")\n",
    "reco = pd.read_csv(\n",
    "    \"Recommendations generated/toy-example/Self_BaselineUI_reco.csv\", header=None\n",
    ")\n",
    "estimations = pd.read_csv(\n",
    "    \"Recommendations generated/toy-example/Self_BaselineUI_estimations.csv\",\n",
    "    names=[\"user\", \"item\", \"est_score\"],\n",
    ")\n",
    "(\n",
    "    toy_train_ui,\n",
    "    toy_test_ui,\n",
    "    toy_user_code_id,\n",
    "    toy_user_id_code,\n",
    "    toy_item_code_id,\n",
    "    toy_item_id_code,\n",
    ") = helpers.data_to_csr(toy_train_read, toy_test_read)\n",
    "\n",
    "print(\"Training data:\")\n",
    "display(toy_train_ui.todense())\n",
    "\n",
    "print(\"Test data:\")\n",
    "display(toy_test_ui.todense())\n",
    "\n",
    "print(\"Recommendations:\")\n",
    "display(reco)\n",
    "\n",
    "print(\"Estimations:\")\n",
    "display(estimations)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Sample recommendations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Here is what user rated high:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user</th>\n",
       "      <th>rating</th>\n",
       "      <th>title</th>\n",
       "      <th>genres</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>36652</th>\n",
       "      <td>735</td>\n",
       "      <td>5</td>\n",
       "      <td>Lone Star (1996)</td>\n",
       "      <td>Drama, Mystery</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3143</th>\n",
       "      <td>735</td>\n",
       "      <td>5</td>\n",
       "      <td>Star Wars (1977)</td>\n",
       "      <td>Action, Adventure, Romance, Sci-Fi, War</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52919</th>\n",
       "      <td>735</td>\n",
       "      <td>5</td>\n",
       "      <td>Kolya (1996)</td>\n",
       "      <td>Comedy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>275</th>\n",
       "      <td>735</td>\n",
       "      <td>4</td>\n",
       "      <td>Toy Story (1995)</td>\n",
       "      <td>Animation, Children's, Comedy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41134</th>\n",
       "      <td>735</td>\n",
       "      <td>4</td>\n",
       "      <td>Trainspotting (1996)</td>\n",
       "      <td>Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28094</th>\n",
       "      <td>735</td>\n",
       "      <td>4</td>\n",
       "      <td>Face/Off (1997)</td>\n",
       "      <td>Action, Sci-Fi, Thriller</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26548</th>\n",
       "      <td>735</td>\n",
       "      <td>4</td>\n",
       "      <td>Everyone Says I Love You (1996)</td>\n",
       "      <td>Comedy, Musical, Romance</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26186</th>\n",
       "      <td>735</td>\n",
       "      <td>4</td>\n",
       "      <td>Air Force One (1997)</td>\n",
       "      <td>Action, Thriller</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25791</th>\n",
       "      <td>735</td>\n",
       "      <td>4</td>\n",
       "      <td>Dead Man Walking (1995)</td>\n",
       "      <td>Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51948</th>\n",
       "      <td>735</td>\n",
       "      <td>4</td>\n",
       "      <td>Mighty Aphrodite (1995)</td>\n",
       "      <td>Comedy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52778</th>\n",
       "      <td>735</td>\n",
       "      <td>4</td>\n",
       "      <td>Fly Away Home (1996)</td>\n",
       "      <td>Adventure, Children's</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20966</th>\n",
       "      <td>735</td>\n",
       "      <td>4</td>\n",
       "      <td>Secrets &amp; Lies (1996)</td>\n",
       "      <td>Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19301</th>\n",
       "      <td>735</td>\n",
       "      <td>4</td>\n",
       "      <td>Scream (1996)</td>\n",
       "      <td>Horror, Thriller</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>54450</th>\n",
       "      <td>735</td>\n",
       "      <td>4</td>\n",
       "      <td>Sense and Sensibility (1995)</td>\n",
       "      <td>Drama, Romance</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17177</th>\n",
       "      <td>735</td>\n",
       "      <td>4</td>\n",
       "      <td>Leaving Las Vegas (1995)</td>\n",
       "      <td>Drama, Romance</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       user  rating                            title  \\\n",
       "36652   735       5                 Lone Star (1996)   \n",
       "3143    735       5                 Star Wars (1977)   \n",
       "52919   735       5                     Kolya (1996)   \n",
       "275     735       4                 Toy Story (1995)   \n",
       "41134   735       4             Trainspotting (1996)   \n",
       "28094   735       4                  Face/Off (1997)   \n",
       "26548   735       4  Everyone Says I Love You (1996)   \n",
       "26186   735       4             Air Force One (1997)   \n",
       "25791   735       4          Dead Man Walking (1995)   \n",
       "51948   735       4          Mighty Aphrodite (1995)   \n",
       "52778   735       4             Fly Away Home (1996)   \n",
       "20966   735       4            Secrets & Lies (1996)   \n",
       "19301   735       4                    Scream (1996)   \n",
       "54450   735       4     Sense and Sensibility (1995)   \n",
       "17177   735       4         Leaving Las Vegas (1995)   \n",
       "\n",
       "                                        genres  \n",
       "36652                           Drama, Mystery  \n",
       "3143   Action, Adventure, Romance, Sci-Fi, War  \n",
       "52919                                   Comedy  \n",
       "275              Animation, Children's, Comedy  \n",
       "41134                                    Drama  \n",
       "28094                 Action, Sci-Fi, Thriller  \n",
       "26548                 Comedy, Musical, Romance  \n",
       "26186                         Action, Thriller  \n",
       "25791                                    Drama  \n",
       "51948                                   Comedy  \n",
       "52778                    Adventure, Children's  \n",
       "20966                                    Drama  \n",
       "19301                         Horror, Thriller  \n",
       "54450                           Drama, Romance  \n",
       "17177                           Drama, Romance  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Here is what we recommend:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user</th>\n",
       "      <th>rec_nb</th>\n",
       "      <th>title</th>\n",
       "      <th>genres</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>733</th>\n",
       "      <td>735.0</td>\n",
       "      <td>1</td>\n",
       "      <td>Great Day in Harlem, A (1994)</td>\n",
       "      <td>Documentary</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1675</th>\n",
       "      <td>735.0</td>\n",
       "      <td>2</td>\n",
       "      <td>Tough and Deadly (1995)</td>\n",
       "      <td>Action, Drama, Thriller</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2617</th>\n",
       "      <td>735.0</td>\n",
       "      <td>3</td>\n",
       "      <td>Aiqing wansui (1994)</td>\n",
       "      <td>Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3559</th>\n",
       "      <td>735.0</td>\n",
       "      <td>4</td>\n",
       "      <td>Delta of Venus (1994)</td>\n",
       "      <td>Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4501</th>\n",
       "      <td>735.0</td>\n",
       "      <td>5</td>\n",
       "      <td>Someone Else's America (1995)</td>\n",
       "      <td>Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5443</th>\n",
       "      <td>735.0</td>\n",
       "      <td>6</td>\n",
       "      <td>Saint of Fort Washington, The (1993)</td>\n",
       "      <td>Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6385</th>\n",
       "      <td>735.0</td>\n",
       "      <td>7</td>\n",
       "      <td>Celestial Clockwork (1994)</td>\n",
       "      <td>Comedy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7326</th>\n",
       "      <td>735.0</td>\n",
       "      <td>8</td>\n",
       "      <td>Some Mother's Son (1996)</td>\n",
       "      <td>Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9222</th>\n",
       "      <td>735.0</td>\n",
       "      <td>9</td>\n",
       "      <td>Maya Lin: A Strong Clear Vision (1994)</td>\n",
       "      <td>Documentary</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8268</th>\n",
       "      <td>735.0</td>\n",
       "      <td>10</td>\n",
       "      <td>Prefontaine (1997)</td>\n",
       "      <td>Drama</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       user  rec_nb                                   title  \\\n",
       "733   735.0       1           Great Day in Harlem, A (1994)   \n",
       "1675  735.0       2                 Tough and Deadly (1995)   \n",
       "2617  735.0       3                    Aiqing wansui (1994)   \n",
       "3559  735.0       4                   Delta of Venus (1994)   \n",
       "4501  735.0       5           Someone Else's America (1995)   \n",
       "5443  735.0       6    Saint of Fort Washington, The (1993)   \n",
       "6385  735.0       7              Celestial Clockwork (1994)   \n",
       "7326  735.0       8                Some Mother's Son (1996)   \n",
       "9222  735.0       9  Maya Lin: A Strong Clear Vision (1994)   \n",
       "8268  735.0      10                      Prefontaine (1997)   \n",
       "\n",
       "                       genres  \n",
       "733               Documentary  \n",
       "1675  Action, Drama, Thriller  \n",
       "2617                    Drama  \n",
       "3559                    Drama  \n",
       "4501                    Drama  \n",
       "5443                    Drama  \n",
       "6385                   Comedy  \n",
       "7326                    Drama  \n",
       "9222              Documentary  \n",
       "8268                    Drama  "
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train = pd.read_csv(\n",
    "    \"./Datasets/ml-100k/train.csv\",\n",
    "    sep=\"\\t\",\n",
    "    header=None,\n",
    "    names=[\"user\", \"item\", \"rating\", \"timestamp\"],\n",
    ")\n",
    "items = pd.read_csv(\"./Datasets/ml-100k/movies.csv\")\n",
    "\n",
    "user = random.choice(list(set(train[\"user\"])))\n",
    "\n",
    "train_content = pd.merge(train, items, left_on=\"item\", right_on=\"id\")\n",
    "\n",
    "print(\"Here is what user rated high:\")\n",
    "display(\n",
    "    train_content[train_content[\"user\"] == user][\n",
    "        [\"user\", \"rating\", \"title\", \"genres\"]\n",
    "    ].sort_values(by=\"rating\", ascending=False)[:15]\n",
    ")\n",
    "\n",
    "reco = np.loadtxt(\n",
    "    \"Recommendations generated/ml-100k/Self_BaselineUI_reco.csv\", delimiter=\",\"\n",
    ")\n",
    "items = pd.read_csv(\"./Datasets/ml-100k/movies.csv\")\n",
    "\n",
    "# Let's ignore scores - they are not used in evaluation:\n",
    "reco_users = reco[:, :1]\n",
    "reco_items = reco[:, 1::2]\n",
    "# Let's put them into one array\n",
    "reco = np.concatenate((reco_users, reco_items), axis=1)\n",
    "\n",
    "# Let's rebuild it user-item dataframe\n",
    "recommended = []\n",
    "for row in reco:\n",
    "    for rec_nb, entry in enumerate(row[1:]):\n",
    "        recommended.append((row[0], rec_nb + 1, entry))\n",
    "recommended = pd.DataFrame(recommended, columns=[\"user\", \"rec_nb\", \"item\"])\n",
    "\n",
    "recommended_content = pd.merge(recommended, items, left_on=\"item\", right_on=\"id\")\n",
    "\n",
    "print(\"Here is what we recommend:\")\n",
    "recommended_content[recommended_content[\"user\"] == user][\n",
    "    [\"user\", \"rec_nb\", \"title\", \"genres\"]\n",
    "].sort_values(by=\"rec_nb\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# project task 2: implement some other evaluation measure"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# it may be your idea, modification of what we have already implemented\n",
    "# (for example Hit2 rate which would count as a success users whoreceived at least 2 relevant recommendations)\n",
    "# or something well-known\n",
    "# expected output: modification of evaluation_measures.py such that evaluate_all will also display your measure"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}