{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "### Prepare test set" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "slideshow": { "slide_type": "-" } }, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import scipy.sparse as sparse\n", "from collections import defaultdict\n", "from itertools import chain\n", "import random\n", "from tqdm import tqdm\n", "\n", "# In evaluation we do not load train set - it is not needed\n", "test = pd.read_csv(\"./Datasets/ml-100k/test.csv\", sep=\"\\t\", header=None)\n", "test.columns = [\"user\", \"item\", \"rating\", \"timestamp\"]\n", "\n", "test[\"user_code\"] = test[\"user\"].astype(\"category\").cat.codes\n", "test[\"item_code\"] = test[\"item\"].astype(\"category\").cat.codes\n", "\n", "user_code_id = dict(enumerate(test[\"user\"].astype(\"category\").cat.categories))\n", "user_id_code = dict((v, k) for k, v in user_code_id.items())\n", "item_code_id = dict(enumerate(test[\"item\"].astype(\"category\").cat.categories))\n", "item_id_code = dict((v, k) for k, v in item_code_id.items())\n", "\n", "test_ui = sparse.csr_matrix((test[\"rating\"], (test[\"user_code\"], test[\"item_code\"])))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Estimations metrics" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "estimations_df = pd.read_csv(\n", " \"Recommendations generated/ml-100k/Ready_Baseline_estimations.csv\", header=None\n", ")\n", "estimations_df.columns = [\"user\", \"item\", \"score\"]\n", "\n", "estimations_df[\"user_code\"] = [user_id_code[user] for user in estimations_df[\"user\"]]\n", "estimations_df[\"item_code\"] = [item_id_code[item] for item in estimations_df[\"item\"]]\n", "estimations = sparse.csr_matrix(\n", " (\n", " estimations_df[\"score\"],\n", " (estimations_df[\"user_code\"], estimations_df[\"item_code\"]),\n", " ),\n", " shape=test_ui.shape,\n", ")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "def estimations_metrics(test_ui, estimations):\n", " result = []\n", "\n", " RMSE = (np.sum((estimations.data - test_ui.data) ** 2) / estimations.nnz) ** (1 / 2)\n", " result.append([\"RMSE\", RMSE])\n", "\n", " MAE = np.sum(abs(estimations.data - test_ui.data)) / estimations.nnz\n", " result.append([\"MAE\", MAE])\n", "\n", " df_result = (pd.DataFrame(list(zip(*result))[1])).T\n", " df_result.columns = list(zip(*result))[0]\n", " return df_result" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RMSEMAE
00.9494590.752487
\n", "
" ], "text/plain": [ " RMSE MAE\n", "0 0.949459 0.752487" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# in case of error (in the laboratories) you might have to switch to the other version of pandas\n", "# try !pip3 install pandas=='1.0.3' (or pip if you use python 2) and restart the kernel\n", "\n", "estimations_metrics(test_ui, estimations)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Ranking metrics" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[663, 475, 62, ..., 472, 269, 503],\n", " [ 48, 313, 475, ..., 591, 175, 466],\n", " [351, 313, 475, ..., 591, 175, 466],\n", " ...,\n", " [259, 313, 475, ..., 11, 591, 175],\n", " [ 33, 313, 475, ..., 11, 591, 175],\n", " [ 77, 313, 475, ..., 11, 591, 175]])" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import numpy as np\n", "\n", "reco = np.loadtxt(\n", " \"Recommendations generated/ml-100k/Ready_Baseline_reco.csv\", delimiter=\",\"\n", ")\n", "# Let's ignore scores - they are not used in evaluation:\n", "users = reco[:, :1]\n", "items = reco[:, 1::2]\n", "# Let's use inner ids instead of real ones\n", "users = np.vectorize(lambda x: user_id_code.setdefault(x, -1))(users)\n", "items = np.vectorize(lambda x: item_id_code.setdefault(x, -1))(items)\n", "reco = np.concatenate((users, items), axis=1)\n", "reco" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "def ranking_metrics(test_ui, reco, super_reactions=[], topK=10):\n", "\n", " nb_items = test_ui.shape[1]\n", " (\n", " relevant_users,\n", " super_relevant_users,\n", " prec,\n", " rec,\n", " F_1,\n", " F_05,\n", " prec_super,\n", " rec_super,\n", " ndcg,\n", " mAP,\n", " MRR,\n", " LAUC,\n", " HR,\n", " ) = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)\n", "\n", " cg = 1.0 / np.log2(np.arange(2, topK + 2))\n", " cg_sum = np.cumsum(cg)\n", "\n", " for (nb_user, user) in tqdm(enumerate(reco[:, 0])):\n", " u_rated_items = test_ui.indices[test_ui.indptr[user] : test_ui.indptr[user + 1]]\n", " nb_u_rated_items = len(u_rated_items)\n", " if (\n", " nb_u_rated_items > 0\n", " ): # skip users with no items in test set (still possible that there will be no super items)\n", " relevant_users += 1\n", "\n", " u_super_items = u_rated_items[\n", " np.vectorize(lambda x: x in super_reactions)(\n", " test_ui.data[test_ui.indptr[user] : test_ui.indptr[user + 1]]\n", " )\n", " ]\n", " # more natural seems u_super_items=[item for item in u_rated_items if test_ui[user,item] in super_reactions]\n", " # but accesing test_ui[user,item] is expensive -we should avoid doing it\n", " if len(u_super_items) > 0:\n", " super_relevant_users += 1\n", "\n", " user_successes = np.zeros(topK)\n", " nb_user_successes = 0\n", " user_super_successes = np.zeros(topK)\n", " nb_user_super_successes = 0\n", "\n", " # evaluation\n", " for (item_position, item) in enumerate(reco[nb_user, 1 : topK + 1]):\n", " if item in u_rated_items:\n", " user_successes[item_position] = 1\n", " nb_user_successes += 1\n", " if item in u_super_items:\n", " user_super_successes[item_position] = 1\n", " nb_user_super_successes += 1\n", "\n", " prec_u = nb_user_successes / topK\n", " prec += prec_u\n", "\n", " rec_u = nb_user_successes / nb_u_rated_items\n", " rec += rec_u\n", "\n", " F_1 += 2 * (prec_u * rec_u) / (prec_u + rec_u) if prec_u + rec_u > 0 else 0\n", " F_05 += (\n", " (0.5 ** 2 + 1) * (prec_u * rec_u) / (0.5 ** 2 * 
prec_u + rec_u)\n", " if prec_u + rec_u > 0\n", " else 0\n", " )\n", "\n", " prec_super += nb_user_super_successes / topK\n", " rec_super += nb_user_super_successes / max(\n", " len(u_super_items), 1\n", " ) # to set 0 if no super items\n", " ndcg += np.dot(user_successes, cg) / cg_sum[min(topK, nb_u_rated_items) - 1]\n", "\n", " cumsum_successes = np.cumsum(user_successes)\n", " mAP += np.dot(\n", " cumsum_successes / np.arange(1, topK + 1), user_successes\n", " ) / min(topK, nb_u_rated_items)\n", " MRR += (\n", " 1 / (user_successes.nonzero()[0][0] + 1)\n", " if user_successes.nonzero()[0].size > 0\n", " else 0\n", " )\n", " LAUC += (\n", " np.dot(cumsum_successes, 1 - user_successes)\n", " + (nb_user_successes + nb_u_rated_items)\n", " / 2\n", " * ((nb_items - nb_u_rated_items) - (topK - nb_user_successes))\n", " ) / ((nb_items - nb_u_rated_items) * nb_u_rated_items)\n", "\n", " HR += nb_user_successes > 0\n", "\n", " result = []\n", " result.append((\"precision\", prec / relevant_users))\n", " result.append((\"recall\", rec / relevant_users))\n", " result.append((\"F_1\", F_1 / relevant_users))\n", " result.append((\"F_05\", F_05 / relevant_users))\n", " result.append((\"precision_super\", prec_super / super_relevant_users))\n", " result.append((\"recall_super\", rec_super / super_relevant_users))\n", " result.append((\"NDCG\", ndcg / relevant_users))\n", " result.append((\"mAP\", mAP / relevant_users))\n", " result.append((\"MRR\", MRR / relevant_users))\n", " result.append((\"LAUC\", LAUC / relevant_users))\n", " result.append((\"HR\", HR / relevant_users))\n", "\n", " df_result = (pd.DataFrame(list(zip(*result))[1])).T\n", " df_result.columns = list(zip(*result))[0]\n", " return df_result" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "943it [00:00, 9434.06it/s]\n" ] }, { "data": { "text/html": [ "
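{ "cell_type": "markdown", "metadata": {}, "source": [ "To make the per-user quantities accumulated in `ranking_metrics` more tangible, here is a small worked example on a hand-made top-5 list (purely hypothetical data, not taken from ml-100k). It reuses the precision, recall, average precision and NDCG formulas from the function above." ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Worked example of the per-user terms in ranking_metrics (hypothetical data)\n", "topK = 5\n", "recommended_items = [10, 20, 30, 40, 50]  # top-5 shown to one user\n", "relevant_items = {20, 40, 70}  # items this user rated in the test set\n", "hits = np.array([int(i in relevant_items) for i in recommended_items])\n", "\n", "precision_u = hits.sum() / topK\n", "recall_u = hits.sum() / len(relevant_items)\n", "ap_u = np.dot(np.cumsum(hits) / np.arange(1, topK + 1), hits) / min(topK, len(relevant_items))\n", "\n", "cg = 1.0 / np.log2(np.arange(2, topK + 2))\n", "ndcg_u = np.dot(hits, cg) / np.cumsum(cg)[min(topK, len(relevant_items)) - 1]\n", "\n", "print(precision_u, recall_u, ap_u, ndcg_u)" ] },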
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
precisionrecallF_1F_05precision_superrecall_superNDCGmAPMRRLAUCHR
00.091410.0376520.046030.0612860.0796140.0564630.0959570.0431780.1981930.5155010.437964
\n", "
" ], "text/plain": [ " precision recall F_1 F_05 precision_super recall_super \\\n", "0 0.09141 0.037652 0.04603 0.061286 0.079614 0.056463 \n", "\n", " NDCG mAP MRR LAUC HR \n", "0 0.095957 0.043178 0.198193 0.515501 0.437964 " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ranking_metrics(test_ui, reco, super_reactions=[4, 5], topK=10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Diversity metrics" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "def diversity_metrics(test_ui, reco, topK=10):\n", "\n", " frequencies = defaultdict(int)\n", "\n", " # let's assign 0 to all items in test set\n", " for item in list(set(test_ui.indices)):\n", " frequencies[item] = 0\n", "\n", " # counting frequencies\n", " for item in reco[:, 1:].flat:\n", " frequencies[item] += 1\n", "\n", " nb_reco_outside_test = frequencies[-1]\n", " del frequencies[-1]\n", "\n", " frequencies = np.array(list(frequencies.values()))\n", "\n", " nb_rec_items = len(frequencies[frequencies > 0])\n", " nb_reco_inside_test = np.sum(frequencies)\n", "\n", " frequencies = frequencies / np.sum(frequencies)\n", " frequencies = np.sort(frequencies)\n", "\n", " with np.errstate(\n", " divide=\"ignore\"\n", " ): # let's put zeros put items with 0 frequency and ignore division warning\n", " log_frequencies = np.nan_to_num(np.log(frequencies), posinf=0, neginf=0)\n", "\n", " result = []\n", " result.append(\n", " (\n", " \"Reco in test\",\n", " nb_reco_inside_test / (nb_reco_inside_test + nb_reco_outside_test),\n", " )\n", " )\n", " result.append((\"Test coverage\", nb_rec_items / test_ui.shape[1]))\n", " result.append((\"Shannon\", -np.dot(frequencies, log_frequencies)))\n", " result.append(\n", " (\n", " \"Gini\",\n", " np.dot(frequencies, np.arange(1 - len(frequencies), len(frequencies), 2))\n", " / (len(frequencies) - 1),\n", " )\n", " )\n", "\n", " df_result = (pd.DataFrame(list(zip(*result))[1])).T\n", " df_result.columns = list(zip(*result))[0]\n", " return df_result" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Reco in testTest coverageShannonGini
01.00.0339112.8365130.991139
\n", "
" ], "text/plain": [ " Reco in test Test coverage Shannon Gini\n", "0 1.0 0.033911 2.836513 0.991139" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# in case of errors try !pip3 install numpy==1.18.4 (or pip if you use python 2) and restart the kernel\n", "\n", "x = diversity_metrics(test_ui, reco, topK=10)\n", "x" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# To be used in other notebooks" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "943it [00:00, 12952.59it/s]\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RMSEMAEprecisionrecallF_1F_05precision_superrecall_superNDCGmAPMRRLAUCHRHitRate2HitRate3Reco in testTest coverageShannonGini
00.9494590.7524870.091410.0376520.046030.0612860.0796140.0564630.0959570.0431780.1981930.5155010.4379640.2396610.1261931.00.0339112.8365130.991139
\n", "
" ], "text/plain": [ " RMSE MAE precision recall F_1 F_05 \\\n", "0 0.949459 0.752487 0.09141 0.037652 0.04603 0.061286 \n", "\n", " precision_super recall_super NDCG mAP MRR LAUC \\\n", "0 0.079614 0.056463 0.095957 0.043178 0.198193 0.515501 \n", "\n", " HR HitRate2 HitRate3 Reco in test Test coverage Shannon \\\n", "0 0.437964 0.239661 0.126193 1.0 0.033911 2.836513 \n", "\n", " Gini \n", "0 0.991139 " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import evaluation_measures as ev\n", "\n", "estimations_df = pd.read_csv(\n", " \"Recommendations generated/ml-100k/Ready_Baseline_estimations.csv\", header=None\n", ")\n", "reco = np.loadtxt(\n", " \"Recommendations generated/ml-100k/Ready_Baseline_reco.csv\", delimiter=\",\"\n", ")\n", "\n", "ev.evaluate(\n", " test=pd.read_csv(\"./Datasets/ml-100k/test.csv\", sep=\"\\t\", header=None),\n", " estimations_df=estimations_df,\n", " reco=reco,\n", " super_reactions=[4, 5],\n", ")\n", "# also you can just type ev.evaluate_all(estimations_df, reco) - I put above values as default" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "943it [00:00, 13130.52it/s]\n", "943it [00:00, 12777.31it/s]\n", "943it [00:00, 13513.65it/s]\n", "943it [00:00, 13323.06it/s]\n", "943it [00:00, 13507.69it/s]\n", "943it [00:00, 13697.48it/s]\n" ] } ], "source": [ "dir_path = \"Recommendations generated/ml-100k/\"\n", "super_reactions = [4, 5]\n", "test = pd.read_csv(\"./Datasets/ml-100k/test.csv\", sep=\"\\t\", header=None)\n", "\n", "df = ev.evaluate_all(test, dir_path, super_reactions)\n", "# also you can just type ev.evaluate_all() - I put above values as default" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ModelRMSEMAEprecisionrecallF_1F_05precision_superrecall_super
0Self_TopPop2.5082582.2179090.1888650.1169190.1187320.1415840.1304720.137473
0Ready_Baseline0.9494590.7524870.0914100.0376520.0460300.0612860.0796140.056463
0Ready_Random1.5165121.2172140.0455990.0210010.0241360.0312260.0285410.022057
0Self_TopRated1.0307120.8209040.0009540.0001880.0002980.0004810.0006440.000223
0Self_BaselineUI0.9675850.7627400.0009540.0001700.0002780.0004630.0006440.000189
0Self_IKNN1.0183630.8087930.0003180.0001080.0001400.0001890.0000000.000000
\n", "
" ], "text/plain": [ " Model RMSE MAE precision recall F_1 \\\n", "0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 0.118732 \n", "0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 0.046030 \n", "0 Ready_Random 1.516512 1.217214 0.045599 0.021001 0.024136 \n", "0 Self_TopRated 1.030712 0.820904 0.000954 0.000188 0.000298 \n", "0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 0.000278 \n", "0 Self_IKNN 1.018363 0.808793 0.000318 0.000108 0.000140 \n", "\n", " F_05 precision_super recall_super \n", "0 0.141584 0.130472 0.137473 \n", "0 0.061286 0.079614 0.056463 \n", "0 0.031226 0.028541 0.022057 \n", "0 0.000481 0.000644 0.000223 \n", "0 0.000463 0.000644 0.000189 \n", "0 0.000189 0.000000 0.000000 " ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.iloc[:, :9]" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ModelNDCGmAPMRRLAUCHRHitRate2HitRate3Reco in testTest coverageShannonGini
0Self_TopPop0.2146510.1117070.4009390.5555460.7656420.4920470.2905621.0000000.0389613.1590790.987317
0Ready_Baseline0.0959570.0431780.1981930.5155010.4379640.2396610.1261931.0000000.0339112.8365130.991139
0Ready_Random0.0501540.0190000.1250890.5070130.3276780.0933190.0265110.9880170.1926415.1412460.903763
0Self_TopRated0.0010430.0003350.0033480.4964330.0095440.0000000.0000000.6990460.0050511.9459100.995669
0Self_BaselineUI0.0007520.0001680.0016770.4964240.0095440.0000000.0000000.6005300.0050511.8031260.996380
0Self_IKNN0.0002140.0000370.0003680.4963910.0031810.0000000.0000000.3921530.1154404.1747410.965327
\n", "
" ], "text/plain": [ " Model NDCG mAP MRR LAUC HR \\\n", "0 Self_TopPop 0.214651 0.111707 0.400939 0.555546 0.765642 \n", "0 Ready_Baseline 0.095957 0.043178 0.198193 0.515501 0.437964 \n", "0 Ready_Random 0.050154 0.019000 0.125089 0.507013 0.327678 \n", "0 Self_TopRated 0.001043 0.000335 0.003348 0.496433 0.009544 \n", "0 Self_BaselineUI 0.000752 0.000168 0.001677 0.496424 0.009544 \n", "0 Self_IKNN 0.000214 0.000037 0.000368 0.496391 0.003181 \n", "\n", " HitRate2 HitRate3 Reco in test Test coverage Shannon Gini \n", "0 0.492047 0.290562 1.000000 0.038961 3.159079 0.987317 \n", "0 0.239661 0.126193 1.000000 0.033911 2.836513 0.991139 \n", "0 0.093319 0.026511 0.988017 0.192641 5.141246 0.903763 \n", "0 0.000000 0.000000 0.699046 0.005051 1.945910 0.995669 \n", "0 0.000000 0.000000 0.600530 0.005051 1.803126 0.996380 \n", "0 0.000000 0.000000 0.392153 0.115440 4.174741 0.965327 " ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.iloc[:, np.append(0, np.arange(9, df.shape[1]))]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Check metrics on toy dataset" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "3it [00:00, ?it/s]\n", "3it [00:00, ?it/s]\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ModelRMSEMAEprecisionrecallF_1F_05precision_superrecall_superNDCGmAPMRRLAUCHRHitRate2HitRate3Reco in testTest coverageShannonGini
0Self_BaselineUI1.6124521.4000.4444440.8888890.5555560.4786320.3333330.750.6769070.5740740.6111110.6388891.00.3333330.00.8888890.81.3862940.250000
0Self_BaselineIU1.6483371.5750.4444440.8888890.5555560.4786320.3333330.750.7205500.6296300.6666670.7222221.00.3333330.00.7777780.81.3517840.357143
\n", "
" ], "text/plain": [ " Model RMSE MAE precision recall F_1 F_05 \\\n", "0 Self_BaselineUI 1.612452 1.400 0.444444 0.888889 0.555556 0.478632 \n", "0 Self_BaselineIU 1.648337 1.575 0.444444 0.888889 0.555556 0.478632 \n", "\n", " precision_super recall_super NDCG mAP MRR LAUC HR \\\n", "0 0.333333 0.75 0.676907 0.574074 0.611111 0.638889 1.0 \n", "0 0.333333 0.75 0.720550 0.629630 0.666667 0.722222 1.0 \n", "\n", " HitRate2 HitRate3 Reco in test Test coverage Shannon Gini \n", "0 0.333333 0.0 0.888889 0.8 1.386294 0.250000 \n", "0 0.333333 0.0 0.777778 0.8 1.351784 0.357143 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Training data:\n" ] }, { "data": { "text/plain": [ "matrix([[3, 4, 0, 0, 5, 0, 0, 4],\n", " [0, 1, 2, 3, 0, 0, 0, 0],\n", " [0, 0, 0, 5, 0, 3, 4, 0]], dtype=int64)" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Test data:\n" ] }, { "data": { "text/plain": [ "matrix([[0, 0, 0, 0, 0, 0, 3, 0],\n", " [0, 0, 0, 0, 5, 0, 0, 0],\n", " [5, 0, 4, 0, 0, 0, 0, 2]], dtype=int64)" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Recommendations:\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456
00305.0204.0604.0
110403.0602.0702.0
220405.0204.0704.0
\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6\n", "0 0 30 5.0 20 4.0 60 4.0\n", "1 10 40 3.0 60 2.0 70 2.0\n", "2 20 40 5.0 20 4.0 70 4.0" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Estimations:\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
useritemest_score
00604.0
110403.0
22003.0
320204.0
420704.0
\n", "
" ], "text/plain": [ " user item est_score\n", "0 0 60 4.0\n", "1 10 40 3.0\n", "2 20 0 3.0\n", "3 20 20 4.0\n", "4 20 70 4.0" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import helpers\n", "\n", "dir_path = \"Recommendations generated/toy-example/\"\n", "super_reactions = [4, 5]\n", "test = pd.read_csv(\"./Datasets/toy-example/test.csv\", sep=\"\\t\", header=None)\n", "\n", "display(ev.evaluate_all(test, dir_path, super_reactions, topK=3))\n", "# also you can just type ev.evaluate_all() - I put above values as default\n", "\n", "toy_train_read = pd.read_csv(\n", " \"./Datasets/toy-example/train.csv\",\n", " sep=\"\\t\",\n", " header=None,\n", " names=[\"user\", \"item\", \"rating\", \"timestamp\"],\n", ")\n", "toy_test_read = pd.read_csv(\n", " \"./Datasets/toy-example/test.csv\",\n", " sep=\"\\t\",\n", " header=None,\n", " names=[\"user\", \"item\", \"rating\", \"timestamp\"],\n", ")\n", "reco = pd.read_csv(\n", " \"Recommendations generated/toy-example/Self_BaselineUI_reco.csv\", header=None\n", ")\n", "estimations = pd.read_csv(\n", " \"Recommendations generated/toy-example/Self_BaselineUI_estimations.csv\",\n", " names=[\"user\", \"item\", \"est_score\"],\n", ")\n", "(\n", " toy_train_ui,\n", " toy_test_ui,\n", " toy_user_code_id,\n", " toy_user_id_code,\n", " toy_item_code_id,\n", " toy_item_id_code,\n", ") = helpers.data_to_csr(toy_train_read, toy_test_read)\n", "\n", "print(\"Training data:\")\n", "display(toy_train_ui.todense())\n", "\n", "print(\"Test data:\")\n", "display(toy_test_ui.todense())\n", "\n", "print(\"Recommendations:\")\n", "display(reco)\n", "\n", "print(\"Estimations:\")\n", "display(estimations)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Sample recommendations" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Here is what user rated high:\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
userratingtitlegenres
366527355Lone Star (1996)Drama, Mystery
31437355Star Wars (1977)Action, Adventure, Romance, Sci-Fi, War
529197355Kolya (1996)Comedy
2757354Toy Story (1995)Animation, Children's, Comedy
411347354Trainspotting (1996)Drama
280947354Face/Off (1997)Action, Sci-Fi, Thriller
265487354Everyone Says I Love You (1996)Comedy, Musical, Romance
261867354Air Force One (1997)Action, Thriller
257917354Dead Man Walking (1995)Drama
519487354Mighty Aphrodite (1995)Comedy
527787354Fly Away Home (1996)Adventure, Children's
209667354Secrets & Lies (1996)Drama
193017354Scream (1996)Horror, Thriller
544507354Sense and Sensibility (1995)Drama, Romance
171777354Leaving Las Vegas (1995)Drama, Romance
\n", "
" ], "text/plain": [ " user rating title \\\n", "36652 735 5 Lone Star (1996) \n", "3143 735 5 Star Wars (1977) \n", "52919 735 5 Kolya (1996) \n", "275 735 4 Toy Story (1995) \n", "41134 735 4 Trainspotting (1996) \n", "28094 735 4 Face/Off (1997) \n", "26548 735 4 Everyone Says I Love You (1996) \n", "26186 735 4 Air Force One (1997) \n", "25791 735 4 Dead Man Walking (1995) \n", "51948 735 4 Mighty Aphrodite (1995) \n", "52778 735 4 Fly Away Home (1996) \n", "20966 735 4 Secrets & Lies (1996) \n", "19301 735 4 Scream (1996) \n", "54450 735 4 Sense and Sensibility (1995) \n", "17177 735 4 Leaving Las Vegas (1995) \n", "\n", " genres \n", "36652 Drama, Mystery \n", "3143 Action, Adventure, Romance, Sci-Fi, War \n", "52919 Comedy \n", "275 Animation, Children's, Comedy \n", "41134 Drama \n", "28094 Action, Sci-Fi, Thriller \n", "26548 Comedy, Musical, Romance \n", "26186 Action, Thriller \n", "25791 Drama \n", "51948 Comedy \n", "52778 Adventure, Children's \n", "20966 Drama \n", "19301 Horror, Thriller \n", "54450 Drama, Romance \n", "17177 Drama, Romance " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Here is what we recommend:\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
userrec_nbtitlegenres
733735.01Great Day in Harlem, A (1994)Documentary
1675735.02Tough and Deadly (1995)Action, Drama, Thriller
2617735.03Aiqing wansui (1994)Drama
3559735.04Delta of Venus (1994)Drama
4501735.05Someone Else's America (1995)Drama
5443735.06Saint of Fort Washington, The (1993)Drama
6385735.07Celestial Clockwork (1994)Comedy
7326735.08Some Mother's Son (1996)Drama
9222735.09Maya Lin: A Strong Clear Vision (1994)Documentary
8268735.010Prefontaine (1997)Drama
\n", "
" ], "text/plain": [ " user rec_nb title \\\n", "733 735.0 1 Great Day in Harlem, A (1994) \n", "1675 735.0 2 Tough and Deadly (1995) \n", "2617 735.0 3 Aiqing wansui (1994) \n", "3559 735.0 4 Delta of Venus (1994) \n", "4501 735.0 5 Someone Else's America (1995) \n", "5443 735.0 6 Saint of Fort Washington, The (1993) \n", "6385 735.0 7 Celestial Clockwork (1994) \n", "7326 735.0 8 Some Mother's Son (1996) \n", "9222 735.0 9 Maya Lin: A Strong Clear Vision (1994) \n", "8268 735.0 10 Prefontaine (1997) \n", "\n", " genres \n", "733 Documentary \n", "1675 Action, Drama, Thriller \n", "2617 Drama \n", "3559 Drama \n", "4501 Drama \n", "5443 Drama \n", "6385 Comedy \n", "7326 Drama \n", "9222 Documentary \n", "8268 Drama " ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train = pd.read_csv(\n", " \"./Datasets/ml-100k/train.csv\",\n", " sep=\"\\t\",\n", " header=None,\n", " names=[\"user\", \"item\", \"rating\", \"timestamp\"],\n", ")\n", "items = pd.read_csv(\"./Datasets/ml-100k/movies.csv\")\n", "\n", "user = random.choice(list(set(train[\"user\"])))\n", "\n", "train_content = pd.merge(train, items, left_on=\"item\", right_on=\"id\")\n", "\n", "print(\"Here is what user rated high:\")\n", "display(\n", " train_content[train_content[\"user\"] == user][\n", " [\"user\", \"rating\", \"title\", \"genres\"]\n", " ].sort_values(by=\"rating\", ascending=False)[:15]\n", ")\n", "\n", "reco = np.loadtxt(\n", " \"Recommendations generated/ml-100k/Self_BaselineUI_reco.csv\", delimiter=\",\"\n", ")\n", "items = pd.read_csv(\"./Datasets/ml-100k/movies.csv\")\n", "\n", "# Let's ignore scores - they are not used in evaluation:\n", "reco_users = reco[:, :1]\n", "reco_items = reco[:, 1::2]\n", "# Let's put them into one array\n", "reco = np.concatenate((reco_users, reco_items), axis=1)\n", "\n", "# Let's rebuild it user-item dataframe\n", "recommended = []\n", "for row in reco:\n", " for rec_nb, entry in enumerate(row[1:]):\n", " recommended.append((row[0], rec_nb + 1, entry))\n", "recommended = pd.DataFrame(recommended, columns=[\"user\", \"rec_nb\", \"item\"])\n", "\n", "recommended_content = pd.merge(recommended, items, left_on=\"item\", right_on=\"id\")\n", "\n", "print(\"Here is what we recommend:\")\n", "recommended_content[recommended_content[\"user\"] == user][\n", " [\"user\", \"rec_nb\", \"title\", \"genres\"]\n", "].sort_values(by=\"rec_nb\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# project task 2: implement some other evaluation measure" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "# it may be your idea, modification of what we have already implemented\n", "# (for example Hit2 rate which would count as a success users whoreceived at least 2 relevant recommendations)\n", "# or something well-known\n", "# expected output: modification of evaluation_measures.py such that evaluate_all will also display your measure" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.8" } }, "nbformat": 4, "nbformat_minor": 4 }