diff --git a/P0. Data preparation.ipynb b/P0. Data preparation.ipynb index c87a932..9b20a57 100644 --- a/P0. Data preparation.ipynb +++ b/P0. Data preparation.ipynb @@ -13,12 +13,15 @@ "metadata": {}, "outputs": [], "source": [ + "# if you don't have some library installed try using pip (or pip3) to install it - you can do it from the notebook\n", + "# example: !pip install tqdm\n", + "# also on labs it's better to use python3 kernel - ipython3 notebook\n", + "\n", "import pandas as pd\n", "import numpy as np\n", "import scipy.sparse as sparse\n", "import time\n", "import random\n", - "import evaluation_measures as ev\n", "import matplotlib\n", "import matplotlib.pyplot as plt\n", "import os\n", @@ -161,7 +164,7 @@ "text": [ "We have 943 users, 1682 items and 100000 ratings.\n", "\n", - "Average number of ratings per user is 106.04. \n", + "Average number of ratings per user is 106.0445. \n", "\n", "Average number of ratings per item is 59.453.\n", "\n", @@ -170,13 +173,13 @@ } ], "source": [ - "users, items, ratings=len(set(df['user'])), len(set(df['item'])), len(df)\n", + "users, items, ratings=df['user'].nunique(), df['item'].nunique(), len(df)\n", "\n", - "print('We have {} users, {} items and {} ratings.\\n'.format(users, items, ratings))\n", + "print(f'We have {users} users, {items} items and {ratings} ratings.\\n')\n", "\n", - "print('Average number of ratings per user is {}. \\n'.format(round(ratings/users,2)))\n", - "print('Average number of ratings per item is {}.\\n'.format(round(ratings/items,4)))\n", - "print('Data sparsity (% of missing entries) is {}%.'.format(round(100*ratings/(users*items),4)))" + "print(f'Average number of ratings per user is {round(ratings/users,4)}. \\n')\n", + "print(f'Average number of ratings per item is {round(ratings/items,4)}.\\n')\n", + "print(f'Data sparsity (% of missing entries) is {round(100*ratings/(users*items),4)}%.')" ] }, { @@ -636,7 +639,6 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", "os.makedirs('./Datasets/toy-example/', exist_ok = True)" ] }, diff --git a/P1. Baseline.ipynb b/P1. Baseline.ipynb index 889bc05..c76aa0a 100644 --- a/P1. Baseline.ipynb +++ b/P1. Baseline.ipynb @@ -239,11 +239,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", - "Number of ratings: 8 \n", - "Number of users: 3 \n", - "Number of items: 4 \n", - "\n" + "Number of ratings: 8\n", + "Number of users: 3\n", + "Number of items: 4\n" ] } ], @@ -251,8 +249,9 @@ "print('Ratings matrix with missing entries replaced by zeros:')\n", "display(sample_csr.todense())\n", "\n", - "print('\\nNumber of ratings: {} \\nNumber of users: {} \\nNumber of items: {} \\n'\n", - " .format(sample_csr.nnz, sample_csr.shape[0], sample_csr.shape[1]))" + "print(f'Number of ratings: {sample_csr.nnz}')\n", + "print(f'Number of users: {sample_csr.shape[0]}')\n", + "print(f'Number of items: {sample_csr.shape[1]}')" ] }, { @@ -278,7 +277,7 @@ "print('Regarding items:', sample_csr.indices)\n", "\n", "for i in range(sample_csr.shape[0]):\n", - " print('Where ratings from {} to {} belongs to user {}.'.format(sample_csr.indptr[i], sample_csr.indptr[i+1]-1, i))" + " print(f'Where ratings from {sample_csr.indptr[i]} to {sample_csr.indptr[i+1]-1} belongs to user {i}.')" ] }, { @@ -307,7 +306,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "885 ns ± 165 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n", + "1.44 µs ± 184 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n", "Inefficient way to access items rated by user:\n" ] }, @@ -325,7 +324,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "153 µs ± 9.4 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n" + "172 µs ± 14.4 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n" ] } ], @@ -482,7 +481,7 @@ "display(sparse.diags(row_means).todense())\n", "\n", "print(\"\"\"Let's apply them in nonzero entries:\"\"\")\n", - "to_subtract=sparse.diags(row_means)*sample_csr.power(0)\n", + "to_subtract=sparse.diags(row_means)*(sample_csr>0)\n", "display(to_subtract.todense())\n", "\n", "print(\"Finally after subtraction:\")\n", @@ -573,26 +572,26 @@ "metadata": {}, "outputs": [], "source": [ - "TopPop=[]\n", - "train_iu=train_ui.transpose().tocsr()\n", - "scaling_factor=train_ui.max()/max(np.diff(train_iu.indptr))\n", + "top_pop = []\n", + "train_iu = train_ui.transpose().tocsr()\n", + "scaling_factor = train_ui.max()/max(np.diff(train_iu.indptr))\n", "\n", "for i in range(train_iu.shape[0]):\n", - " TopPop.append((i, (train_iu.indptr[i+1]-train_iu.indptr[i])*scaling_factor))\n", + " top_pop.append((i, (train_iu.indptr[i+1]-train_iu.indptr[i])*scaling_factor))\n", " \n", - "TopPop.sort(key=lambda x: x[1], reverse=True)\n", - "#TopPop is an array of pairs (item, rescaled_popularity) sorted descending from the most popular\n", + "top_pop.sort(key=lambda x: x[1], reverse=True)\n", + "#top_pop is an array of pairs (item, rescaled_popularity) sorted descending from the most popular\n", "\n", - "k=10\n", - "result=[]\n", + "k = 10\n", + "result = []\n", "\n", "for u in range(train_ui.shape[0]):\n", - " user_rated=train_ui.indices[train_ui.indptr[u]:train_ui.indptr[u+1]]\n", - " rec_user=[]\n", - " item_pos=0\n", + " user_rated = train_ui.indices[train_ui.indptr[u]:train_ui.indptr[u+1]]\n", + " rec_user = []\n", + " item_pos = 0\n", " while len(rec_user)<10:\n", - " if TopPop[item_pos][0] not in user_rated:\n", - " rec_user.append((item_code_id[TopPop[item_pos][0]], TopPop[item_pos][1]))\n", + " if top_pop[item_pos][0] not in user_rated:\n", + " rec_user.append((item_code_id[top_pop[item_pos][0]], top_pop[item_pos][1]))\n", " item_pos+=1\n", " result.append([user_code_id[u]]+list(chain(*rec_user)))\n", "\n", @@ -613,7 +612,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Self made global average" + "# Self made top rated" ] }, { @@ -622,11 +621,15 @@ "metadata": {}, "outputs": [], "source": [ - "GlobalAvg=[]\n", - "avg=np.sum(train_ui)/train_ui.nnz\n", + "top_rated = []\n", + "global_avg = sum(train_iu.data)/train_ui.nnz\n", "\n", "for i in range(train_iu.shape[0]):\n", - " GlobalAvg.append((i, avg))\n", + " ratings = train_iu.data[train_iu.indptr[i]: train_iu.indptr[i+1]]\n", + " avg = np.mean(ratings) if len(ratings)>0 else global_avg\n", + " top_rated.append((i, avg))\n", + " \n", + "top_rated.sort(key=lambda x: x[1], reverse=True)\n", " \n", "k=10\n", "result=[]\n", @@ -636,21 +639,21 @@ " rec_user=[]\n", " item_pos=0\n", " while len(rec_user)<10:\n", - " if GlobalAvg[item_pos][0] not in user_rated:\n", - " rec_user.append((item_code_id[GlobalAvg[item_pos][0]], GlobalAvg[item_pos][1]))\n", + " if top_rated[item_pos][0] not in user_rated:\n", + " rec_user.append((item_code_id[top_rated[item_pos][0]], top_rated[item_pos][1]))\n", " item_pos+=1\n", " result.append([user_code_id[u]]+list(chain(*rec_user)))\n", "\n", - "(pd.DataFrame(result)).to_csv('Recommendations generated/ml-100k/Self_GlobalAvg_reco.csv', index=False, header=False)\n", + "(pd.DataFrame(result)).to_csv('Recommendations generated/ml-100k/Self_TopRated_reco.csv', index=False, header=False)\n", "\n", "\n", - "# estimations - score is a bit artificial since that method is not designed for scoring, but for ranking\n", "\n", "estimations=[]\n", + "d = dict(top_rated)\n", "\n", "for user, item in zip(*test_ui.nonzero()):\n", - " estimations.append([user_code_id[user], item_code_id[item], avg])\n", - "(pd.DataFrame(estimations)).to_csv('Recommendations generated/ml-100k/Self_GlobalAvg_estimations.csv', index=False, header=False)" + " estimations.append([user_code_id[user], item_code_id[item], d[item]])\n", + "(pd.DataFrame(estimations)).to_csv('Recommendations generated/ml-100k/Self_TopRated_estimations.csv', index=False, header=False)" ] }, { @@ -706,50 +709,50 @@ " \n", " 0\n", " 1\n", - " 5\n", - " 3.529975\n", - " 10\n", - " 3.529975\n", - " 25\n", - " 3.529975\n", - " 32\n", - " 3.529975\n", - " 33\n", + " 814\n", + " 5.0\n", + " 1122\n", + " 5.0\n", + " 1189\n", + " 5.0\n", + " 1201\n", + " 5.0\n", + " 1293\n", " ...\n", - " 44\n", - " 3.529975\n", - " 46\n", - " 3.529975\n", - " 50\n", - " 3.529975\n", - " 52\n", - " 3.529975\n", - " 55\n", - " 3.529975\n", + " 1306\n", + " 5.0\n", + " 1467\n", + " 5.0\n", + " 1491\n", + " 5.0\n", + " 1500\n", + " 5.0\n", + " 1536\n", + " 5.0\n", " \n", " \n", " 1\n", " 2\n", - " 1\n", - " 3.529975\n", - " 2\n", - " 3.529975\n", - " 3\n", - " 3.529975\n", - " 4\n", - " 3.529975\n", - " 5\n", + " 119\n", + " 5.0\n", + " 814\n", + " 5.0\n", + " 1122\n", + " 5.0\n", + " 1189\n", + " 5.0\n", + " 1201\n", " ...\n", - " 6\n", - " 3.529975\n", - " 7\n", - " 3.529975\n", - " 8\n", - " 3.529975\n", - " 9\n", - " 3.529975\n", - " 11\n", - " 3.529975\n", + " 1293\n", + " 5.0\n", + " 1306\n", + " 5.0\n", + " 1467\n", + " 5.0\n", + " 1491\n", + " 5.0\n", + " 1500\n", + " 5.0\n", " \n", " \n", "\n", @@ -757,13 +760,13 @@ "" ], "text/plain": [ - " 0 1 2 3 4 5 6 7 8 9 ... 11 \\\n", - "0 1 5 3.529975 10 3.529975 25 3.529975 32 3.529975 33 ... 44 \n", - "1 2 1 3.529975 2 3.529975 3 3.529975 4 3.529975 5 ... 6 \n", + " 0 1 2 3 4 5 6 7 8 9 ... 11 12 13 \\\n", + "0 1 814 5.0 1122 5.0 1189 5.0 1201 5.0 1293 ... 1306 5.0 1467 \n", + "1 2 119 5.0 814 5.0 1122 5.0 1189 5.0 1201 ... 1293 5.0 1306 \n", "\n", - " 12 13 14 15 16 17 18 19 20 \n", - "0 3.529975 46 3.529975 50 3.529975 52 3.529975 55 3.529975 \n", - "1 3.529975 7 3.529975 8 3.529975 9 3.529975 11 3.529975 \n", + " 14 15 16 17 18 19 20 \n", + "0 5.0 1491 5.0 1500 5.0 1536 5.0 \n", + "1 5.0 1467 5.0 1491 5.0 1500 5.0 \n", "\n", "[2 rows x 21 columns]" ] @@ -777,25 +780,6 @@ "pd.DataFrame(result)[:2]" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Project task 1 - self made top rated" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], - "source": [ - "# project task 1: implement TopRated\n", - "# Implement recommender system which will recommend movies (which user hasn't seen) with the highest average rating\n", - "# The output should be saved in 'Recommendations generated/ml-100k/Self_TopRated_reco.csv'\n", - "# and 'Recommendations generated/ml-100k/Self_TopRated_estimations.csv'" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -805,7 +789,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -825,7 +809,7 @@ " \n", " max_row_mean=np.max(row_means)\n", " row_means[row_means==0]=max_row_mean+1\n", - " to_subtract_rows=sparse.diags(row_means)*result.power(0)\n", + " to_subtract_rows=sparse.diags(row_means)*(result>0)\n", " to_subtract_rows.sort_indices() # needed to have valid .data\n", " \n", " subtract=to_subtract_rows.data\n", @@ -878,7 +862,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -1046,7 +1030,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -1065,17 +1049,17 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# project task 2: implement self-made BaselineIU" + "# project task 1: implement self-made BaselineIU" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "# Implement recommender system which will recommend movies (which user hasn't seen) which is similar to BaselineUI\n", - "# but first subtract col means then row means\n", + "# but first subtract column means then row means\n", "# The output should be saved in 'Recommendations generated/ml-100k/Self_BaselineIU_reco.csv'\n", "# and 'Recommendations generated/ml-100k/Self_BaselineIU_estimations.csv'" ] @@ -1089,7 +1073,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -1146,7 +1130,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -1163,7 +1147,7 @@ "0.7524871012820799" ] }, - "execution_count": 24, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -1193,24 +1177,24 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "RMSE: 1.5317\n", - "MAE: 1.2304\n" + "RMSE: 1.5147\n", + "MAE: 1.2155\n" ] }, { "data": { "text/plain": [ - "1.2303840461147084" + "1.2154990549993152" ] }, - "execution_count": 25, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } diff --git a/P1. Introduction and baseline.pdf b/P1. Introduction and baseline.pdf index fe035c9..2bff70b 100644 Binary files a/P1. Introduction and baseline.pdf and b/P1. Introduction and baseline.pdf differ diff --git a/P2. Evaluation.ipynb b/P2. Evaluation.ipynb index 6962e33..1f5c329 100644 --- a/P2. Evaluation.ipynb +++ b/P2. Evaluation.ipynb @@ -273,7 +273,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "943it [00:00, 7666.87it/s]\n" + "943it [00:00, 6497.15it/s]\n" ] }, { @@ -477,7 +477,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "943it [00:00, 7370.69it/s]\n" + "943it [00:00, 5143.71it/s]\n" ] }, { @@ -585,11 +585,11 @@ "name": "stderr", "output_type": "stream", "text": [ - "943it [00:00, 7772.74it/s]\n", - "943it [00:00, 5607.69it/s]\n", - "943it [00:00, 4737.64it/s]\n", - "943it [00:00, 4986.41it/s]\n", - "943it [00:00, 3513.77it/s]\n" + "943it [00:00, 3573.64it/s]\n", + "943it [00:00, 5141.54it/s]\n", + "943it [00:00, 2827.19it/s]\n", + "943it [00:00, 2513.13it/s]\n", + "943it [00:00, 3555.67it/s]\n" ] } ], @@ -670,27 +670,27 @@ " \n", " \n", " 0\n", - " Self_GlobalAvg\n", - " 1.125760\n", - " 0.943534\n", - " 0.061188\n", - " 0.025968\n", - " 0.031383\n", - " 0.041343\n", - " 0.040558\n", - " 0.032107\n", + " Ready_Random\n", + " 1.525959\n", + " 1.225122\n", + " 0.047402\n", + " 0.020629\n", + " 0.024471\n", + " 0.032042\n", + " 0.027682\n", + " 0.019353\n", " \n", " \n", " 0\n", - " Ready_Random\n", - " 1.531724\n", - " 1.230384\n", - " 0.049417\n", - " 0.022558\n", - " 0.025490\n", - " 0.033242\n", - " 0.030365\n", - " 0.022626\n", + " Self_TopRated\n", + " 1.030712\n", + " 0.820904\n", + " 0.000954\n", + " 0.000188\n", + " 0.000298\n", + " 0.000481\n", + " 0.000644\n", + " 0.000223\n", " \n", " \n", " 0\n", @@ -712,15 +712,15 @@ " Model RMSE MAE precision recall F_1 \\\n", "0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 0.118732 \n", "0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 0.046030 \n", - "0 Self_GlobalAvg 1.125760 0.943534 0.061188 0.025968 0.031383 \n", - "0 Ready_Random 1.531724 1.230384 0.049417 0.022558 0.025490 \n", + "0 Ready_Random 1.525959 1.225122 0.047402 0.020629 0.024471 \n", + "0 Self_TopRated 1.030712 0.820904 0.000954 0.000188 0.000298 \n", "0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 0.000278 \n", "\n", " F_05 precision_super recall_super \n", "0 0.141584 0.130472 0.137473 \n", "0 0.061286 0.079614 0.056463 \n", - "0 0.041343 0.040558 0.032107 \n", - "0 0.033242 0.030365 0.022626 \n", + "0 0.032042 0.027682 0.019353 \n", + "0 0.000481 0.000644 0.000223 \n", "0 0.000463 0.000644 0.000189 " ] }, @@ -800,29 +800,29 @@ " \n", " \n", " 0\n", - " Self_GlobalAvg\n", - " 0.067695\n", - " 0.027470\n", - " 0.171187\n", - " 0.509546\n", - " 0.384942\n", - " 1.000000\n", - " 0.025974\n", - " 2.711772\n", - " 0.992003\n", + " Ready_Random\n", + " 0.051593\n", + " 0.019428\n", + " 0.129062\n", + " 0.506826\n", + " 0.336161\n", + " 0.987593\n", + " 0.175325\n", + " 5.087656\n", + " 0.908118\n", " \n", " \n", " 0\n", - " Ready_Random\n", - " 0.054166\n", - " 0.021656\n", - " 0.128378\n", - " 0.507802\n", - " 0.325557\n", - " 0.988865\n", - " 0.190476\n", - " 5.100033\n", - " 0.907724\n", + " Self_TopRated\n", + " 0.001043\n", + " 0.000335\n", + " 0.003348\n", + " 0.496433\n", + " 0.009544\n", + " 0.699046\n", + " 0.005051\n", + " 1.945910\n", + " 0.995669\n", " \n", " \n", " 0\n", @@ -845,15 +845,15 @@ " Model NDCG mAP MRR LAUC HR \\\n", "0 Self_TopPop 0.214651 0.111707 0.400939 0.555546 0.765642 \n", "0 Ready_Baseline 0.095957 0.043178 0.198193 0.515501 0.437964 \n", - "0 Self_GlobalAvg 0.067695 0.027470 0.171187 0.509546 0.384942 \n", - "0 Ready_Random 0.054166 0.021656 0.128378 0.507802 0.325557 \n", + "0 Ready_Random 0.051593 0.019428 0.129062 0.506826 0.336161 \n", + "0 Self_TopRated 0.001043 0.000335 0.003348 0.496433 0.009544 \n", "0 Self_BaselineUI 0.000752 0.000168 0.001677 0.496424 0.009544 \n", "\n", " Reco in test Test coverage Shannon Gini \n", "0 1.000000 0.038961 3.159079 0.987317 \n", "0 1.000000 0.033911 2.836513 0.991139 \n", - "0 1.000000 0.025974 2.711772 0.992003 \n", - "0 0.988865 0.190476 5.100033 0.907724 \n", + "0 0.987593 0.175325 5.087656 0.908118 \n", + "0 0.699046 0.005051 1.945910 0.995669 \n", "0 0.600530 0.005051 1.803126 0.996380 " ] }, @@ -882,7 +882,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "3it [00:00, 1941.81it/s]\n" + "3it [00:00, 1191.68it/s]\n" ] }, { @@ -1246,148 +1246,148 @@ " \n", " \n", " \n", - " 2985\n", - " 789\n", + " 50941\n", + " 661\n", " 5\n", - " Star Wars (1977)\n", - " Action, Adventure, Romance, Sci-Fi, War\n", - " \n", - " \n", - " 25980\n", - " 789\n", - " 5\n", - " Dead Man Walking (1995)\n", + " It's a Wonderful Life (1946)\n", " Drama\n", " \n", " \n", - " 9357\n", - " 789\n", + " 9531\n", + " 661\n", " 5\n", - " Last Supper, The (1995)\n", - " Drama, Thriller\n", + " Wizard of Oz, The (1939)\n", + " Adventure, Children's, Drama, Musical\n", " \n", " \n", - " 17306\n", - " 789\n", + " 27182\n", + " 661\n", " 5\n", - " Leaving Las Vegas (1995)\n", - " Drama, Romance\n", + " Empire Strikes Back, The (1980)\n", + " Action, Adventure, Drama, Romance, Sci-Fi, War\n", " \n", " \n", - " 36474\n", - " 789\n", + " 23944\n", + " 661\n", " 5\n", - " Swingers (1996)\n", - " Comedy, Drama\n", + " Apocalypse Now (1979)\n", + " Drama, War\n", " \n", " \n", - " 65139\n", - " 789\n", - " 4\n", - " Welcome to the Dollhouse (1995)\n", - " Comedy, Drama\n", - " \n", - " \n", - " 61975\n", - " 789\n", - " 4\n", - " Private Parts (1997)\n", - " Comedy, Drama\n", - " \n", - " \n", - " 56522\n", - " 789\n", - " 4\n", - " Waiting for Guffman (1996)\n", - " Comedy\n", - " \n", - " \n", - " 41414\n", - " 789\n", - " 4\n", - " Donnie Brasco (1997)\n", - " Crime, Drama\n", - " \n", - " \n", - " 36617\n", - " 789\n", - " 4\n", - " Lone Star (1996)\n", - " Drama, Mystery\n", - " \n", - " \n", - " 24501\n", - " 789\n", - " 4\n", - " People vs. Larry Flynt, The (1996)\n", - " Drama\n", - " \n", - " \n", - " 20210\n", - " 789\n", - " 4\n", + " 20285\n", + " 661\n", + " 5\n", " Return of the Jedi (1983)\n", " Action, Adventure, Romance, Sci-Fi, War\n", " \n", " \n", - " 8230\n", - " 789\n", - " 3\n", - " Beautiful Girls (1996)\n", + " 37504\n", + " 661\n", + " 5\n", + " Aladdin (1992)\n", + " Animation, Children's, Comedy, Musical\n", + " \n", + " \n", + " 68312\n", + " 661\n", + " 5\n", + " Babe (1995)\n", + " Children's, Comedy, Drama\n", + " \n", + " \n", + " 16362\n", + " 661\n", + " 5\n", + " Apollo 13 (1995)\n", + " Action, Drama, Thriller\n", + " \n", + " \n", + " 15168\n", + " 661\n", + " 5\n", + " Indiana Jones and the Last Crusade (1989)\n", + " Action, Adventure\n", + " \n", + " \n", + " 29402\n", + " 661\n", + " 5\n", + " Psycho (1960)\n", + " Horror, Romance, Thriller\n", + " \n", + " \n", + " 40755\n", + " 661\n", + " 5\n", + " Jean de Florette (1986)\n", " Drama\n", " \n", " \n", - " 19781\n", - " 789\n", - " 3\n", - " Liar Liar (1997)\n", - " Comedy\n", + " 41950\n", + " 661\n", + " 5\n", + " Die Hard (1988)\n", + " Action, Thriller\n", " \n", " \n", - " 39387\n", - " 789\n", - " 3\n", - " Sleepers (1996)\n", - " Crime, Drama\n", + " 58932\n", + " 661\n", + " 5\n", + " Enchanted April (1991)\n", + " Drama\n", + " \n", + " \n", + " 43013\n", + " 661\n", + " 5\n", + " 2001: A Space Odyssey (1968)\n", + " Drama, Mystery, Sci-Fi, Thriller\n", + " \n", + " \n", + " 65664\n", + " 661\n", + " 5\n", + " Star Trek: The Wrath of Khan (1982)\n", + " Action, Adventure, Sci-Fi\n", " \n", " \n", "\n", "" ], "text/plain": [ - " user rating title \\\n", - "2985 789 5 Star Wars (1977) \n", - "25980 789 5 Dead Man Walking (1995) \n", - "9357 789 5 Last Supper, The (1995) \n", - "17306 789 5 Leaving Las Vegas (1995) \n", - "36474 789 5 Swingers (1996) \n", - "65139 789 4 Welcome to the Dollhouse (1995) \n", - "61975 789 4 Private Parts (1997) \n", - "56522 789 4 Waiting for Guffman (1996) \n", - "41414 789 4 Donnie Brasco (1997) \n", - "36617 789 4 Lone Star (1996) \n", - "24501 789 4 People vs. Larry Flynt, The (1996) \n", - "20210 789 4 Return of the Jedi (1983) \n", - "8230 789 3 Beautiful Girls (1996) \n", - "19781 789 3 Liar Liar (1997) \n", - "39387 789 3 Sleepers (1996) \n", + " user rating title \\\n", + "50941 661 5 It's a Wonderful Life (1946) \n", + "9531 661 5 Wizard of Oz, The (1939) \n", + "27182 661 5 Empire Strikes Back, The (1980) \n", + "23944 661 5 Apocalypse Now (1979) \n", + "20285 661 5 Return of the Jedi (1983) \n", + "37504 661 5 Aladdin (1992) \n", + "68312 661 5 Babe (1995) \n", + "16362 661 5 Apollo 13 (1995) \n", + "15168 661 5 Indiana Jones and the Last Crusade (1989) \n", + "29402 661 5 Psycho (1960) \n", + "40755 661 5 Jean de Florette (1986) \n", + "41950 661 5 Die Hard (1988) \n", + "58932 661 5 Enchanted April (1991) \n", + "43013 661 5 2001: A Space Odyssey (1968) \n", + "65664 661 5 Star Trek: The Wrath of Khan (1982) \n", "\n", - " genres \n", - "2985 Action, Adventure, Romance, Sci-Fi, War \n", - "25980 Drama \n", - "9357 Drama, Thriller \n", - "17306 Drama, Romance \n", - "36474 Comedy, Drama \n", - "65139 Comedy, Drama \n", - "61975 Comedy, Drama \n", - "56522 Comedy \n", - "41414 Crime, Drama \n", - "36617 Drama, Mystery \n", - "24501 Drama \n", - "20210 Action, Adventure, Romance, Sci-Fi, War \n", - "8230 Drama \n", - "19781 Comedy \n", - "39387 Crime, Drama " + " genres \n", + "50941 Drama \n", + "9531 Adventure, Children's, Drama, Musical \n", + "27182 Action, Adventure, Drama, Romance, Sci-Fi, War \n", + "23944 Drama, War \n", + "20285 Action, Adventure, Romance, Sci-Fi, War \n", + "37504 Animation, Children's, Comedy, Musical \n", + "68312 Children's, Comedy, Drama \n", + "16362 Action, Drama, Thriller \n", + "15168 Action, Adventure \n", + "29402 Horror, Romance, Thriller \n", + "40755 Drama \n", + "41950 Action, Thriller \n", + "58932 Drama \n", + "43013 Drama, Mystery, Sci-Fi, Thriller \n", + "65664 Action, Adventure, Sci-Fi " ] }, "metadata": {}, @@ -1429,71 +1429,71 @@ " \n", " \n", " \n", - " 787\n", - " 789.0\n", + " 659\n", + " 661.0\n", " 1\n", " Great Day in Harlem, A (1994)\n", " Documentary\n", " \n", " \n", - " 1729\n", - " 789.0\n", + " 1601\n", + " 661.0\n", " 2\n", " Tough and Deadly (1995)\n", " Action, Drama, Thriller\n", " \n", " \n", - " 2671\n", - " 789.0\n", + " 2543\n", + " 661.0\n", " 3\n", " Aiqing wansui (1994)\n", " Drama\n", " \n", " \n", - " 3613\n", - " 789.0\n", + " 3485\n", + " 661.0\n", " 4\n", " Delta of Venus (1994)\n", " Drama\n", " \n", " \n", - " 4555\n", - " 789.0\n", + " 4427\n", + " 661.0\n", " 5\n", " Someone Else's America (1995)\n", " Drama\n", " \n", " \n", - " 5497\n", - " 789.0\n", + " 5369\n", + " 661.0\n", " 6\n", " Saint of Fort Washington, The (1993)\n", " Drama\n", " \n", " \n", - " 6439\n", - " 789.0\n", + " 6311\n", + " 661.0\n", " 7\n", " Celestial Clockwork (1994)\n", " Comedy\n", " \n", " \n", - " 7380\n", - " 789.0\n", + " 7253\n", + " 661.0\n", " 8\n", " Some Mother's Son (1996)\n", " Drama\n", " \n", " \n", - " 9276\n", - " 789.0\n", + " 9148\n", + " 661.0\n", " 9\n", " Maya Lin: A Strong Clear Vision (1994)\n", " Documentary\n", " \n", " \n", - " 8322\n", - " 789.0\n", + " 8194\n", + " 661.0\n", " 10\n", " Prefontaine (1997)\n", " Drama\n", @@ -1504,28 +1504,28 @@ ], "text/plain": [ " user rec_nb title \\\n", - "787 789.0 1 Great Day in Harlem, A (1994) \n", - "1729 789.0 2 Tough and Deadly (1995) \n", - "2671 789.0 3 Aiqing wansui (1994) \n", - "3613 789.0 4 Delta of Venus (1994) \n", - "4555 789.0 5 Someone Else's America (1995) \n", - "5497 789.0 6 Saint of Fort Washington, The (1993) \n", - "6439 789.0 7 Celestial Clockwork (1994) \n", - "7380 789.0 8 Some Mother's Son (1996) \n", - "9276 789.0 9 Maya Lin: A Strong Clear Vision (1994) \n", - "8322 789.0 10 Prefontaine (1997) \n", + "659 661.0 1 Great Day in Harlem, A (1994) \n", + "1601 661.0 2 Tough and Deadly (1995) \n", + "2543 661.0 3 Aiqing wansui (1994) \n", + "3485 661.0 4 Delta of Venus (1994) \n", + "4427 661.0 5 Someone Else's America (1995) \n", + "5369 661.0 6 Saint of Fort Washington, The (1993) \n", + "6311 661.0 7 Celestial Clockwork (1994) \n", + "7253 661.0 8 Some Mother's Son (1996) \n", + "9148 661.0 9 Maya Lin: A Strong Clear Vision (1994) \n", + "8194 661.0 10 Prefontaine (1997) \n", "\n", " genres \n", - "787 Documentary \n", - "1729 Action, Drama, Thriller \n", - "2671 Drama \n", - "3613 Drama \n", - "4555 Drama \n", - "5497 Drama \n", - "6439 Comedy \n", - "7380 Drama \n", - "9276 Documentary \n", - "8322 Drama " + "659 Documentary \n", + "1601 Action, Drama, Thriller \n", + "2543 Drama \n", + "3485 Drama \n", + "4427 Drama \n", + "5369 Drama \n", + "6311 Comedy \n", + "7253 Drama \n", + "9148 Documentary \n", + "8194 Drama " ] }, "execution_count": 15, @@ -1595,11 +1595,11 @@ "name": "stderr", "output_type": "stream", "text": [ - "943it [00:00, 4479.94it/s]\n", - "943it [00:00, 4036.40it/s]\n", - "943it [00:00, 4598.99it/s]\n", - "943it [00:00, 5170.18it/s]\n", - "943it [00:00, 4778.23it/s]\n" + "943it [00:00, 4220.01it/s]\n", + "943it [00:00, 3015.35it/s]\n", + "943it [00:00, 2308.31it/s]\n", + "943it [00:00, 3461.11it/s]\n", + "943it [00:00, 3442.41it/s]\n" ] }, { @@ -1688,45 +1688,45 @@ " \n", " \n", " 0\n", - " Self_GlobalAvg\n", - " 1.125760\n", - " 0.943534\n", - " 0.061188\n", - " 0.025968\n", - " 0.031383\n", - " 0.041343\n", - " 0.040558\n", - " 0.032107\n", - " 0.067695\n", - " 0.027470\n", - " 0.171187\n", - " 0.509546\n", - " 0.384942\n", - " 1.000000\n", - " 0.025974\n", - " 2.711772\n", - " 0.992003\n", + " Ready_Random\n", + " 1.525959\n", + " 1.225122\n", + " 0.047402\n", + " 0.020629\n", + " 0.024471\n", + " 0.032042\n", + " 0.027682\n", + " 0.019353\n", + " 0.051593\n", + " 0.019428\n", + " 0.129062\n", + " 0.506826\n", + " 0.336161\n", + " 0.987593\n", + " 0.175325\n", + " 5.087656\n", + " 0.908118\n", " \n", " \n", " 0\n", - " Ready_Random\n", - " 1.531724\n", - " 1.230384\n", - " 0.049417\n", - " 0.022558\n", - " 0.025490\n", - " 0.033242\n", - " 0.030365\n", - " 0.022626\n", - " 0.054166\n", - " 0.021656\n", - " 0.128378\n", - " 0.507802\n", - " 0.325557\n", - " 0.988865\n", - " 0.190476\n", - " 5.100033\n", - " 0.907724\n", + " Self_TopRated\n", + " 1.030712\n", + " 0.820904\n", + " 0.000954\n", + " 0.000188\n", + " 0.000298\n", + " 0.000481\n", + " 0.000644\n", + " 0.000223\n", + " 0.001043\n", + " 0.000335\n", + " 0.003348\n", + " 0.496433\n", + " 0.009544\n", + " 0.699046\n", + " 0.005051\n", + " 1.945910\n", + " 0.995669\n", " \n", " \n", " 0\n", @@ -1757,22 +1757,22 @@ " Model RMSE MAE precision recall F_1 \\\n", "0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 0.118732 \n", "0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 0.046030 \n", - "0 Self_GlobalAvg 1.125760 0.943534 0.061188 0.025968 0.031383 \n", - "0 Ready_Random 1.531724 1.230384 0.049417 0.022558 0.025490 \n", + "0 Ready_Random 1.525959 1.225122 0.047402 0.020629 0.024471 \n", + "0 Self_TopRated 1.030712 0.820904 0.000954 0.000188 0.000298 \n", "0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 0.000278 \n", "\n", " F_05 precision_super recall_super NDCG mAP MRR \\\n", "0 0.141584 0.130472 0.137473 0.214651 0.111707 0.400939 \n", "0 0.061286 0.079614 0.056463 0.095957 0.043178 0.198193 \n", - "0 0.041343 0.040558 0.032107 0.067695 0.027470 0.171187 \n", - "0 0.033242 0.030365 0.022626 0.054166 0.021656 0.128378 \n", + "0 0.032042 0.027682 0.019353 0.051593 0.019428 0.129062 \n", + "0 0.000481 0.000644 0.000223 0.001043 0.000335 0.003348 \n", "0 0.000463 0.000644 0.000189 0.000752 0.000168 0.001677 \n", "\n", " LAUC HR Reco in test Test coverage Shannon Gini \n", "0 0.555546 0.765642 1.000000 0.038961 3.159079 0.987317 \n", "0 0.515501 0.437964 1.000000 0.033911 2.836513 0.991139 \n", - "0 0.509546 0.384942 1.000000 0.025974 2.711772 0.992003 \n", - "0 0.507802 0.325557 0.988865 0.190476 5.100033 0.907724 \n", + "0 0.506826 0.336161 0.987593 0.175325 5.087656 0.908118 \n", + "0 0.496433 0.009544 0.699046 0.005051 1.945910 0.995669 \n", "0 0.496424 0.009544 0.600530 0.005051 1.803126 0.996380 " ] }, diff --git a/P2. Evaluation.pdf b/P2. Evaluation.pdf index 60e0c74..44b1b4c 100644 Binary files a/P2. Evaluation.pdf and b/P2. Evaluation.pdf differ