diff --git a/P0. Data preparation.ipynb b/P0. Data preparation.ipynb
index c87a932..9b20a57 100644
--- a/P0. Data preparation.ipynb
+++ b/P0. Data preparation.ipynb
@@ -13,12 +13,15 @@
"metadata": {},
"outputs": [],
"source": [
+ "# If a library is missing, install it with pip (or pip3) - this can be done directly from the notebook.\n",
+ "# Example: !pip install tqdm\n",
+ "# In the labs it is also better to use the Python 3 kernel (ipython3 notebook).\n",
+ "\n",
"import pandas as pd\n",
"import numpy as np\n",
"import scipy.sparse as sparse\n",
"import time\n",
"import random\n",
- "import evaluation_measures as ev\n",
"import matplotlib\n",
"import matplotlib.pyplot as plt\n",
"import os\n",
@@ -161,7 +164,7 @@
"text": [
"We have 943 users, 1682 items and 100000 ratings.\n",
"\n",
- "Average number of ratings per user is 106.04. \n",
+ "Average number of ratings per user is 106.0445. \n",
"\n",
"Average number of ratings per item is 59.453.\n",
"\n",
@@ -170,13 +173,13 @@
}
],
"source": [
- "users, items, ratings=len(set(df['user'])), len(set(df['item'])), len(df)\n",
+ "users, items, ratings=df['user'].nunique(), df['item'].nunique(), len(df)\n",
"\n",
- "print('We have {} users, {} items and {} ratings.\\n'.format(users, items, ratings))\n",
+ "print(f'We have {users} users, {items} items and {ratings} ratings.\\n')\n",
"\n",
- "print('Average number of ratings per user is {}. \\n'.format(round(ratings/users,2)))\n",
- "print('Average number of ratings per item is {}.\\n'.format(round(ratings/items,4)))\n",
- "print('Data sparsity (% of missing entries) is {}%.'.format(round(100*ratings/(users*items),4)))"
+ "print(f'Average number of ratings per user is {round(ratings/users,4)}. \\n')\n",
+ "print(f'Average number of ratings per item is {round(ratings/items,4)}.\\n')\n",
+ "print(f'Data sparsity (% of missing entries) is {round(100*(1-ratings/(users*items)),4)}%.')  # sparsity = 100% minus the share of filled user-item pairs"
]
},
{
@@ -636,7 +639,6 @@
"metadata": {},
"outputs": [],
"source": [
- "import os\n",
"os.makedirs('./Datasets/toy-example/', exist_ok = True)"
]
},
diff --git a/P1. Baseline.ipynb b/P1. Baseline.ipynb
index 889bc05..c76aa0a 100644
--- a/P1. Baseline.ipynb
+++ b/P1. Baseline.ipynb
@@ -239,11 +239,9 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "\n",
- "Number of ratings: 8 \n",
- "Number of users: 3 \n",
- "Number of items: 4 \n",
- "\n"
+ "Number of ratings: 8\n",
+ "Number of users: 3\n",
+ "Number of items: 4\n"
]
}
],
@@ -251,8 +249,9 @@
"print('Ratings matrix with missing entries replaced by zeros:')\n",
"display(sample_csr.todense())\n",
"\n",
- "print('\\nNumber of ratings: {} \\nNumber of users: {} \\nNumber of items: {} \\n'\n",
- " .format(sample_csr.nnz, sample_csr.shape[0], sample_csr.shape[1]))"
+ "print(f'Number of ratings: {sample_csr.nnz}')\n",
+ "print(f'Number of users: {sample_csr.shape[0]}')\n",
+ "print(f'Number of items: {sample_csr.shape[1]}')"
]
},
{
@@ -278,7 +277,7 @@
"print('Regarding items:', sample_csr.indices)\n",
"\n",
"for i in range(sample_csr.shape[0]):\n",
- " print('Where ratings from {} to {} belongs to user {}.'.format(sample_csr.indptr[i], sample_csr.indptr[i+1]-1, i))"
+ " print(f'Where ratings from {sample_csr.indptr[i]} to {sample_csr.indptr[i+1]-1} belongs to user {i}.')"
]
},
{
@@ -307,7 +306,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "885 ns ± 165 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n",
+ "1.44 µs ± 184 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n",
"Inefficient way to access items rated by user:\n"
]
},
@@ -325,7 +324,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "153 µs ± 9.4 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
+ "172 µs ± 14.4 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
]
}
],
@@ -482,7 +481,7 @@
"display(sparse.diags(row_means).todense())\n",
"\n",
"print(\"\"\"Let's apply them in nonzero entries:\"\"\")\n",
- "to_subtract=sparse.diags(row_means)*sample_csr.power(0)\n",
+ "to_subtract=sparse.diags(row_means)*(sample_csr>0)\n",
"display(to_subtract.todense())\n",
"\n",
"print(\"Finally after subtraction:\")\n",
@@ -573,26 +572,26 @@
"metadata": {},
"outputs": [],
"source": [
- "TopPop=[]\n",
- "train_iu=train_ui.transpose().tocsr()\n",
- "scaling_factor=train_ui.max()/max(np.diff(train_iu.indptr))\n",
+ "top_pop = []\n",
+ "train_iu = train_ui.transpose().tocsr()\n",
+ "scaling_factor = train_ui.max()/max(np.diff(train_iu.indptr))\n",
"\n",
"for i in range(train_iu.shape[0]):\n",
- " TopPop.append((i, (train_iu.indptr[i+1]-train_iu.indptr[i])*scaling_factor))\n",
+ " top_pop.append((i, (train_iu.indptr[i+1]-train_iu.indptr[i])*scaling_factor))\n",
" \n",
- "TopPop.sort(key=lambda x: x[1], reverse=True)\n",
- "#TopPop is an array of pairs (item, rescaled_popularity) sorted descending from the most popular\n",
+ "top_pop.sort(key=lambda x: x[1], reverse=True)\n",
+ "# top_pop is a list of (item, rescaled_popularity) pairs sorted in descending order of popularity\n",
"\n",
- "k=10\n",
- "result=[]\n",
+ "k = 10\n",
+ "result = []\n",
"\n",
"for u in range(train_ui.shape[0]):\n",
- " user_rated=train_ui.indices[train_ui.indptr[u]:train_ui.indptr[u+1]]\n",
- " rec_user=[]\n",
- " item_pos=0\n",
+ " user_rated = train_ui.indices[train_ui.indptr[u]:train_ui.indptr[u+1]]\n",
+ " rec_user = []\n",
+ " item_pos = 0\n",
" while len(rec_user)<10:\n",
- " if TopPop[item_pos][0] not in user_rated:\n",
- " rec_user.append((item_code_id[TopPop[item_pos][0]], TopPop[item_pos][1]))\n",
+ " if top_pop[item_pos][0] not in user_rated:\n",
+ " rec_user.append((item_code_id[top_pop[item_pos][0]], top_pop[item_pos][1]))\n",
" item_pos+=1\n",
" result.append([user_code_id[u]]+list(chain(*rec_user)))\n",
"\n",
@@ -613,7 +612,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "# Self made global average"
+ "# Self made top rated"
]
},
{
@@ -622,11 +621,15 @@
"metadata": {},
"outputs": [],
"source": [
- "GlobalAvg=[]\n",
- "avg=np.sum(train_ui)/train_ui.nnz\n",
+ "top_rated = []\n",
+ "global_avg = sum(train_iu.data)/train_ui.nnz\n",
"\n",
"for i in range(train_iu.shape[0]):\n",
- " GlobalAvg.append((i, avg))\n",
+ " ratings = train_iu.data[train_iu.indptr[i]: train_iu.indptr[i+1]]\n",
+ " avg = np.mean(ratings) if len(ratings)>0 else global_avg\n",
+ " top_rated.append((i, avg))\n",
+ " \n",
+ "top_rated.sort(key=lambda x: x[1], reverse=True)\n",
" \n",
"k=10\n",
"result=[]\n",
@@ -636,21 +639,21 @@
" rec_user=[]\n",
" item_pos=0\n",
" while len(rec_user)<10:\n",
- " if GlobalAvg[item_pos][0] not in user_rated:\n",
- " rec_user.append((item_code_id[GlobalAvg[item_pos][0]], GlobalAvg[item_pos][1]))\n",
+ " if top_rated[item_pos][0] not in user_rated:\n",
+ " rec_user.append((item_code_id[top_rated[item_pos][0]], top_rated[item_pos][1]))\n",
" item_pos+=1\n",
" result.append([user_code_id[u]]+list(chain(*rec_user)))\n",
"\n",
- "(pd.DataFrame(result)).to_csv('Recommendations generated/ml-100k/Self_GlobalAvg_reco.csv', index=False, header=False)\n",
+ "(pd.DataFrame(result)).to_csv('Recommendations generated/ml-100k/Self_TopRated_reco.csv', index=False, header=False)\n",
"\n",
"\n",
- "# estimations - score is a bit artificial since that method is not designed for scoring, but for ranking\n",
"\n",
"estimations=[]\n",
+ "d = dict(top_rated)\n",
"\n",
"for user, item in zip(*test_ui.nonzero()):\n",
- " estimations.append([user_code_id[user], item_code_id[item], avg])\n",
- "(pd.DataFrame(estimations)).to_csv('Recommendations generated/ml-100k/Self_GlobalAvg_estimations.csv', index=False, header=False)"
+ " estimations.append([user_code_id[user], item_code_id[item], d[item]])\n",
+ "(pd.DataFrame(estimations)).to_csv('Recommendations generated/ml-100k/Self_TopRated_estimations.csv', index=False, header=False)"
]
},
{
@@ -706,50 +709,50 @@
"
\n",
" 0 | \n",
" 1 | \n",
- " 5 | \n",
- " 3.529975 | \n",
- " 10 | \n",
- " 3.529975 | \n",
- " 25 | \n",
- " 3.529975 | \n",
- " 32 | \n",
- " 3.529975 | \n",
- " 33 | \n",
+ " 814 | \n",
+ " 5.0 | \n",
+ " 1122 | \n",
+ " 5.0 | \n",
+ " 1189 | \n",
+ " 5.0 | \n",
+ " 1201 | \n",
+ " 5.0 | \n",
+ " 1293 | \n",
" ... | \n",
- " 44 | \n",
- " 3.529975 | \n",
- " 46 | \n",
- " 3.529975 | \n",
- " 50 | \n",
- " 3.529975 | \n",
- " 52 | \n",
- " 3.529975 | \n",
- " 55 | \n",
- " 3.529975 | \n",
+ " 1306 | \n",
+ " 5.0 | \n",
+ " 1467 | \n",
+ " 5.0 | \n",
+ " 1491 | \n",
+ " 5.0 | \n",
+ " 1500 | \n",
+ " 5.0 | \n",
+ " 1536 | \n",
+ " 5.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
- " 1 | \n",
- " 3.529975 | \n",
- " 2 | \n",
- " 3.529975 | \n",
- " 3 | \n",
- " 3.529975 | \n",
- " 4 | \n",
- " 3.529975 | \n",
- " 5 | \n",
+ " 119 | \n",
+ " 5.0 | \n",
+ " 814 | \n",
+ " 5.0 | \n",
+ " 1122 | \n",
+ " 5.0 | \n",
+ " 1189 | \n",
+ " 5.0 | \n",
+ " 1201 | \n",
" ... | \n",
- " 6 | \n",
- " 3.529975 | \n",
- " 7 | \n",
- " 3.529975 | \n",
- " 8 | \n",
- " 3.529975 | \n",
- " 9 | \n",
- " 3.529975 | \n",
- " 11 | \n",
- " 3.529975 | \n",
+ " 1293 | \n",
+ " 5.0 | \n",
+ " 1306 | \n",
+ " 5.0 | \n",
+ " 1467 | \n",
+ " 5.0 | \n",
+ " 1491 | \n",
+ " 5.0 | \n",
+ " 1500 | \n",
+ " 5.0 | \n",
"
\n",
" \n",
"\n",
@@ -757,13 +760,13 @@
""
],
"text/plain": [
- " 0 1 2 3 4 5 6 7 8 9 ... 11 \\\n",
- "0 1 5 3.529975 10 3.529975 25 3.529975 32 3.529975 33 ... 44 \n",
- "1 2 1 3.529975 2 3.529975 3 3.529975 4 3.529975 5 ... 6 \n",
+ " 0 1 2 3 4 5 6 7 8 9 ... 11 12 13 \\\n",
+ "0 1 814 5.0 1122 5.0 1189 5.0 1201 5.0 1293 ... 1306 5.0 1467 \n",
+ "1 2 119 5.0 814 5.0 1122 5.0 1189 5.0 1201 ... 1293 5.0 1306 \n",
"\n",
- " 12 13 14 15 16 17 18 19 20 \n",
- "0 3.529975 46 3.529975 50 3.529975 52 3.529975 55 3.529975 \n",
- "1 3.529975 7 3.529975 8 3.529975 9 3.529975 11 3.529975 \n",
+ " 14 15 16 17 18 19 20 \n",
+ "0 5.0 1491 5.0 1500 5.0 1536 5.0 \n",
+ "1 5.0 1467 5.0 1491 5.0 1500 5.0 \n",
"\n",
"[2 rows x 21 columns]"
]
@@ -777,25 +780,6 @@
"pd.DataFrame(result)[:2]"
]
},
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Project task 1 - self made top rated"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 18,
- "metadata": {},
- "outputs": [],
- "source": [
- "# project task 1: implement TopRated\n",
- "# Implement recommender system which will recommend movies (which user hasn't seen) with the highest average rating\n",
- "# The output should be saved in 'Recommendations generated/ml-100k/Self_TopRated_reco.csv'\n",
- "# and 'Recommendations generated/ml-100k/Self_TopRated_estimations.csv'"
- ]
- },
{
"cell_type": "markdown",
"metadata": {},
@@ -805,7 +789,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
@@ -825,7 +809,7 @@
" \n",
" max_row_mean=np.max(row_means)\n",
" row_means[row_means==0]=max_row_mean+1\n",
- " to_subtract_rows=sparse.diags(row_means)*result.power(0)\n",
+ " to_subtract_rows=sparse.diags(row_means)*(result>0)\n",
" to_subtract_rows.sort_indices() # needed to have valid .data\n",
" \n",
" subtract=to_subtract_rows.data\n",
@@ -878,7 +862,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 19,
"metadata": {},
"outputs": [
{
@@ -1046,7 +1030,7 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
@@ -1065,17 +1049,17 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "# project task 2: implement self-made BaselineIU"
+ "# project task 1: implement self-made BaselineIU"
]
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"# Implement recommender system which will recommend movies (which user hasn't seen) which is similar to BaselineUI\n",
- "# but first subtract col means then row means\n",
+ "# but first subtract column means then row means\n",
"# The output should be saved in 'Recommendations generated/ml-100k/Self_BaselineIU_reco.csv'\n",
"# and 'Recommendations generated/ml-100k/Self_BaselineIU_estimations.csv'"
]
@@ -1089,7 +1073,7 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 22,
"metadata": {},
"outputs": [
{
@@ -1146,7 +1130,7 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 23,
"metadata": {},
"outputs": [
{
@@ -1163,7 +1147,7 @@
"0.7524871012820799"
]
},
- "execution_count": 24,
+ "execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
@@ -1193,24 +1177,24 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "RMSE: 1.5317\n",
- "MAE: 1.2304\n"
+ "RMSE: 1.5147\n",
+ "MAE: 1.2155\n"
]
},
{
"data": {
"text/plain": [
- "1.2303840461147084"
+ "1.2154990549993152"
]
},
- "execution_count": 25,
+ "execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
diff --git a/P1. Introduction and baseline.pdf b/P1. Introduction and baseline.pdf
index fe035c9..2bff70b 100644
Binary files a/P1. Introduction and baseline.pdf and b/P1. Introduction and baseline.pdf differ
diff --git a/P2. Evaluation.ipynb b/P2. Evaluation.ipynb
index 6962e33..1f5c329 100644
--- a/P2. Evaluation.ipynb
+++ b/P2. Evaluation.ipynb
@@ -273,7 +273,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "943it [00:00, 7666.87it/s]\n"
+ "943it [00:00, 6497.15it/s]\n"
]
},
{
@@ -477,7 +477,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "943it [00:00, 7370.69it/s]\n"
+ "943it [00:00, 5143.71it/s]\n"
]
},
{
@@ -585,11 +585,11 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "943it [00:00, 7772.74it/s]\n",
- "943it [00:00, 5607.69it/s]\n",
- "943it [00:00, 4737.64it/s]\n",
- "943it [00:00, 4986.41it/s]\n",
- "943it [00:00, 3513.77it/s]\n"
+ "943it [00:00, 3573.64it/s]\n",
+ "943it [00:00, 5141.54it/s]\n",
+ "943it [00:00, 2827.19it/s]\n",
+ "943it [00:00, 2513.13it/s]\n",
+ "943it [00:00, 3555.67it/s]\n"
]
}
],
@@ -670,27 +670,27 @@
" \n",
" \n",
" 0 | \n",
- " Self_GlobalAvg | \n",
- " 1.125760 | \n",
- " 0.943534 | \n",
- " 0.061188 | \n",
- " 0.025968 | \n",
- " 0.031383 | \n",
- " 0.041343 | \n",
- " 0.040558 | \n",
- " 0.032107 | \n",
+ " Ready_Random | \n",
+ " 1.525959 | \n",
+ " 1.225122 | \n",
+ " 0.047402 | \n",
+ " 0.020629 | \n",
+ " 0.024471 | \n",
+ " 0.032042 | \n",
+ " 0.027682 | \n",
+ " 0.019353 | \n",
"
\n",
" \n",
" 0 | \n",
- " Ready_Random | \n",
- " 1.531724 | \n",
- " 1.230384 | \n",
- " 0.049417 | \n",
- " 0.022558 | \n",
- " 0.025490 | \n",
- " 0.033242 | \n",
- " 0.030365 | \n",
- " 0.022626 | \n",
+ " Self_TopRated | \n",
+ " 1.030712 | \n",
+ " 0.820904 | \n",
+ " 0.000954 | \n",
+ " 0.000188 | \n",
+ " 0.000298 | \n",
+ " 0.000481 | \n",
+ " 0.000644 | \n",
+ " 0.000223 | \n",
"
\n",
" \n",
" 0 | \n",
@@ -712,15 +712,15 @@
" Model RMSE MAE precision recall F_1 \\\n",
"0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 0.118732 \n",
"0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 0.046030 \n",
- "0 Self_GlobalAvg 1.125760 0.943534 0.061188 0.025968 0.031383 \n",
- "0 Ready_Random 1.531724 1.230384 0.049417 0.022558 0.025490 \n",
+ "0 Ready_Random 1.525959 1.225122 0.047402 0.020629 0.024471 \n",
+ "0 Self_TopRated 1.030712 0.820904 0.000954 0.000188 0.000298 \n",
"0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 0.000278 \n",
"\n",
" F_05 precision_super recall_super \n",
"0 0.141584 0.130472 0.137473 \n",
"0 0.061286 0.079614 0.056463 \n",
- "0 0.041343 0.040558 0.032107 \n",
- "0 0.033242 0.030365 0.022626 \n",
+ "0 0.032042 0.027682 0.019353 \n",
+ "0 0.000481 0.000644 0.000223 \n",
"0 0.000463 0.000644 0.000189 "
]
},
@@ -800,29 +800,29 @@
"
\n",
" \n",
" 0 | \n",
- " Self_GlobalAvg | \n",
- " 0.067695 | \n",
- " 0.027470 | \n",
- " 0.171187 | \n",
- " 0.509546 | \n",
- " 0.384942 | \n",
- " 1.000000 | \n",
- " 0.025974 | \n",
- " 2.711772 | \n",
- " 0.992003 | \n",
+ " Ready_Random | \n",
+ " 0.051593 | \n",
+ " 0.019428 | \n",
+ " 0.129062 | \n",
+ " 0.506826 | \n",
+ " 0.336161 | \n",
+ " 0.987593 | \n",
+ " 0.175325 | \n",
+ " 5.087656 | \n",
+ " 0.908118 | \n",
"
\n",
" \n",
" 0 | \n",
- " Ready_Random | \n",
- " 0.054166 | \n",
- " 0.021656 | \n",
- " 0.128378 | \n",
- " 0.507802 | \n",
- " 0.325557 | \n",
- " 0.988865 | \n",
- " 0.190476 | \n",
- " 5.100033 | \n",
- " 0.907724 | \n",
+ " Self_TopRated | \n",
+ " 0.001043 | \n",
+ " 0.000335 | \n",
+ " 0.003348 | \n",
+ " 0.496433 | \n",
+ " 0.009544 | \n",
+ " 0.699046 | \n",
+ " 0.005051 | \n",
+ " 1.945910 | \n",
+ " 0.995669 | \n",
"
\n",
" \n",
" 0 | \n",
@@ -845,15 +845,15 @@
" Model NDCG mAP MRR LAUC HR \\\n",
"0 Self_TopPop 0.214651 0.111707 0.400939 0.555546 0.765642 \n",
"0 Ready_Baseline 0.095957 0.043178 0.198193 0.515501 0.437964 \n",
- "0 Self_GlobalAvg 0.067695 0.027470 0.171187 0.509546 0.384942 \n",
- "0 Ready_Random 0.054166 0.021656 0.128378 0.507802 0.325557 \n",
+ "0 Ready_Random 0.051593 0.019428 0.129062 0.506826 0.336161 \n",
+ "0 Self_TopRated 0.001043 0.000335 0.003348 0.496433 0.009544 \n",
"0 Self_BaselineUI 0.000752 0.000168 0.001677 0.496424 0.009544 \n",
"\n",
" Reco in test Test coverage Shannon Gini \n",
"0 1.000000 0.038961 3.159079 0.987317 \n",
"0 1.000000 0.033911 2.836513 0.991139 \n",
- "0 1.000000 0.025974 2.711772 0.992003 \n",
- "0 0.988865 0.190476 5.100033 0.907724 \n",
+ "0 0.987593 0.175325 5.087656 0.908118 \n",
+ "0 0.699046 0.005051 1.945910 0.995669 \n",
"0 0.600530 0.005051 1.803126 0.996380 "
]
},
@@ -882,7 +882,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "3it [00:00, 1941.81it/s]\n"
+ "3it [00:00, 1191.68it/s]\n"
]
},
{
@@ -1246,148 +1246,148 @@
" \n",
"
\n",
" \n",
- " 2985 | \n",
- " 789 | \n",
+ " 50941 | \n",
+ " 661 | \n",
" 5 | \n",
- " Star Wars (1977) | \n",
- " Action, Adventure, Romance, Sci-Fi, War | \n",
- "
\n",
- " \n",
- " 25980 | \n",
- " 789 | \n",
- " 5 | \n",
- " Dead Man Walking (1995) | \n",
+ " It's a Wonderful Life (1946) | \n",
" Drama | \n",
"
\n",
" \n",
- " 9357 | \n",
- " 789 | \n",
+ " 9531 | \n",
+ " 661 | \n",
" 5 | \n",
- " Last Supper, The (1995) | \n",
- " Drama, Thriller | \n",
+ " Wizard of Oz, The (1939) | \n",
+ " Adventure, Children's, Drama, Musical | \n",
"
\n",
" \n",
- " 17306 | \n",
- " 789 | \n",
+ " 27182 | \n",
+ " 661 | \n",
" 5 | \n",
- " Leaving Las Vegas (1995) | \n",
- " Drama, Romance | \n",
+ " Empire Strikes Back, The (1980) | \n",
+ " Action, Adventure, Drama, Romance, Sci-Fi, War | \n",
"
\n",
" \n",
- " 36474 | \n",
- " 789 | \n",
+ " 23944 | \n",
+ " 661 | \n",
" 5 | \n",
- " Swingers (1996) | \n",
- " Comedy, Drama | \n",
+ " Apocalypse Now (1979) | \n",
+ " Drama, War | \n",
"
\n",
" \n",
- " 65139 | \n",
- " 789 | \n",
- " 4 | \n",
- " Welcome to the Dollhouse (1995) | \n",
- " Comedy, Drama | \n",
- "
\n",
- " \n",
- " 61975 | \n",
- " 789 | \n",
- " 4 | \n",
- " Private Parts (1997) | \n",
- " Comedy, Drama | \n",
- "
\n",
- " \n",
- " 56522 | \n",
- " 789 | \n",
- " 4 | \n",
- " Waiting for Guffman (1996) | \n",
- " Comedy | \n",
- "
\n",
- " \n",
- " 41414 | \n",
- " 789 | \n",
- " 4 | \n",
- " Donnie Brasco (1997) | \n",
- " Crime, Drama | \n",
- "
\n",
- " \n",
- " 36617 | \n",
- " 789 | \n",
- " 4 | \n",
- " Lone Star (1996) | \n",
- " Drama, Mystery | \n",
- "
\n",
- " \n",
- " 24501 | \n",
- " 789 | \n",
- " 4 | \n",
- " People vs. Larry Flynt, The (1996) | \n",
- " Drama | \n",
- "
\n",
- " \n",
- " 20210 | \n",
- " 789 | \n",
- " 4 | \n",
+ " 20285 | \n",
+ " 661 | \n",
+ " 5 | \n",
" Return of the Jedi (1983) | \n",
" Action, Adventure, Romance, Sci-Fi, War | \n",
"
\n",
" \n",
- " 8230 | \n",
- " 789 | \n",
- " 3 | \n",
- " Beautiful Girls (1996) | \n",
+ " 37504 | \n",
+ " 661 | \n",
+ " 5 | \n",
+ " Aladdin (1992) | \n",
+ " Animation, Children's, Comedy, Musical | \n",
+ "
\n",
+ " \n",
+ " 68312 | \n",
+ " 661 | \n",
+ " 5 | \n",
+ " Babe (1995) | \n",
+ " Children's, Comedy, Drama | \n",
+ "
\n",
+ " \n",
+ " 16362 | \n",
+ " 661 | \n",
+ " 5 | \n",
+ " Apollo 13 (1995) | \n",
+ " Action, Drama, Thriller | \n",
+ "
\n",
+ " \n",
+ " 15168 | \n",
+ " 661 | \n",
+ " 5 | \n",
+ " Indiana Jones and the Last Crusade (1989) | \n",
+ " Action, Adventure | \n",
+ "
\n",
+ " \n",
+ " 29402 | \n",
+ " 661 | \n",
+ " 5 | \n",
+ " Psycho (1960) | \n",
+ " Horror, Romance, Thriller | \n",
+ "
\n",
+ " \n",
+ " 40755 | \n",
+ " 661 | \n",
+ " 5 | \n",
+ " Jean de Florette (1986) | \n",
" Drama | \n",
"
\n",
" \n",
- " 19781 | \n",
- " 789 | \n",
- " 3 | \n",
- " Liar Liar (1997) | \n",
- " Comedy | \n",
+ " 41950 | \n",
+ " 661 | \n",
+ " 5 | \n",
+ " Die Hard (1988) | \n",
+ " Action, Thriller | \n",
"
\n",
" \n",
- " 39387 | \n",
- " 789 | \n",
- " 3 | \n",
- " Sleepers (1996) | \n",
- " Crime, Drama | \n",
+ " 58932 | \n",
+ " 661 | \n",
+ " 5 | \n",
+ " Enchanted April (1991) | \n",
+ " Drama | \n",
+ "
\n",
+ " \n",
+ " 43013 | \n",
+ " 661 | \n",
+ " 5 | \n",
+ " 2001: A Space Odyssey (1968) | \n",
+ " Drama, Mystery, Sci-Fi, Thriller | \n",
+ "
\n",
+ " \n",
+ " 65664 | \n",
+ " 661 | \n",
+ " 5 | \n",
+ " Star Trek: The Wrath of Khan (1982) | \n",
+ " Action, Adventure, Sci-Fi | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " user rating title \\\n",
- "2985 789 5 Star Wars (1977) \n",
- "25980 789 5 Dead Man Walking (1995) \n",
- "9357 789 5 Last Supper, The (1995) \n",
- "17306 789 5 Leaving Las Vegas (1995) \n",
- "36474 789 5 Swingers (1996) \n",
- "65139 789 4 Welcome to the Dollhouse (1995) \n",
- "61975 789 4 Private Parts (1997) \n",
- "56522 789 4 Waiting for Guffman (1996) \n",
- "41414 789 4 Donnie Brasco (1997) \n",
- "36617 789 4 Lone Star (1996) \n",
- "24501 789 4 People vs. Larry Flynt, The (1996) \n",
- "20210 789 4 Return of the Jedi (1983) \n",
- "8230 789 3 Beautiful Girls (1996) \n",
- "19781 789 3 Liar Liar (1997) \n",
- "39387 789 3 Sleepers (1996) \n",
+ " user rating title \\\n",
+ "50941 661 5 It's a Wonderful Life (1946) \n",
+ "9531 661 5 Wizard of Oz, The (1939) \n",
+ "27182 661 5 Empire Strikes Back, The (1980) \n",
+ "23944 661 5 Apocalypse Now (1979) \n",
+ "20285 661 5 Return of the Jedi (1983) \n",
+ "37504 661 5 Aladdin (1992) \n",
+ "68312 661 5 Babe (1995) \n",
+ "16362 661 5 Apollo 13 (1995) \n",
+ "15168 661 5 Indiana Jones and the Last Crusade (1989) \n",
+ "29402 661 5 Psycho (1960) \n",
+ "40755 661 5 Jean de Florette (1986) \n",
+ "41950 661 5 Die Hard (1988) \n",
+ "58932 661 5 Enchanted April (1991) \n",
+ "43013 661 5 2001: A Space Odyssey (1968) \n",
+ "65664 661 5 Star Trek: The Wrath of Khan (1982) \n",
"\n",
- " genres \n",
- "2985 Action, Adventure, Romance, Sci-Fi, War \n",
- "25980 Drama \n",
- "9357 Drama, Thriller \n",
- "17306 Drama, Romance \n",
- "36474 Comedy, Drama \n",
- "65139 Comedy, Drama \n",
- "61975 Comedy, Drama \n",
- "56522 Comedy \n",
- "41414 Crime, Drama \n",
- "36617 Drama, Mystery \n",
- "24501 Drama \n",
- "20210 Action, Adventure, Romance, Sci-Fi, War \n",
- "8230 Drama \n",
- "19781 Comedy \n",
- "39387 Crime, Drama "
+ " genres \n",
+ "50941 Drama \n",
+ "9531 Adventure, Children's, Drama, Musical \n",
+ "27182 Action, Adventure, Drama, Romance, Sci-Fi, War \n",
+ "23944 Drama, War \n",
+ "20285 Action, Adventure, Romance, Sci-Fi, War \n",
+ "37504 Animation, Children's, Comedy, Musical \n",
+ "68312 Children's, Comedy, Drama \n",
+ "16362 Action, Drama, Thriller \n",
+ "15168 Action, Adventure \n",
+ "29402 Horror, Romance, Thriller \n",
+ "40755 Drama \n",
+ "41950 Action, Thriller \n",
+ "58932 Drama \n",
+ "43013 Drama, Mystery, Sci-Fi, Thriller \n",
+ "65664 Action, Adventure, Sci-Fi "
]
},
"metadata": {},
@@ -1429,71 +1429,71 @@
" \n",
" \n",
" \n",
- " 787 | \n",
- " 789.0 | \n",
+ " 659 | \n",
+ " 661.0 | \n",
" 1 | \n",
" Great Day in Harlem, A (1994) | \n",
" Documentary | \n",
"
\n",
" \n",
- " 1729 | \n",
- " 789.0 | \n",
+ " 1601 | \n",
+ " 661.0 | \n",
" 2 | \n",
" Tough and Deadly (1995) | \n",
" Action, Drama, Thriller | \n",
"
\n",
" \n",
- " 2671 | \n",
- " 789.0 | \n",
+ " 2543 | \n",
+ " 661.0 | \n",
" 3 | \n",
" Aiqing wansui (1994) | \n",
" Drama | \n",
"
\n",
" \n",
- " 3613 | \n",
- " 789.0 | \n",
+ " 3485 | \n",
+ " 661.0 | \n",
" 4 | \n",
" Delta of Venus (1994) | \n",
" Drama | \n",
"
\n",
" \n",
- " 4555 | \n",
- " 789.0 | \n",
+ " 4427 | \n",
+ " 661.0 | \n",
" 5 | \n",
" Someone Else's America (1995) | \n",
" Drama | \n",
"
\n",
" \n",
- " 5497 | \n",
- " 789.0 | \n",
+ " 5369 | \n",
+ " 661.0 | \n",
" 6 | \n",
" Saint of Fort Washington, The (1993) | \n",
" Drama | \n",
"
\n",
" \n",
- " 6439 | \n",
- " 789.0 | \n",
+ " 6311 | \n",
+ " 661.0 | \n",
" 7 | \n",
" Celestial Clockwork (1994) | \n",
" Comedy | \n",
"
\n",
" \n",
- " 7380 | \n",
- " 789.0 | \n",
+ " 7253 | \n",
+ " 661.0 | \n",
" 8 | \n",
" Some Mother's Son (1996) | \n",
" Drama | \n",
"
\n",
" \n",
- " 9276 | \n",
- " 789.0 | \n",
+ " 9148 | \n",
+ " 661.0 | \n",
" 9 | \n",
" Maya Lin: A Strong Clear Vision (1994) | \n",
" Documentary | \n",
"
\n",
" \n",
- " 8322 | \n",
- " 789.0 | \n",
+ " 8194 | \n",
+ " 661.0 | \n",
" 10 | \n",
" Prefontaine (1997) | \n",
" Drama | \n",
@@ -1504,28 +1504,28 @@
],
"text/plain": [
" user rec_nb title \\\n",
- "787 789.0 1 Great Day in Harlem, A (1994) \n",
- "1729 789.0 2 Tough and Deadly (1995) \n",
- "2671 789.0 3 Aiqing wansui (1994) \n",
- "3613 789.0 4 Delta of Venus (1994) \n",
- "4555 789.0 5 Someone Else's America (1995) \n",
- "5497 789.0 6 Saint of Fort Washington, The (1993) \n",
- "6439 789.0 7 Celestial Clockwork (1994) \n",
- "7380 789.0 8 Some Mother's Son (1996) \n",
- "9276 789.0 9 Maya Lin: A Strong Clear Vision (1994) \n",
- "8322 789.0 10 Prefontaine (1997) \n",
+ "659 661.0 1 Great Day in Harlem, A (1994) \n",
+ "1601 661.0 2 Tough and Deadly (1995) \n",
+ "2543 661.0 3 Aiqing wansui (1994) \n",
+ "3485 661.0 4 Delta of Venus (1994) \n",
+ "4427 661.0 5 Someone Else's America (1995) \n",
+ "5369 661.0 6 Saint of Fort Washington, The (1993) \n",
+ "6311 661.0 7 Celestial Clockwork (1994) \n",
+ "7253 661.0 8 Some Mother's Son (1996) \n",
+ "9148 661.0 9 Maya Lin: A Strong Clear Vision (1994) \n",
+ "8194 661.0 10 Prefontaine (1997) \n",
"\n",
" genres \n",
- "787 Documentary \n",
- "1729 Action, Drama, Thriller \n",
- "2671 Drama \n",
- "3613 Drama \n",
- "4555 Drama \n",
- "5497 Drama \n",
- "6439 Comedy \n",
- "7380 Drama \n",
- "9276 Documentary \n",
- "8322 Drama "
+ "659 Documentary \n",
+ "1601 Action, Drama, Thriller \n",
+ "2543 Drama \n",
+ "3485 Drama \n",
+ "4427 Drama \n",
+ "5369 Drama \n",
+ "6311 Comedy \n",
+ "7253 Drama \n",
+ "9148 Documentary \n",
+ "8194 Drama "
]
},
"execution_count": 15,
@@ -1595,11 +1595,11 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "943it [00:00, 4479.94it/s]\n",
- "943it [00:00, 4036.40it/s]\n",
- "943it [00:00, 4598.99it/s]\n",
- "943it [00:00, 5170.18it/s]\n",
- "943it [00:00, 4778.23it/s]\n"
+ "943it [00:00, 4220.01it/s]\n",
+ "943it [00:00, 3015.35it/s]\n",
+ "943it [00:00, 2308.31it/s]\n",
+ "943it [00:00, 3461.11it/s]\n",
+ "943it [00:00, 3442.41it/s]\n"
]
},
{
@@ -1688,45 +1688,45 @@
"
\n",
" \n",
" 0 | \n",
- " Self_GlobalAvg | \n",
- " 1.125760 | \n",
- " 0.943534 | \n",
- " 0.061188 | \n",
- " 0.025968 | \n",
- " 0.031383 | \n",
- " 0.041343 | \n",
- " 0.040558 | \n",
- " 0.032107 | \n",
- " 0.067695 | \n",
- " 0.027470 | \n",
- " 0.171187 | \n",
- " 0.509546 | \n",
- " 0.384942 | \n",
- " 1.000000 | \n",
- " 0.025974 | \n",
- " 2.711772 | \n",
- " 0.992003 | \n",
+ " Ready_Random | \n",
+ " 1.525959 | \n",
+ " 1.225122 | \n",
+ " 0.047402 | \n",
+ " 0.020629 | \n",
+ " 0.024471 | \n",
+ " 0.032042 | \n",
+ " 0.027682 | \n",
+ " 0.019353 | \n",
+ " 0.051593 | \n",
+ " 0.019428 | \n",
+ " 0.129062 | \n",
+ " 0.506826 | \n",
+ " 0.336161 | \n",
+ " 0.987593 | \n",
+ " 0.175325 | \n",
+ " 5.087656 | \n",
+ " 0.908118 | \n",
"
\n",
" \n",
" 0 | \n",
- " Ready_Random | \n",
- " 1.531724 | \n",
- " 1.230384 | \n",
- " 0.049417 | \n",
- " 0.022558 | \n",
- " 0.025490 | \n",
- " 0.033242 | \n",
- " 0.030365 | \n",
- " 0.022626 | \n",
- " 0.054166 | \n",
- " 0.021656 | \n",
- " 0.128378 | \n",
- " 0.507802 | \n",
- " 0.325557 | \n",
- " 0.988865 | \n",
- " 0.190476 | \n",
- " 5.100033 | \n",
- " 0.907724 | \n",
+ " Self_TopRated | \n",
+ " 1.030712 | \n",
+ " 0.820904 | \n",
+ " 0.000954 | \n",
+ " 0.000188 | \n",
+ " 0.000298 | \n",
+ " 0.000481 | \n",
+ " 0.000644 | \n",
+ " 0.000223 | \n",
+ " 0.001043 | \n",
+ " 0.000335 | \n",
+ " 0.003348 | \n",
+ " 0.496433 | \n",
+ " 0.009544 | \n",
+ " 0.699046 | \n",
+ " 0.005051 | \n",
+ " 1.945910 | \n",
+ " 0.995669 | \n",
"
\n",
" \n",
" 0 | \n",
@@ -1757,22 +1757,22 @@
" Model RMSE MAE precision recall F_1 \\\n",
"0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 0.118732 \n",
"0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 0.046030 \n",
- "0 Self_GlobalAvg 1.125760 0.943534 0.061188 0.025968 0.031383 \n",
- "0 Ready_Random 1.531724 1.230384 0.049417 0.022558 0.025490 \n",
+ "0 Ready_Random 1.525959 1.225122 0.047402 0.020629 0.024471 \n",
+ "0 Self_TopRated 1.030712 0.820904 0.000954 0.000188 0.000298 \n",
"0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 0.000278 \n",
"\n",
" F_05 precision_super recall_super NDCG mAP MRR \\\n",
"0 0.141584 0.130472 0.137473 0.214651 0.111707 0.400939 \n",
"0 0.061286 0.079614 0.056463 0.095957 0.043178 0.198193 \n",
- "0 0.041343 0.040558 0.032107 0.067695 0.027470 0.171187 \n",
- "0 0.033242 0.030365 0.022626 0.054166 0.021656 0.128378 \n",
+ "0 0.032042 0.027682 0.019353 0.051593 0.019428 0.129062 \n",
+ "0 0.000481 0.000644 0.000223 0.001043 0.000335 0.003348 \n",
"0 0.000463 0.000644 0.000189 0.000752 0.000168 0.001677 \n",
"\n",
" LAUC HR Reco in test Test coverage Shannon Gini \n",
"0 0.555546 0.765642 1.000000 0.038961 3.159079 0.987317 \n",
"0 0.515501 0.437964 1.000000 0.033911 2.836513 0.991139 \n",
- "0 0.509546 0.384942 1.000000 0.025974 2.711772 0.992003 \n",
- "0 0.507802 0.325557 0.988865 0.190476 5.100033 0.907724 \n",
+ "0 0.506826 0.336161 0.987593 0.175325 5.087656 0.908118 \n",
+ "0 0.496433 0.009544 0.699046 0.005051 1.945910 0.995669 \n",
"0 0.496424 0.009544 0.600530 0.005051 1.803126 0.996380 "
]
},
diff --git a/P2. Evaluation.pdf b/P2. Evaluation.pdf
index 60e0c74..44b1b4c 100644
Binary files a/P2. Evaluation.pdf and b/P2. Evaluation.pdf differ