finished first 2 lectures

This commit is contained in:
Robert 2021-03-23 21:52:46 +01:00
parent 18d5c09409
commit e36414e7ce
5 changed files with 360 additions and 374 deletions

View File

@ -13,12 +13,15 @@
"metadata": {},
"outputs": [],
"source": [
"# if you don't have some library installed try using pip (or pip3) to install it - you can do it from the notebook\n",
"# example: !pip install tqdm\n",
"# also, in the labs it's better to use the python3 kernel - ipython3 notebook\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"import scipy.sparse as sparse\n",
"import time\n",
"import random\n",
"import evaluation_measures as ev\n",
"import matplotlib\n",
"import matplotlib.pyplot as plt\n",
"import os\n",
@ -161,7 +164,7 @@
"text": [
"We have 943 users, 1682 items and 100000 ratings.\n",
"\n",
"Average number of ratings per user is 106.04. \n",
"Average number of ratings per user is 106.0445. \n",
"\n",
"Average number of ratings per item is 59.453.\n",
"\n",
@ -170,13 +173,13 @@
}
],
"source": [
"users, items, ratings=len(set(df['user'])), len(set(df['item'])), len(df)\n",
"users, items, ratings=df['user'].nunique(), df['item'].nunique(), len(df)\n",
"\n",
"print('We have {} users, {} items and {} ratings.\\n'.format(users, items, ratings))\n",
"print(f'We have {users} users, {items} items and {ratings} ratings.\\n')\n",
"\n",
"print('Average number of ratings per user is {}. \\n'.format(round(ratings/users,2)))\n",
"print('Average number of ratings per item is {}.\\n'.format(round(ratings/items,4)))\n",
"print('Data sparsity (% of missing entries) is {}%.'.format(round(100*ratings/(users*items),4)))"
"print(f'Average number of ratings per user is {round(ratings/users,4)}. \\n')\n",
"print(f'Average number of ratings per item is {round(ratings/items,4)}.\\n')\n",
"print(f'Data sparsity (% of missing entries) is {round(100*ratings/(users*items),4)}%.')"
]
},
{
@ -636,7 +639,6 @@
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"os.makedirs('./Datasets/toy-example/', exist_ok = True)"
]
},

View File

@ -239,11 +239,9 @@
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Number of ratings: 8 \n",
"Number of users: 3 \n",
"Number of items: 4 \n",
"\n"
"Number of ratings: 8\n",
"Number of users: 3\n",
"Number of items: 4\n"
]
}
],
@ -251,8 +249,9 @@
"print('Ratings matrix with missing entries replaced by zeros:')\n",
"display(sample_csr.todense())\n",
"\n",
"print('\\nNumber of ratings: {} \\nNumber of users: {} \\nNumber of items: {} \\n'\n",
" .format(sample_csr.nnz, sample_csr.shape[0], sample_csr.shape[1]))"
"print(f'Number of ratings: {sample_csr.nnz}')\n",
"print(f'Number of users: {sample_csr.shape[0]}')\n",
"print(f'Number of items: {sample_csr.shape[1]}')"
]
},
{
@ -278,7 +277,7 @@
"print('Regarding items:', sample_csr.indices)\n",
"\n",
"for i in range(sample_csr.shape[0]):\n",
" print('Where ratings from {} to {} belongs to user {}.'.format(sample_csr.indptr[i], sample_csr.indptr[i+1]-1, i))"
" print(f'Where ratings from {sample_csr.indptr[i]} to {sample_csr.indptr[i+1]-1} belongs to user {i}.')"
]
},
{
@ -307,7 +306,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"885 ns ± 165 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n",
"1.44 µs ± 184 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n",
"Inefficient way to access items rated by user:\n"
]
},
@ -325,7 +324,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"153 µs ± 9.4 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
"172 µs ± 14.4 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
]
}
],
@ -482,7 +481,7 @@
"display(sparse.diags(row_means).todense())\n",
"\n",
"print(\"\"\"Let's apply them in nonzero entries:\"\"\")\n",
"to_subtract=sparse.diags(row_means)*sample_csr.power(0)\n",
"to_subtract=sparse.diags(row_means)*(sample_csr>0)\n",
"display(to_subtract.todense())\n",
"\n",
"print(\"Finally after subtraction:\")\n",
@ -573,26 +572,26 @@
"metadata": {},
"outputs": [],
"source": [
"TopPop=[]\n",
"train_iu=train_ui.transpose().tocsr()\n",
"scaling_factor=train_ui.max()/max(np.diff(train_iu.indptr))\n",
"top_pop = []\n",
"train_iu = train_ui.transpose().tocsr()\n",
"scaling_factor = train_ui.max()/max(np.diff(train_iu.indptr))\n",
"\n",
"for i in range(train_iu.shape[0]):\n",
" TopPop.append((i, (train_iu.indptr[i+1]-train_iu.indptr[i])*scaling_factor))\n",
" top_pop.append((i, (train_iu.indptr[i+1]-train_iu.indptr[i])*scaling_factor))\n",
" \n",
"TopPop.sort(key=lambda x: x[1], reverse=True)\n",
"#TopPop is an array of pairs (item, rescaled_popularity) sorted descending from the most popular\n",
"top_pop.sort(key=lambda x: x[1], reverse=True)\n",
"# top_pop is a list of (item, rescaled_popularity) pairs sorted in descending order of popularity\n",
"\n",
"k=10\n",
"result=[]\n",
"k = 10\n",
"result = []\n",
"\n",
"for u in range(train_ui.shape[0]):\n",
" user_rated=train_ui.indices[train_ui.indptr[u]:train_ui.indptr[u+1]]\n",
" rec_user=[]\n",
" item_pos=0\n",
" user_rated = train_ui.indices[train_ui.indptr[u]:train_ui.indptr[u+1]]\n",
" rec_user = []\n",
" item_pos = 0\n",
" while len(rec_user)<10:\n",
" if TopPop[item_pos][0] not in user_rated:\n",
" rec_user.append((item_code_id[TopPop[item_pos][0]], TopPop[item_pos][1]))\n",
" if top_pop[item_pos][0] not in user_rated:\n",
" rec_user.append((item_code_id[top_pop[item_pos][0]], top_pop[item_pos][1]))\n",
" item_pos+=1\n",
" result.append([user_code_id[u]]+list(chain(*rec_user)))\n",
"\n",
@ -613,7 +612,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Self made global average"
"# Self made top rated"
]
},
{
@ -622,11 +621,15 @@
"metadata": {},
"outputs": [],
"source": [
"GlobalAvg=[]\n",
"avg=np.sum(train_ui)/train_ui.nnz\n",
"top_rated = []\n",
"global_avg = sum(train_iu.data)/train_ui.nnz\n",
"\n",
"for i in range(train_iu.shape[0]):\n",
" GlobalAvg.append((i, avg))\n",
" ratings = train_iu.data[train_iu.indptr[i]: train_iu.indptr[i+1]]\n",
" avg = np.mean(ratings) if len(ratings)>0 else global_avg\n",
" top_rated.append((i, avg))\n",
" \n",
"top_rated.sort(key=lambda x: x[1], reverse=True)\n",
" \n",
"k=10\n",
"result=[]\n",
@ -636,21 +639,21 @@
" rec_user=[]\n",
" item_pos=0\n",
" while len(rec_user)<10:\n",
" if GlobalAvg[item_pos][0] not in user_rated:\n",
" rec_user.append((item_code_id[GlobalAvg[item_pos][0]], GlobalAvg[item_pos][1]))\n",
" if top_rated[item_pos][0] not in user_rated:\n",
" rec_user.append((item_code_id[top_rated[item_pos][0]], top_rated[item_pos][1]))\n",
" item_pos+=1\n",
" result.append([user_code_id[u]]+list(chain(*rec_user)))\n",
"\n",
"(pd.DataFrame(result)).to_csv('Recommendations generated/ml-100k/Self_GlobalAvg_reco.csv', index=False, header=False)\n",
"(pd.DataFrame(result)).to_csv('Recommendations generated/ml-100k/Self_TopRated_reco.csv', index=False, header=False)\n",
"\n",
"\n",
"# estimations - the score is a bit artificial since this method is designed for ranking, not for scoring\n",
"\n",
"estimations=[]\n",
"d = dict(top_rated)\n",
"\n",
"for user, item in zip(*test_ui.nonzero()):\n",
" estimations.append([user_code_id[user], item_code_id[item], avg])\n",
"(pd.DataFrame(estimations)).to_csv('Recommendations generated/ml-100k/Self_GlobalAvg_estimations.csv', index=False, header=False)"
" estimations.append([user_code_id[user], item_code_id[item], d[item]])\n",
"(pd.DataFrame(estimations)).to_csv('Recommendations generated/ml-100k/Self_TopRated_estimations.csv', index=False, header=False)"
]
},
{
@ -706,50 +709,50 @@
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>3.529975</td>\n",
" <td>10</td>\n",
" <td>3.529975</td>\n",
" <td>25</td>\n",
" <td>3.529975</td>\n",
" <td>32</td>\n",
" <td>3.529975</td>\n",
" <td>33</td>\n",
" <td>814</td>\n",
" <td>5.0</td>\n",
" <td>1122</td>\n",
" <td>5.0</td>\n",
" <td>1189</td>\n",
" <td>5.0</td>\n",
" <td>1201</td>\n",
" <td>5.0</td>\n",
" <td>1293</td>\n",
" <td>...</td>\n",
" <td>44</td>\n",
" <td>3.529975</td>\n",
" <td>46</td>\n",
" <td>3.529975</td>\n",
" <td>50</td>\n",
" <td>3.529975</td>\n",
" <td>52</td>\n",
" <td>3.529975</td>\n",
" <td>55</td>\n",
" <td>3.529975</td>\n",
" <td>1306</td>\n",
" <td>5.0</td>\n",
" <td>1467</td>\n",
" <td>5.0</td>\n",
" <td>1491</td>\n",
" <td>5.0</td>\n",
" <td>1500</td>\n",
" <td>5.0</td>\n",
" <td>1536</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>3.529975</td>\n",
" <td>2</td>\n",
" <td>3.529975</td>\n",
" <td>3</td>\n",
" <td>3.529975</td>\n",
" <td>4</td>\n",
" <td>3.529975</td>\n",
" <td>5</td>\n",
" <td>119</td>\n",
" <td>5.0</td>\n",
" <td>814</td>\n",
" <td>5.0</td>\n",
" <td>1122</td>\n",
" <td>5.0</td>\n",
" <td>1189</td>\n",
" <td>5.0</td>\n",
" <td>1201</td>\n",
" <td>...</td>\n",
" <td>6</td>\n",
" <td>3.529975</td>\n",
" <td>7</td>\n",
" <td>3.529975</td>\n",
" <td>8</td>\n",
" <td>3.529975</td>\n",
" <td>9</td>\n",
" <td>3.529975</td>\n",
" <td>11</td>\n",
" <td>3.529975</td>\n",
" <td>1293</td>\n",
" <td>5.0</td>\n",
" <td>1306</td>\n",
" <td>5.0</td>\n",
" <td>1467</td>\n",
" <td>5.0</td>\n",
" <td>1491</td>\n",
" <td>5.0</td>\n",
" <td>1500</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
@ -757,13 +760,13 @@
"</div>"
],
"text/plain": [
" 0 1 2 3 4 5 6 7 8 9 ... 11 \\\n",
"0 1 5 3.529975 10 3.529975 25 3.529975 32 3.529975 33 ... 44 \n",
"1 2 1 3.529975 2 3.529975 3 3.529975 4 3.529975 5 ... 6 \n",
" 0 1 2 3 4 5 6 7 8 9 ... 11 12 13 \\\n",
"0 1 814 5.0 1122 5.0 1189 5.0 1201 5.0 1293 ... 1306 5.0 1467 \n",
"1 2 119 5.0 814 5.0 1122 5.0 1189 5.0 1201 ... 1293 5.0 1306 \n",
"\n",
" 12 13 14 15 16 17 18 19 20 \n",
"0 3.529975 46 3.529975 50 3.529975 52 3.529975 55 3.529975 \n",
"1 3.529975 7 3.529975 8 3.529975 9 3.529975 11 3.529975 \n",
" 14 15 16 17 18 19 20 \n",
"0 5.0 1491 5.0 1500 5.0 1536 5.0 \n",
"1 5.0 1467 5.0 1491 5.0 1500 5.0 \n",
"\n",
"[2 rows x 21 columns]"
]
@ -777,25 +780,6 @@
"pd.DataFrame(result)[:2]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Project task 1 - self made top rated"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# project task 1: implement TopRated\n",
"# Implement recommender system which will recommend movies (which user hasn't seen) with the highest average rating\n",
"# The output should be saved in 'Recommendations generated/ml-100k/Self_TopRated_reco.csv'\n",
"# and 'Recommendations generated/ml-100k/Self_TopRated_estimations.csv'"
]
},
{
"cell_type": "markdown",
"metadata": {},
@ -805,7 +789,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
@ -825,7 +809,7 @@
" \n",
" max_row_mean=np.max(row_means)\n",
" row_means[row_means==0]=max_row_mean+1\n",
" to_subtract_rows=sparse.diags(row_means)*result.power(0)\n",
" to_subtract_rows=sparse.diags(row_means)*(result>0)\n",
" to_subtract_rows.sort_indices() # needed to have valid .data\n",
" \n",
" subtract=to_subtract_rows.data\n",
@ -878,7 +862,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 19,
"metadata": {},
"outputs": [
{
@ -1046,7 +1030,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
@ -1065,17 +1049,17 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# project task 2: implement self-made BaselineIU"
"# project task 1: implement self-made BaselineIU"
]
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"# Implement a recommender system similar to BaselineUI that recommends movies the user hasn't seen,\n",
"# but first subtract col means then row means\n",
"# but first subtract column means then row means\n",
"# The output should be saved in 'Recommendations generated/ml-100k/Self_BaselineIU_reco.csv'\n",
"# and 'Recommendations generated/ml-100k/Self_BaselineIU_estimations.csv'"
]
@ -1089,7 +1073,7 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 22,
"metadata": {},
"outputs": [
{
@ -1146,7 +1130,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 23,
"metadata": {},
"outputs": [
{
@ -1163,7 +1147,7 @@
"0.7524871012820799"
]
},
"execution_count": 24,
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
@ -1193,24 +1177,24 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"RMSE: 1.5317\n",
"MAE: 1.2304\n"
"RMSE: 1.5147\n",
"MAE: 1.2155\n"
]
},
{
"data": {
"text/plain": [
"1.2303840461147084"
"1.2154990549993152"
]
},
"execution_count": 25,
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}

Binary file not shown.

View File

@ -273,7 +273,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"943it [00:00, 7666.87it/s]\n"
"943it [00:00, 6497.15it/s]\n"
]
},
{
@ -477,7 +477,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"943it [00:00, 7370.69it/s]\n"
"943it [00:00, 5143.71it/s]\n"
]
},
{
@ -585,11 +585,11 @@
"name": "stderr",
"output_type": "stream",
"text": [
"943it [00:00, 7772.74it/s]\n",
"943it [00:00, 5607.69it/s]\n",
"943it [00:00, 4737.64it/s]\n",
"943it [00:00, 4986.41it/s]\n",
"943it [00:00, 3513.77it/s]\n"
"943it [00:00, 3573.64it/s]\n",
"943it [00:00, 5141.54it/s]\n",
"943it [00:00, 2827.19it/s]\n",
"943it [00:00, 2513.13it/s]\n",
"943it [00:00, 3555.67it/s]\n"
]
}
],
@ -670,27 +670,27 @@
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Self_GlobalAvg</td>\n",
" <td>1.125760</td>\n",
" <td>0.943534</td>\n",
" <td>0.061188</td>\n",
" <td>0.025968</td>\n",
" <td>0.031383</td>\n",
" <td>0.041343</td>\n",
" <td>0.040558</td>\n",
" <td>0.032107</td>\n",
" <td>Ready_Random</td>\n",
" <td>1.525959</td>\n",
" <td>1.225122</td>\n",
" <td>0.047402</td>\n",
" <td>0.020629</td>\n",
" <td>0.024471</td>\n",
" <td>0.032042</td>\n",
" <td>0.027682</td>\n",
" <td>0.019353</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ready_Random</td>\n",
" <td>1.531724</td>\n",
" <td>1.230384</td>\n",
" <td>0.049417</td>\n",
" <td>0.022558</td>\n",
" <td>0.025490</td>\n",
" <td>0.033242</td>\n",
" <td>0.030365</td>\n",
" <td>0.022626</td>\n",
" <td>Self_TopRated</td>\n",
" <td>1.030712</td>\n",
" <td>0.820904</td>\n",
" <td>0.000954</td>\n",
" <td>0.000188</td>\n",
" <td>0.000298</td>\n",
" <td>0.000481</td>\n",
" <td>0.000644</td>\n",
" <td>0.000223</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
@ -712,15 +712,15 @@
" Model RMSE MAE precision recall F_1 \\\n",
"0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 0.118732 \n",
"0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 0.046030 \n",
"0 Self_GlobalAvg 1.125760 0.943534 0.061188 0.025968 0.031383 \n",
"0 Ready_Random 1.531724 1.230384 0.049417 0.022558 0.025490 \n",
"0 Ready_Random 1.525959 1.225122 0.047402 0.020629 0.024471 \n",
"0 Self_TopRated 1.030712 0.820904 0.000954 0.000188 0.000298 \n",
"0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 0.000278 \n",
"\n",
" F_05 precision_super recall_super \n",
"0 0.141584 0.130472 0.137473 \n",
"0 0.061286 0.079614 0.056463 \n",
"0 0.041343 0.040558 0.032107 \n",
"0 0.033242 0.030365 0.022626 \n",
"0 0.032042 0.027682 0.019353 \n",
"0 0.000481 0.000644 0.000223 \n",
"0 0.000463 0.000644 0.000189 "
]
},
@ -800,29 +800,29 @@
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Self_GlobalAvg</td>\n",
" <td>0.067695</td>\n",
" <td>0.027470</td>\n",
" <td>0.171187</td>\n",
" <td>0.509546</td>\n",
" <td>0.384942</td>\n",
" <td>1.000000</td>\n",
" <td>0.025974</td>\n",
" <td>2.711772</td>\n",
" <td>0.992003</td>\n",
" <td>Ready_Random</td>\n",
" <td>0.051593</td>\n",
" <td>0.019428</td>\n",
" <td>0.129062</td>\n",
" <td>0.506826</td>\n",
" <td>0.336161</td>\n",
" <td>0.987593</td>\n",
" <td>0.175325</td>\n",
" <td>5.087656</td>\n",
" <td>0.908118</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ready_Random</td>\n",
" <td>0.054166</td>\n",
" <td>0.021656</td>\n",
" <td>0.128378</td>\n",
" <td>0.507802</td>\n",
" <td>0.325557</td>\n",
" <td>0.988865</td>\n",
" <td>0.190476</td>\n",
" <td>5.100033</td>\n",
" <td>0.907724</td>\n",
" <td>Self_TopRated</td>\n",
" <td>0.001043</td>\n",
" <td>0.000335</td>\n",
" <td>0.003348</td>\n",
" <td>0.496433</td>\n",
" <td>0.009544</td>\n",
" <td>0.699046</td>\n",
" <td>0.005051</td>\n",
" <td>1.945910</td>\n",
" <td>0.995669</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
@ -845,15 +845,15 @@
" Model NDCG mAP MRR LAUC HR \\\n",
"0 Self_TopPop 0.214651 0.111707 0.400939 0.555546 0.765642 \n",
"0 Ready_Baseline 0.095957 0.043178 0.198193 0.515501 0.437964 \n",
"0 Self_GlobalAvg 0.067695 0.027470 0.171187 0.509546 0.384942 \n",
"0 Ready_Random 0.054166 0.021656 0.128378 0.507802 0.325557 \n",
"0 Ready_Random 0.051593 0.019428 0.129062 0.506826 0.336161 \n",
"0 Self_TopRated 0.001043 0.000335 0.003348 0.496433 0.009544 \n",
"0 Self_BaselineUI 0.000752 0.000168 0.001677 0.496424 0.009544 \n",
"\n",
" Reco in test Test coverage Shannon Gini \n",
"0 1.000000 0.038961 3.159079 0.987317 \n",
"0 1.000000 0.033911 2.836513 0.991139 \n",
"0 1.000000 0.025974 2.711772 0.992003 \n",
"0 0.988865 0.190476 5.100033 0.907724 \n",
"0 0.987593 0.175325 5.087656 0.908118 \n",
"0 0.699046 0.005051 1.945910 0.995669 \n",
"0 0.600530 0.005051 1.803126 0.996380 "
]
},
@ -882,7 +882,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"3it [00:00, 1941.81it/s]\n"
"3it [00:00, 1191.68it/s]\n"
]
},
{
@ -1246,109 +1246,109 @@
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2985</th>\n",
" <td>789</td>\n",
" <th>50941</th>\n",
" <td>661</td>\n",
" <td>5</td>\n",
" <td>Star Wars (1977)</td>\n",
" <td>Action, Adventure, Romance, Sci-Fi, War</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25980</th>\n",
" <td>789</td>\n",
" <td>5</td>\n",
" <td>Dead Man Walking (1995)</td>\n",
" <td>It's a Wonderful Life (1946)</td>\n",
" <td>Drama</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9357</th>\n",
" <td>789</td>\n",
" <th>9531</th>\n",
" <td>661</td>\n",
" <td>5</td>\n",
" <td>Last Supper, The (1995)</td>\n",
" <td>Drama, Thriller</td>\n",
" <td>Wizard of Oz, The (1939)</td>\n",
" <td>Adventure, Children's, Drama, Musical</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17306</th>\n",
" <td>789</td>\n",
" <th>27182</th>\n",
" <td>661</td>\n",
" <td>5</td>\n",
" <td>Leaving Las Vegas (1995)</td>\n",
" <td>Drama, Romance</td>\n",
" <td>Empire Strikes Back, The (1980)</td>\n",
" <td>Action, Adventure, Drama, Romance, Sci-Fi, War</td>\n",
" </tr>\n",
" <tr>\n",
" <th>36474</th>\n",
" <td>789</td>\n",
" <th>23944</th>\n",
" <td>661</td>\n",
" <td>5</td>\n",
" <td>Swingers (1996)</td>\n",
" <td>Comedy, Drama</td>\n",
" <td>Apocalypse Now (1979)</td>\n",
" <td>Drama, War</td>\n",
" </tr>\n",
" <tr>\n",
" <th>65139</th>\n",
" <td>789</td>\n",
" <td>4</td>\n",
" <td>Welcome to the Dollhouse (1995)</td>\n",
" <td>Comedy, Drama</td>\n",
" </tr>\n",
" <tr>\n",
" <th>61975</th>\n",
" <td>789</td>\n",
" <td>4</td>\n",
" <td>Private Parts (1997)</td>\n",
" <td>Comedy, Drama</td>\n",
" </tr>\n",
" <tr>\n",
" <th>56522</th>\n",
" <td>789</td>\n",
" <td>4</td>\n",
" <td>Waiting for Guffman (1996)</td>\n",
" <td>Comedy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41414</th>\n",
" <td>789</td>\n",
" <td>4</td>\n",
" <td>Donnie Brasco (1997)</td>\n",
" <td>Crime, Drama</td>\n",
" </tr>\n",
" <tr>\n",
" <th>36617</th>\n",
" <td>789</td>\n",
" <td>4</td>\n",
" <td>Lone Star (1996)</td>\n",
" <td>Drama, Mystery</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24501</th>\n",
" <td>789</td>\n",
" <td>4</td>\n",
" <td>People vs. Larry Flynt, The (1996)</td>\n",
" <td>Drama</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20210</th>\n",
" <td>789</td>\n",
" <td>4</td>\n",
" <th>20285</th>\n",
" <td>661</td>\n",
" <td>5</td>\n",
" <td>Return of the Jedi (1983)</td>\n",
" <td>Action, Adventure, Romance, Sci-Fi, War</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8230</th>\n",
" <td>789</td>\n",
" <td>3</td>\n",
" <td>Beautiful Girls (1996)</td>\n",
" <th>37504</th>\n",
" <td>661</td>\n",
" <td>5</td>\n",
" <td>Aladdin (1992)</td>\n",
" <td>Animation, Children's, Comedy, Musical</td>\n",
" </tr>\n",
" <tr>\n",
" <th>68312</th>\n",
" <td>661</td>\n",
" <td>5</td>\n",
" <td>Babe (1995)</td>\n",
" <td>Children's, Comedy, Drama</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16362</th>\n",
" <td>661</td>\n",
" <td>5</td>\n",
" <td>Apollo 13 (1995)</td>\n",
" <td>Action, Drama, Thriller</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15168</th>\n",
" <td>661</td>\n",
" <td>5</td>\n",
" <td>Indiana Jones and the Last Crusade (1989)</td>\n",
" <td>Action, Adventure</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29402</th>\n",
" <td>661</td>\n",
" <td>5</td>\n",
" <td>Psycho (1960)</td>\n",
" <td>Horror, Romance, Thriller</td>\n",
" </tr>\n",
" <tr>\n",
" <th>40755</th>\n",
" <td>661</td>\n",
" <td>5</td>\n",
" <td>Jean de Florette (1986)</td>\n",
" <td>Drama</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19781</th>\n",
" <td>789</td>\n",
" <td>3</td>\n",
" <td>Liar Liar (1997)</td>\n",
" <td>Comedy</td>\n",
" <th>41950</th>\n",
" <td>661</td>\n",
" <td>5</td>\n",
" <td>Die Hard (1988)</td>\n",
" <td>Action, Thriller</td>\n",
" </tr>\n",
" <tr>\n",
" <th>39387</th>\n",
" <td>789</td>\n",
" <td>3</td>\n",
" <td>Sleepers (1996)</td>\n",
" <td>Crime, Drama</td>\n",
" <th>58932</th>\n",
" <td>661</td>\n",
" <td>5</td>\n",
" <td>Enchanted April (1991)</td>\n",
" <td>Drama</td>\n",
" </tr>\n",
" <tr>\n",
" <th>43013</th>\n",
" <td>661</td>\n",
" <td>5</td>\n",
" <td>2001: A Space Odyssey (1968)</td>\n",
" <td>Drama, Mystery, Sci-Fi, Thriller</td>\n",
" </tr>\n",
" <tr>\n",
" <th>65664</th>\n",
" <td>661</td>\n",
" <td>5</td>\n",
" <td>Star Trek: The Wrath of Khan (1982)</td>\n",
" <td>Action, Adventure, Sci-Fi</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
@ -1356,38 +1356,38 @@
],
"text/plain": [
" user rating title \\\n",
"2985 789 5 Star Wars (1977) \n",
"25980 789 5 Dead Man Walking (1995) \n",
"9357 789 5 Last Supper, The (1995) \n",
"17306 789 5 Leaving Las Vegas (1995) \n",
"36474 789 5 Swingers (1996) \n",
"65139 789 4 Welcome to the Dollhouse (1995) \n",
"61975 789 4 Private Parts (1997) \n",
"56522 789 4 Waiting for Guffman (1996) \n",
"41414 789 4 Donnie Brasco (1997) \n",
"36617 789 4 Lone Star (1996) \n",
"24501 789 4 People vs. Larry Flynt, The (1996) \n",
"20210 789 4 Return of the Jedi (1983) \n",
"8230 789 3 Beautiful Girls (1996) \n",
"19781 789 3 Liar Liar (1997) \n",
"39387 789 3 Sleepers (1996) \n",
"50941 661 5 It's a Wonderful Life (1946) \n",
"9531 661 5 Wizard of Oz, The (1939) \n",
"27182 661 5 Empire Strikes Back, The (1980) \n",
"23944 661 5 Apocalypse Now (1979) \n",
"20285 661 5 Return of the Jedi (1983) \n",
"37504 661 5 Aladdin (1992) \n",
"68312 661 5 Babe (1995) \n",
"16362 661 5 Apollo 13 (1995) \n",
"15168 661 5 Indiana Jones and the Last Crusade (1989) \n",
"29402 661 5 Psycho (1960) \n",
"40755 661 5 Jean de Florette (1986) \n",
"41950 661 5 Die Hard (1988) \n",
"58932 661 5 Enchanted April (1991) \n",
"43013 661 5 2001: A Space Odyssey (1968) \n",
"65664 661 5 Star Trek: The Wrath of Khan (1982) \n",
"\n",
" genres \n",
"2985 Action, Adventure, Romance, Sci-Fi, War \n",
"25980 Drama \n",
"9357 Drama, Thriller \n",
"17306 Drama, Romance \n",
"36474 Comedy, Drama \n",
"65139 Comedy, Drama \n",
"61975 Comedy, Drama \n",
"56522 Comedy \n",
"41414 Crime, Drama \n",
"36617 Drama, Mystery \n",
"24501 Drama \n",
"20210 Action, Adventure, Romance, Sci-Fi, War \n",
"8230 Drama \n",
"19781 Comedy \n",
"39387 Crime, Drama "
"50941 Drama \n",
"9531 Adventure, Children's, Drama, Musical \n",
"27182 Action, Adventure, Drama, Romance, Sci-Fi, War \n",
"23944 Drama, War \n",
"20285 Action, Adventure, Romance, Sci-Fi, War \n",
"37504 Animation, Children's, Comedy, Musical \n",
"68312 Children's, Comedy, Drama \n",
"16362 Action, Drama, Thriller \n",
"15168 Action, Adventure \n",
"29402 Horror, Romance, Thriller \n",
"40755 Drama \n",
"41950 Action, Thriller \n",
"58932 Drama \n",
"43013 Drama, Mystery, Sci-Fi, Thriller \n",
"65664 Action, Adventure, Sci-Fi "
]
},
"metadata": {},
@ -1429,71 +1429,71 @@
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>787</th>\n",
" <td>789.0</td>\n",
" <th>659</th>\n",
" <td>661.0</td>\n",
" <td>1</td>\n",
" <td>Great Day in Harlem, A (1994)</td>\n",
" <td>Documentary</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1729</th>\n",
" <td>789.0</td>\n",
" <th>1601</th>\n",
" <td>661.0</td>\n",
" <td>2</td>\n",
" <td>Tough and Deadly (1995)</td>\n",
" <td>Action, Drama, Thriller</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2671</th>\n",
" <td>789.0</td>\n",
" <th>2543</th>\n",
" <td>661.0</td>\n",
" <td>3</td>\n",
" <td>Aiqing wansui (1994)</td>\n",
" <td>Drama</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3613</th>\n",
" <td>789.0</td>\n",
" <th>3485</th>\n",
" <td>661.0</td>\n",
" <td>4</td>\n",
" <td>Delta of Venus (1994)</td>\n",
" <td>Drama</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4555</th>\n",
" <td>789.0</td>\n",
" <th>4427</th>\n",
" <td>661.0</td>\n",
" <td>5</td>\n",
" <td>Someone Else's America (1995)</td>\n",
" <td>Drama</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5497</th>\n",
" <td>789.0</td>\n",
" <th>5369</th>\n",
" <td>661.0</td>\n",
" <td>6</td>\n",
" <td>Saint of Fort Washington, The (1993)</td>\n",
" <td>Drama</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6439</th>\n",
" <td>789.0</td>\n",
" <th>6311</th>\n",
" <td>661.0</td>\n",
" <td>7</td>\n",
" <td>Celestial Clockwork (1994)</td>\n",
" <td>Comedy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7380</th>\n",
" <td>789.0</td>\n",
" <th>7253</th>\n",
" <td>661.0</td>\n",
" <td>8</td>\n",
" <td>Some Mother's Son (1996)</td>\n",
" <td>Drama</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9276</th>\n",
" <td>789.0</td>\n",
" <th>9148</th>\n",
" <td>661.0</td>\n",
" <td>9</td>\n",
" <td>Maya Lin: A Strong Clear Vision (1994)</td>\n",
" <td>Documentary</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8322</th>\n",
" <td>789.0</td>\n",
" <th>8194</th>\n",
" <td>661.0</td>\n",
" <td>10</td>\n",
" <td>Prefontaine (1997)</td>\n",
" <td>Drama</td>\n",
@ -1504,28 +1504,28 @@
],
"text/plain": [
" user rec_nb title \\\n",
"787 789.0 1 Great Day in Harlem, A (1994) \n",
"1729 789.0 2 Tough and Deadly (1995) \n",
"2671 789.0 3 Aiqing wansui (1994) \n",
"3613 789.0 4 Delta of Venus (1994) \n",
"4555 789.0 5 Someone Else's America (1995) \n",
"5497 789.0 6 Saint of Fort Washington, The (1993) \n",
"6439 789.0 7 Celestial Clockwork (1994) \n",
"7380 789.0 8 Some Mother's Son (1996) \n",
"9276 789.0 9 Maya Lin: A Strong Clear Vision (1994) \n",
"8322 789.0 10 Prefontaine (1997) \n",
"659 661.0 1 Great Day in Harlem, A (1994) \n",
"1601 661.0 2 Tough and Deadly (1995) \n",
"2543 661.0 3 Aiqing wansui (1994) \n",
"3485 661.0 4 Delta of Venus (1994) \n",
"4427 661.0 5 Someone Else's America (1995) \n",
"5369 661.0 6 Saint of Fort Washington, The (1993) \n",
"6311 661.0 7 Celestial Clockwork (1994) \n",
"7253 661.0 8 Some Mother's Son (1996) \n",
"9148 661.0 9 Maya Lin: A Strong Clear Vision (1994) \n",
"8194 661.0 10 Prefontaine (1997) \n",
"\n",
" genres \n",
"787 Documentary \n",
"1729 Action, Drama, Thriller \n",
"2671 Drama \n",
"3613 Drama \n",
"4555 Drama \n",
"5497 Drama \n",
"6439 Comedy \n",
"7380 Drama \n",
"9276 Documentary \n",
"8322 Drama "
"659 Documentary \n",
"1601 Action, Drama, Thriller \n",
"2543 Drama \n",
"3485 Drama \n",
"4427 Drama \n",
"5369 Drama \n",
"6311 Comedy \n",
"7253 Drama \n",
"9148 Documentary \n",
"8194 Drama "
]
},
"execution_count": 15,
@ -1595,11 +1595,11 @@
"name": "stderr",
"output_type": "stream",
"text": [
"943it [00:00, 4479.94it/s]\n",
"943it [00:00, 4036.40it/s]\n",
"943it [00:00, 4598.99it/s]\n",
"943it [00:00, 5170.18it/s]\n",
"943it [00:00, 4778.23it/s]\n"
"943it [00:00, 4220.01it/s]\n",
"943it [00:00, 3015.35it/s]\n",
"943it [00:00, 2308.31it/s]\n",
"943it [00:00, 3461.11it/s]\n",
"943it [00:00, 3442.41it/s]\n"
]
},
{
@ -1688,45 +1688,45 @@
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Self_GlobalAvg</td>\n",
" <td>1.125760</td>\n",
" <td>0.943534</td>\n",
" <td>0.061188</td>\n",
" <td>0.025968</td>\n",
" <td>0.031383</td>\n",
" <td>0.041343</td>\n",
" <td>0.040558</td>\n",
" <td>0.032107</td>\n",
" <td>0.067695</td>\n",
" <td>0.027470</td>\n",
" <td>0.171187</td>\n",
" <td>0.509546</td>\n",
" <td>0.384942</td>\n",
" <td>1.000000</td>\n",
" <td>0.025974</td>\n",
" <td>2.711772</td>\n",
" <td>0.992003</td>\n",
" <td>Ready_Random</td>\n",
" <td>1.525959</td>\n",
" <td>1.225122</td>\n",
" <td>0.047402</td>\n",
" <td>0.020629</td>\n",
" <td>0.024471</td>\n",
" <td>0.032042</td>\n",
" <td>0.027682</td>\n",
" <td>0.019353</td>\n",
" <td>0.051593</td>\n",
" <td>0.019428</td>\n",
" <td>0.129062</td>\n",
" <td>0.506826</td>\n",
" <td>0.336161</td>\n",
" <td>0.987593</td>\n",
" <td>0.175325</td>\n",
" <td>5.087656</td>\n",
" <td>0.908118</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Ready_Random</td>\n",
" <td>1.531724</td>\n",
" <td>1.230384</td>\n",
" <td>0.049417</td>\n",
" <td>0.022558</td>\n",
" <td>0.025490</td>\n",
" <td>0.033242</td>\n",
" <td>0.030365</td>\n",
" <td>0.022626</td>\n",
" <td>0.054166</td>\n",
" <td>0.021656</td>\n",
" <td>0.128378</td>\n",
" <td>0.507802</td>\n",
" <td>0.325557</td>\n",
" <td>0.988865</td>\n",
" <td>0.190476</td>\n",
" <td>5.100033</td>\n",
" <td>0.907724</td>\n",
" <td>Self_TopRated</td>\n",
" <td>1.030712</td>\n",
" <td>0.820904</td>\n",
" <td>0.000954</td>\n",
" <td>0.000188</td>\n",
" <td>0.000298</td>\n",
" <td>0.000481</td>\n",
" <td>0.000644</td>\n",
" <td>0.000223</td>\n",
" <td>0.001043</td>\n",
" <td>0.000335</td>\n",
" <td>0.003348</td>\n",
" <td>0.496433</td>\n",
" <td>0.009544</td>\n",
" <td>0.699046</td>\n",
" <td>0.005051</td>\n",
" <td>1.945910</td>\n",
" <td>0.995669</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
@ -1757,22 +1757,22 @@
" Model RMSE MAE precision recall F_1 \\\n",
"0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 0.118732 \n",
"0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 0.046030 \n",
"0 Self_GlobalAvg 1.125760 0.943534 0.061188 0.025968 0.031383 \n",
"0 Ready_Random 1.531724 1.230384 0.049417 0.022558 0.025490 \n",
"0 Ready_Random 1.525959 1.225122 0.047402 0.020629 0.024471 \n",
"0 Self_TopRated 1.030712 0.820904 0.000954 0.000188 0.000298 \n",
"0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 0.000278 \n",
"\n",
" F_05 precision_super recall_super NDCG mAP MRR \\\n",
"0 0.141584 0.130472 0.137473 0.214651 0.111707 0.400939 \n",
"0 0.061286 0.079614 0.056463 0.095957 0.043178 0.198193 \n",
"0 0.041343 0.040558 0.032107 0.067695 0.027470 0.171187 \n",
"0 0.033242 0.030365 0.022626 0.054166 0.021656 0.128378 \n",
"0 0.032042 0.027682 0.019353 0.051593 0.019428 0.129062 \n",
"0 0.000481 0.000644 0.000223 0.001043 0.000335 0.003348 \n",
"0 0.000463 0.000644 0.000189 0.000752 0.000168 0.001677 \n",
"\n",
" LAUC HR Reco in test Test coverage Shannon Gini \n",
"0 0.555546 0.765642 1.000000 0.038961 3.159079 0.987317 \n",
"0 0.515501 0.437964 1.000000 0.033911 2.836513 0.991139 \n",
"0 0.509546 0.384942 1.000000 0.025974 2.711772 0.992003 \n",
"0 0.507802 0.325557 0.988865 0.190476 5.100033 0.907724 \n",
"0 0.506826 0.336161 0.987593 0.175325 5.087656 0.908118 \n",
"0 0.496433 0.009544 0.699046 0.005051 1.945910 0.995669 \n",
"0 0.496424 0.009544 0.600530 0.005051 1.803126 0.996380 "
]
},

Binary file not shown.