diff --git a/P0. Data preparation.ipynb b/P0. Data preparation.ipynb index 9b20a57..69f002c 100644 --- a/P0. Data preparation.ipynb +++ b/P0. Data preparation.ipynb @@ -243,22 +243,22 @@ } ], "source": [ - "items_per_user=df.groupby(['item']).count()['rating']\n", + "users_per_item=df.groupby(['item']).count()['rating']\n", "\n", "plt.figure(figsize=(16,8))\n", - "plt.hist(items_per_user, bins=100)\n", + "plt.hist(users_per_item, bins=100)\n", "\n", "# Let's add median\n", - "t=items_per_user.median()\n", + "t=users_per_item.median()\n", "plt.axvline(t, color='k', linestyle='dashed', linewidth=1)\n", "plt.text(t*1.1, plt.ylim()[1]*0.9, 'Median: {:.0f}'.format(t))\n", "\n", "# Let's add also some percentiles\n", - "t=items_per_user.quantile(0.25)\n", + "t=users_per_item.quantile(0.25)\n", "plt.axvline(t, color='k', linestyle='dashed', linewidth=1)\n", "plt.text(t*1.1, plt.ylim()[1]*0.95, '25% quantile: {:.0f}'.format(t))\n", "\n", - "t=items_per_user.quantile(0.75)\n", + "t=users_per_item.quantile(0.75)\n", "plt.axvline(t, color='k', linestyle='dashed', linewidth=1)\n", "plt.text(t*1.05, plt.ylim()[1]*0.95, '75% quantile: {:.0f}'.format(t))\n", "\n", diff --git a/P1. Baseline.ipynb b/P1. Baseline.ipynb index c76aa0a..58d6b8a 100644 --- a/P1. Baseline.ipynb +++ b/P1. Baseline.ipynb @@ -306,7 +306,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "1.44 µs ± 184 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n", + "1.13 µs ± 79.9 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n", "Inefficient way to access items rated by user:\n" ] }, @@ -324,7 +324,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "172 µs ± 14.4 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n" + "149 µs ± 11.5 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n" ] } ], @@ -1184,14 +1184,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "RMSE: 1.5147\n", - "MAE: 1.2155\n" + "RMSE: 1.5239\n", + "MAE: 1.2268\n" ] }, { "data": { "text/plain": [ - "1.2154990549993152" + "1.2267993503843746" ] }, "execution_count": 24, diff --git a/P2. Evaluation.ipynb b/P2. Evaluation.ipynb index 1f5c329..1bc216d 100644 --- a/P2. Evaluation.ipynb +++ b/P2. Evaluation.ipynb @@ -273,7 +273,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "943it [00:00, 6497.15it/s]\n" + "943it [00:00, 7783.14it/s]\n" ] }, { @@ -477,7 +477,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "943it [00:00, 5143.71it/s]\n" + "943it [00:00, 7347.78it/s]\n" ] }, { @@ -585,11 +585,11 @@ "name": "stderr", "output_type": "stream", "text": [ - "943it [00:00, 3573.64it/s]\n", - "943it [00:00, 5141.54it/s]\n", - "943it [00:00, 2827.19it/s]\n", - "943it [00:00, 2513.13it/s]\n", - "943it [00:00, 3555.67it/s]\n" + "943it [00:00, 4894.39it/s]\n", + "943it [00:00, 4357.39it/s]\n", + "943it [00:00, 5045.11it/s]\n", + "943it [00:00, 4855.03it/s]\n", + "943it [00:00, 5359.75it/s]\n" ] } ], @@ -671,14 +671,14 @@ " \n", " 0\n", " Ready_Random\n", - " 1.525959\n", - " 1.225122\n", - " 0.047402\n", - " 0.020629\n", - " 0.024471\n", - " 0.032042\n", - " 0.027682\n", - " 0.019353\n", + " 1.523899\n", + " 1.226799\n", + " 0.046872\n", + " 0.022367\n", + " 0.025297\n", + " 0.032269\n", + " 0.031116\n", + " 0.027843\n", " \n", " \n", " 0\n", @@ -712,14 +712,14 @@ " Model RMSE MAE precision recall F_1 \\\n", "0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 0.118732 \n", "0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 0.046030 \n", - "0 Ready_Random 1.525959 1.225122 0.047402 0.020629 0.024471 \n", + "0 Ready_Random 1.523899 1.226799 0.046872 0.022367 0.025297 \n", "0 Self_TopRated 1.030712 0.820904 0.000954 0.000188 0.000298 \n", "0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 0.000278 \n", "\n", " F_05 precision_super recall_super \n", "0 0.141584 0.130472 0.137473 \n", "0 0.061286 0.079614 0.056463 \n", - "0 0.032042 0.027682 0.019353 \n", + "0 0.032269 0.031116 0.027843 \n", "0 0.000481 0.000644 0.000223 \n", "0 0.000463 0.000644 0.000189 " ] @@ -801,15 +801,15 @@ " \n", " 0\n", " Ready_Random\n", - " 0.051593\n", - " 0.019428\n", - " 0.129062\n", - " 0.506826\n", - " 0.336161\n", + " 0.051414\n", + " 0.019769\n", + " 0.127558\n", + " 0.507696\n", + " 0.332980\n", " 0.987593\n", - " 0.175325\n", - " 5.087656\n", - " 0.908118\n", + " 0.184704\n", + " 5.104710\n", + " 0.906035\n", " \n", " \n", " 0\n", @@ -845,14 +845,14 @@ " Model NDCG mAP MRR LAUC HR \\\n", "0 Self_TopPop 0.214651 0.111707 0.400939 0.555546 0.765642 \n", "0 Ready_Baseline 0.095957 0.043178 0.198193 0.515501 0.437964 \n", - "0 Ready_Random 0.051593 0.019428 0.129062 0.506826 0.336161 \n", + "0 Ready_Random 0.051414 0.019769 0.127558 0.507696 0.332980 \n", "0 Self_TopRated 0.001043 0.000335 0.003348 0.496433 0.009544 \n", "0 Self_BaselineUI 0.000752 0.000168 0.001677 0.496424 0.009544 \n", "\n", " Reco in test Test coverage Shannon Gini \n", "0 1.000000 0.038961 3.159079 0.987317 \n", "0 1.000000 0.033911 2.836513 0.991139 \n", - "0 0.987593 0.175325 5.087656 0.908118 \n", + "0 0.987593 0.184704 5.104710 0.906035 \n", "0 0.699046 0.005051 1.945910 0.995669 \n", "0 0.600530 0.005051 1.803126 0.996380 " ] @@ -882,7 +882,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "3it [00:00, 1191.68it/s]\n" + "3it [00:00, 4226.71it/s]\n" ] }, { @@ -1246,148 +1246,148 @@ " \n", " \n", " \n", - " 50941\n", - " 661\n", + " 54092\n", + " 365\n", " 5\n", - " It's a Wonderful Life (1946)\n", + " Boogie Nights (1997)\n", " Drama\n", " \n", " \n", - " 9531\n", - " 661\n", + " 55243\n", + " 365\n", " 5\n", - " Wizard of Oz, The (1939)\n", - " Adventure, Children's, Drama, Musical\n", + " Celluloid Closet, The (1995)\n", + " Documentary\n", " \n", " \n", - " 27182\n", - " 661\n", + " 21637\n", + " 365\n", " 5\n", - " Empire Strikes Back, The (1980)\n", - " Action, Adventure, Drama, Romance, Sci-Fi, War\n", + " In & Out (1997)\n", + " Comedy\n", " \n", " \n", - " 23944\n", - " 661\n", + " 36508\n", + " 365\n", " 5\n", - " Apocalypse Now (1979)\n", - " Drama, War\n", + " Swingers (1996)\n", + " Comedy, Drama\n", " \n", " \n", - " 20285\n", - " 661\n", + " 19398\n", + " 365\n", " 5\n", - " Return of the Jedi (1983)\n", - " Action, Adventure, Romance, Sci-Fi, War\n", + " Scream (1996)\n", + " Horror, Thriller\n", " \n", " \n", - " 37504\n", - " 661\n", + " 14343\n", + " 365\n", " 5\n", - " Aladdin (1992)\n", - " Animation, Children's, Comedy, Musical\n", + " Fargo (1996)\n", + " Crime, Drama, Thriller\n", " \n", " \n", - " 68312\n", - " 661\n", + " 23738\n", + " 365\n", " 5\n", - " Babe (1995)\n", - " Children's, Comedy, Drama\n", + " Chasing Amy (1997)\n", + " Drama, Romance\n", " \n", " \n", - " 16362\n", - " 661\n", + " 69960\n", + " 365\n", " 5\n", - " Apollo 13 (1995)\n", - " Action, Drama, Thriller\n", + " Beautiful Thing (1996)\n", + " Drama, Romance\n", " \n", " \n", - " 15168\n", - " 661\n", - " 5\n", - " Indiana Jones and the Last Crusade (1989)\n", - " Action, Adventure\n", + " 54753\n", + " 365\n", + " 4\n", + " Scream 2 (1997)\n", + " Horror, Thriller\n", " \n", " \n", - " 29402\n", - " 661\n", - " 5\n", - " Psycho (1960)\n", - " Horror, Romance, Thriller\n", + " 54552\n", + " 365\n", + " 4\n", + " Sense and Sensibility (1995)\n", + " Drama, Romance\n", " \n", " \n", - " 40755\n", - " 661\n", - " 5\n", - " Jean de Florette (1986)\n", - " Drama\n", - " \n", - " \n", - " 41950\n", - " 661\n", - " 5\n", - " Die Hard (1988)\n", - " Action, Thriller\n", - " \n", - " \n", - " 58932\n", - " 661\n", - " 5\n", - " Enchanted April (1991)\n", - " Drama\n", - " \n", - " \n", - " 43013\n", - " 661\n", - " 5\n", - " 2001: A Space Odyssey (1968)\n", - " Drama, Mystery, Sci-Fi, Thriller\n", - " \n", - " \n", - " 65664\n", - " 661\n", - " 5\n", - " Star Trek: The Wrath of Khan (1982)\n", + " 30051\n", + " 365\n", + " 4\n", + " Star Trek: First Contact (1996)\n", " Action, Adventure, Sci-Fi\n", " \n", + " \n", + " 47086\n", + " 365\n", + " 4\n", + " Primal Fear (1996)\n", + " Drama, Thriller\n", + " \n", + " \n", + " 62931\n", + " 365\n", + " 4\n", + " James and the Giant Peach (1996)\n", + " Animation, Children's, Musical\n", + " \n", + " \n", + " 38939\n", + " 365\n", + " 4\n", + " Full Monty, The (1997)\n", + " Comedy\n", + " \n", + " \n", + " 38764\n", + " 365\n", + " 4\n", + " First Wives Club, The (1996)\n", + " Comedy\n", + " \n", " \n", "\n", "" ], "text/plain": [ - " user rating title \\\n", - "50941 661 5 It's a Wonderful Life (1946) \n", - "9531 661 5 Wizard of Oz, The (1939) \n", - "27182 661 5 Empire Strikes Back, The (1980) \n", - "23944 661 5 Apocalypse Now (1979) \n", - "20285 661 5 Return of the Jedi (1983) \n", - "37504 661 5 Aladdin (1992) \n", - "68312 661 5 Babe (1995) \n", - "16362 661 5 Apollo 13 (1995) \n", - "15168 661 5 Indiana Jones and the Last Crusade (1989) \n", - "29402 661 5 Psycho (1960) \n", - "40755 661 5 Jean de Florette (1986) \n", - "41950 661 5 Die Hard (1988) \n", - "58932 661 5 Enchanted April (1991) \n", - "43013 661 5 2001: A Space Odyssey (1968) \n", - "65664 661 5 Star Trek: The Wrath of Khan (1982) \n", + " user rating title \\\n", + "54092 365 5 Boogie Nights (1997) \n", + "55243 365 5 Celluloid Closet, The (1995) \n", + "21637 365 5 In & Out (1997) \n", + "36508 365 5 Swingers (1996) \n", + "19398 365 5 Scream (1996) \n", + "14343 365 5 Fargo (1996) \n", + "23738 365 5 Chasing Amy (1997) \n", + "69960 365 5 Beautiful Thing (1996) \n", + "54753 365 4 Scream 2 (1997) \n", + "54552 365 4 Sense and Sensibility (1995) \n", + "30051 365 4 Star Trek: First Contact (1996) \n", + "47086 365 4 Primal Fear (1996) \n", + "62931 365 4 James and the Giant Peach (1996) \n", + "38939 365 4 Full Monty, The (1997) \n", + "38764 365 4 First Wives Club, The (1996) \n", "\n", - " genres \n", - "50941 Drama \n", - "9531 Adventure, Children's, Drama, Musical \n", - "27182 Action, Adventure, Drama, Romance, Sci-Fi, War \n", - "23944 Drama, War \n", - "20285 Action, Adventure, Romance, Sci-Fi, War \n", - "37504 Animation, Children's, Comedy, Musical \n", - "68312 Children's, Comedy, Drama \n", - "16362 Action, Drama, Thriller \n", - "15168 Action, Adventure \n", - "29402 Horror, Romance, Thriller \n", - "40755 Drama \n", - "41950 Action, Thriller \n", - "58932 Drama \n", - "43013 Drama, Mystery, Sci-Fi, Thriller \n", - "65664 Action, Adventure, Sci-Fi " + " genres \n", + "54092 Drama \n", + "55243 Documentary \n", + "21637 Comedy \n", + "36508 Comedy, Drama \n", + "19398 Horror, Thriller \n", + "14343 Crime, Drama, Thriller \n", + "23738 Drama, Romance \n", + "69960 Drama, Romance \n", + "54753 Horror, Thriller \n", + "54552 Drama, Romance \n", + "30051 Action, Adventure, Sci-Fi \n", + "47086 Drama, Thriller \n", + "62931 Animation, Children's, Musical \n", + "38939 Comedy \n", + "38764 Comedy " ] }, "metadata": {}, @@ -1429,71 +1429,71 @@ " \n", " \n", " \n", - " 659\n", - " 661.0\n", + " 363\n", + " 365.0\n", " 1\n", " Great Day in Harlem, A (1994)\n", " Documentary\n", " \n", " \n", - " 1601\n", - " 661.0\n", + " 1305\n", + " 365.0\n", " 2\n", " Tough and Deadly (1995)\n", " Action, Drama, Thriller\n", " \n", " \n", - " 2543\n", - " 661.0\n", + " 2248\n", + " 365.0\n", " 3\n", " Aiqing wansui (1994)\n", " Drama\n", " \n", " \n", - " 3485\n", - " 661.0\n", + " 3189\n", + " 365.0\n", " 4\n", " Delta of Venus (1994)\n", " Drama\n", " \n", " \n", - " 4427\n", - " 661.0\n", + " 4132\n", + " 365.0\n", " 5\n", " Someone Else's America (1995)\n", " Drama\n", " \n", " \n", - " 5369\n", - " 661.0\n", + " 5073\n", + " 365.0\n", " 6\n", " Saint of Fort Washington, The (1993)\n", " Drama\n", " \n", " \n", - " 6311\n", - " 661.0\n", + " 6015\n", + " 365.0\n", " 7\n", " Celestial Clockwork (1994)\n", " Comedy\n", " \n", " \n", - " 7253\n", - " 661.0\n", + " 6958\n", + " 365.0\n", " 8\n", " Some Mother's Son (1996)\n", " Drama\n", " \n", " \n", - " 9148\n", - " 661.0\n", + " 8852\n", + " 365.0\n", " 9\n", " Maya Lin: A Strong Clear Vision (1994)\n", " Documentary\n", " \n", " \n", - " 8194\n", - " 661.0\n", + " 7898\n", + " 365.0\n", " 10\n", " Prefontaine (1997)\n", " Drama\n", @@ -1504,28 +1504,28 @@ ], "text/plain": [ " user rec_nb title \\\n", - "659 661.0 1 Great Day in Harlem, A (1994) \n", - "1601 661.0 2 Tough and Deadly (1995) \n", - "2543 661.0 3 Aiqing wansui (1994) \n", - "3485 661.0 4 Delta of Venus (1994) \n", - "4427 661.0 5 Someone Else's America (1995) \n", - "5369 661.0 6 Saint of Fort Washington, The (1993) \n", - "6311 661.0 7 Celestial Clockwork (1994) \n", - "7253 661.0 8 Some Mother's Son (1996) \n", - "9148 661.0 9 Maya Lin: A Strong Clear Vision (1994) \n", - "8194 661.0 10 Prefontaine (1997) \n", + "363 365.0 1 Great Day in Harlem, A (1994) \n", + "1305 365.0 2 Tough and Deadly (1995) \n", + "2248 365.0 3 Aiqing wansui (1994) \n", + "3189 365.0 4 Delta of Venus (1994) \n", + "4132 365.0 5 Someone Else's America (1995) \n", + "5073 365.0 6 Saint of Fort Washington, The (1993) \n", + "6015 365.0 7 Celestial Clockwork (1994) \n", + "6958 365.0 8 Some Mother's Son (1996) \n", + "8852 365.0 9 Maya Lin: A Strong Clear Vision (1994) \n", + "7898 365.0 10 Prefontaine (1997) \n", "\n", " genres \n", - "659 Documentary \n", - "1601 Action, Drama, Thriller \n", - "2543 Drama \n", - "3485 Drama \n", - "4427 Drama \n", - "5369 Drama \n", - "6311 Comedy \n", - "7253 Drama \n", - "9148 Documentary \n", - "8194 Drama " + "363 Documentary \n", + "1305 Action, Drama, Thriller \n", + "2248 Drama \n", + "3189 Drama \n", + "4132 Drama \n", + "5073 Drama \n", + "6015 Comedy \n", + "6958 Drama \n", + "8852 Documentary \n", + "7898 Drama " ] }, "execution_count": 15, @@ -1571,7 +1571,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# project task 3: implement some other evaluation measure" + "# project task 2: implement some other evaluation measure" ] }, { @@ -1595,11 +1595,11 @@ "name": "stderr", "output_type": "stream", "text": [ - "943it [00:00, 4220.01it/s]\n", - "943it [00:00, 3015.35it/s]\n", - "943it [00:00, 2308.31it/s]\n", - "943it [00:00, 3461.11it/s]\n", - "943it [00:00, 3442.41it/s]\n" + "943it [00:00, 4859.65it/s]\n", + "943it [00:00, 4809.91it/s]\n", + "943it [00:00, 4678.68it/s]\n", + "943it [00:00, 3240.04it/s]\n", + "943it [00:00, 4796.98it/s]\n" ] }, { @@ -1689,23 +1689,23 @@ " \n", " 0\n", " Ready_Random\n", - " 1.525959\n", - " 1.225122\n", - " 0.047402\n", - " 0.020629\n", - " 0.024471\n", - " 0.032042\n", - " 0.027682\n", - " 0.019353\n", - " 0.051593\n", - " 0.019428\n", - " 0.129062\n", - " 0.506826\n", - " 0.336161\n", + " 1.523899\n", + " 1.226799\n", + " 0.046872\n", + " 0.022367\n", + " 0.025297\n", + " 0.032269\n", + " 0.031116\n", + " 0.027843\n", + " 0.051414\n", + " 0.019769\n", + " 0.127558\n", + " 0.507696\n", + " 0.332980\n", " 0.987593\n", - " 0.175325\n", - " 5.087656\n", - " 0.908118\n", + " 0.184704\n", + " 5.104710\n", + " 0.906035\n", " \n", " \n", " 0\n", @@ -1757,21 +1757,21 @@ " Model RMSE MAE precision recall F_1 \\\n", "0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 0.118732 \n", "0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 0.046030 \n", - "0 Ready_Random 1.525959 1.225122 0.047402 0.020629 0.024471 \n", + "0 Ready_Random 1.523899 1.226799 0.046872 0.022367 0.025297 \n", "0 Self_TopRated 1.030712 0.820904 0.000954 0.000188 0.000298 \n", "0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 0.000278 \n", "\n", " F_05 precision_super recall_super NDCG mAP MRR \\\n", "0 0.141584 0.130472 0.137473 0.214651 0.111707 0.400939 \n", "0 0.061286 0.079614 0.056463 0.095957 0.043178 0.198193 \n", - "0 0.032042 0.027682 0.019353 0.051593 0.019428 0.129062 \n", + "0 0.032269 0.031116 0.027843 0.051414 0.019769 0.127558 \n", "0 0.000481 0.000644 0.000223 0.001043 0.000335 0.003348 \n", "0 0.000463 0.000644 0.000189 0.000752 0.000168 0.001677 \n", "\n", " LAUC HR Reco in test Test coverage Shannon Gini \n", "0 0.555546 0.765642 1.000000 0.038961 3.159079 0.987317 \n", "0 0.515501 0.437964 1.000000 0.033911 2.836513 0.991139 \n", - "0 0.506826 0.336161 0.987593 0.175325 5.087656 0.908118 \n", + "0 0.507696 0.332980 0.987593 0.184704 5.104710 0.906035 \n", "0 0.496433 0.009544 0.699046 0.005051 1.945910 0.995669 \n", "0 0.496424 0.009544 0.600530 0.005051 1.803126 0.996380 " ] diff --git a/README.md b/README.md index d83e084..24d190e 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,3 @@ -# introduction_to_recommender_systems +# Introduction to recommender systems Materiały do przedmiotu Wprowadzenie do systemów rekomendacyjnych, semestr letni 2020/2021. \ No newline at end of file