From 18d5c0940995a6f1011841b06de0cfaaa9060b16 Mon Sep 17 00:00:00 2001 From: Robert Date: Sat, 20 Mar 2021 20:13:28 +0100 Subject: [PATCH] slides updated --- P0. Data preparation.ipynb | 42 +- P1. Baseline.ipynb | 10 +- P2. Evaluation.ipynb | 1431 +++++++----------------------------- 3 files changed, 276 insertions(+), 1207 deletions(-) diff --git a/P0. Data preparation.ipynb b/P0. Data preparation.ipynb index 63b0137..c87a932 100644 --- a/P0. Data preparation.ipynb +++ b/P0. Data preparation.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -55,7 +55,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -134,7 +134,7 @@ "4 166 346 1 886397596" ] }, - "execution_count": 8, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -152,7 +152,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -181,7 +181,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -223,7 +223,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -265,7 +265,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -280,7 +280,7 @@ "Name: user, dtype: float64" ] }, - "execution_count": 12, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -298,7 +298,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -309,7 +309,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -336,7 +336,7 @@ " 18: 'Western'}" ] }, - "execution_count": 14, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -347,7 +347,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -356,7 +356,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -500,7 +500,7 @@ "[3 rows x 24 columns]" ] }, - "execution_count": 16, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -511,7 +511,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -521,7 +521,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -530,7 +530,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -540,7 +540,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -613,7 +613,7 @@ "4 5 Copycat (1995) Crime, Drama, Thriller" ] }, - "execution_count": 20, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -632,7 +632,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -642,7 +642,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ diff --git a/P1. Baseline.ipynb b/P1. Baseline.ipynb index 5e073cf..889bc05 100644 --- a/P1. Baseline.ipynb +++ b/P1. Baseline.ipynb @@ -307,7 +307,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "1.1 µs ± 63.6 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n", + "885 ns ± 165 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n", "Inefficient way to access items rated by user:\n" ] }, @@ -325,7 +325,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "149 µs ± 13.2 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n" + "153 µs ± 9.4 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n" ] } ], @@ -1200,14 +1200,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "RMSE: 1.5133\n", - "MAE: 1.2143\n" + "RMSE: 1.5317\n", + "MAE: 1.2304\n" ] }, { "data": { "text/plain": [ - "1.2143089419556985" + "1.2303840461147084" ] }, "execution_count": 25, diff --git a/P2. Evaluation.ipynb b/P2. Evaluation.ipynb index 3caa717..6962e33 100644 --- a/P2. Evaluation.ipynb +++ b/P2. Evaluation.ipynb @@ -273,7 +273,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "943it [00:00, 7647.02it/s]\n" + "943it [00:00, 7666.87it/s]\n" ] }, { @@ -477,7 +477,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "943it [00:00, 7829.39it/s]\n" + "943it [00:00, 7370.69it/s]\n" ] }, { @@ -585,23 +585,11 @@ "name": "stderr", "output_type": "stream", "text": [ - "943it [00:00, 7954.38it/s]\n", - "943it [00:00, 4698.62it/s]\n", - "943it [00:00, 5104.10it/s]\n", - "943it [00:00, 4853.27it/s]\n", - "943it [00:00, 4669.78it/s]\n", - "943it [00:00, 4207.34it/s]\n", - "943it [00:00, 5248.26it/s]\n", - "943it [00:00, 4477.59it/s]\n", - "943it [00:00, 4280.31it/s]\n", - "943it [00:00, 3915.20it/s]\n", - "943it [00:00, 4648.51it/s]\n", - "943it [00:00, 3819.45it/s]\n", - "943it [00:00, 4405.24it/s]\n", - "943it [00:00, 4725.10it/s]\n", - "943it [00:00, 4426.18it/s]\n", - "943it [00:00, 4179.78it/s]\n", - "943it [00:00, 4919.92it/s]\n" + "943it [00:00, 7772.74it/s]\n", + "943it [00:00, 5607.69it/s]\n", + "943it [00:00, 4737.64it/s]\n", + "943it [00:00, 4986.41it/s]\n", + "943it [00:00, 3513.77it/s]\n" ] } ], @@ -658,54 +646,6 @@ " \n", " \n", " 0\n", - " Ready_LightFM\n", - " 162.703697\n", - " 160.837311\n", - " 0.349523\n", - " 0.226193\n", - " 0.225202\n", - " 0.265538\n", - " 0.246459\n", - " 0.266934\n", - " \n", - " \n", - " 0\n", - " Ready_LightFMpureMF\n", - " 8.015665\n", - " 7.520402\n", - " 0.333934\n", - " 0.216047\n", - " 0.214731\n", - " 0.253177\n", - " 0.232725\n", - " 0.254485\n", - " \n", - " \n", - " 0\n", - " Self_P3\n", - " 3.702446\n", - " 3.527273\n", - " 0.282185\n", - " 0.192092\n", - " 0.186749\n", - " 0.216980\n", - " 0.204185\n", - " 0.240096\n", - " \n", - " \n", - " 0\n", - " Ready_ImplicitALS\n", - " 3.267237\n", - " 3.068493\n", - " 0.252068\n", - " 0.182639\n", - " 0.175182\n", - " 0.199457\n", - " 0.167167\n", - " 0.216308\n", - " \n", - " \n", - " 0\n", " Self_TopPop\n", " 2.508258\n", " 2.217909\n", @@ -718,42 +658,6 @@ " \n", " \n", " 0\n", - " Ready_LightFMcontent\n", - " 182.840876\n", - " 180.771141\n", - " 0.161294\n", - " 0.100424\n", - " 0.101736\n", - " 0.121096\n", - " 0.101395\n", - " 0.110660\n", - " \n", - " \n", - " 0\n", - " Ready_SVD\n", - " 0.953076\n", - " 0.750219\n", - " 0.094804\n", - " 0.045302\n", - " 0.051519\n", - " 0.065833\n", - " 0.083691\n", - " 0.074336\n", - " \n", - " \n", - " 0\n", - " Self_SVD\n", - " 0.913840\n", - " 0.717167\n", - " 0.105620\n", - " 0.044070\n", - " 0.053839\n", - " 0.071381\n", - " 0.096030\n", - " 0.074982\n", - " \n", - " \n", - " 0\n", " Ready_Baseline\n", " 0.949459\n", " 0.752487\n", @@ -766,18 +670,6 @@ " \n", " \n", " 0\n", - " Ready_SVDBiased\n", - " 0.941830\n", - " 0.742841\n", - " 0.083033\n", - " 0.034867\n", - " 0.041967\n", - " 0.055644\n", - " 0.072425\n", - " 0.054271\n", - " \n", - " \n", - " 0\n", " Self_GlobalAvg\n", " 1.125760\n", " 0.943534\n", @@ -791,50 +683,14 @@ " \n", " 0\n", " Ready_Random\n", - " 1.513348\n", - " 1.214309\n", - " 0.044221\n", - " 0.019366\n", - " 0.022599\n", - " 0.029593\n", - " 0.026288\n", - " 0.018226\n", - " \n", - " \n", - " 0\n", - " Ready_I-KNN\n", - " 1.030386\n", - " 0.813067\n", - " 0.026087\n", - " 0.006908\n", - " 0.010593\n", - " 0.016046\n", - " 0.021137\n", - " 0.009522\n", - " \n", - " \n", - " 0\n", - " Ready_I-KNNBaseline\n", - " 0.935327\n", - " 0.737424\n", - " 0.002545\n", - " 0.000755\n", - " 0.001105\n", - " 0.001602\n", - " 0.002253\n", - " 0.000930\n", - " \n", - " \n", - " 0\n", - " Ready_U-KNN\n", - " 1.023495\n", - " 0.807913\n", - " 0.000742\n", - " 0.000205\n", - " 0.000305\n", - " 0.000449\n", - " 0.000536\n", - " 0.000198\n", + " 1.531724\n", + " 1.230384\n", + " 0.049417\n", + " 0.022558\n", + " 0.025490\n", + " 0.033242\n", + " 0.030365\n", + " 0.022626\n", " \n", " \n", " 0\n", @@ -848,60 +704,24 @@ " 0.000644\n", " 0.000189\n", " \n", - " \n", - " 0\n", - " Self_IKNN\n", - " 1.018363\n", - " 0.808793\n", - " 0.000318\n", - " 0.000108\n", - " 0.000140\n", - " 0.000189\n", - " 0.000000\n", - " 0.000000\n", - " \n", " \n", "\n", "" ], "text/plain": [ - " Model RMSE MAE precision recall \\\n", - "0 Ready_LightFM 162.703697 160.837311 0.349523 0.226193 \n", - "0 Ready_LightFMpureMF 8.015665 7.520402 0.333934 0.216047 \n", - "0 Self_P3 3.702446 3.527273 0.282185 0.192092 \n", - "0 Ready_ImplicitALS 3.267237 3.068493 0.252068 0.182639 \n", - "0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 \n", - "0 Ready_LightFMcontent 182.840876 180.771141 0.161294 0.100424 \n", - "0 Ready_SVD 0.953076 0.750219 0.094804 0.045302 \n", - "0 Self_SVD 0.913840 0.717167 0.105620 0.044070 \n", - "0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 \n", - "0 Ready_SVDBiased 0.941830 0.742841 0.083033 0.034867 \n", - "0 Self_GlobalAvg 1.125760 0.943534 0.061188 0.025968 \n", - "0 Ready_Random 1.513348 1.214309 0.044221 0.019366 \n", - "0 Ready_I-KNN 1.030386 0.813067 0.026087 0.006908 \n", - "0 Ready_I-KNNBaseline 0.935327 0.737424 0.002545 0.000755 \n", - "0 Ready_U-KNN 1.023495 0.807913 0.000742 0.000205 \n", - "0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 \n", - "0 Self_IKNN 1.018363 0.808793 0.000318 0.000108 \n", + " Model RMSE MAE precision recall F_1 \\\n", + "0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 0.118732 \n", + "0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 0.046030 \n", + "0 Self_GlobalAvg 1.125760 0.943534 0.061188 0.025968 0.031383 \n", + "0 Ready_Random 1.531724 1.230384 0.049417 0.022558 0.025490 \n", + "0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 0.000278 \n", "\n", - " F_1 F_05 precision_super recall_super \n", - "0 0.225202 0.265538 0.246459 0.266934 \n", - "0 0.214731 0.253177 0.232725 0.254485 \n", - "0 0.186749 0.216980 0.204185 0.240096 \n", - "0 0.175182 0.199457 0.167167 0.216308 \n", - "0 0.118732 0.141584 0.130472 0.137473 \n", - "0 0.101736 0.121096 0.101395 0.110660 \n", - "0 0.051519 0.065833 0.083691 0.074336 \n", - "0 0.053839 0.071381 0.096030 0.074982 \n", - "0 0.046030 0.061286 0.079614 0.056463 \n", - "0 0.041967 0.055644 0.072425 0.054271 \n", - "0 0.031383 0.041343 0.040558 0.032107 \n", - "0 0.022599 0.029593 0.026288 0.018226 \n", - "0 0.010593 0.016046 0.021137 0.009522 \n", - "0 0.001105 0.001602 0.002253 0.000930 \n", - "0 0.000305 0.000449 0.000536 0.000198 \n", - "0 0.000278 0.000463 0.000644 0.000189 \n", - "0 0.000140 0.000189 0.000000 0.000000 " + " F_05 precision_super recall_super \n", + "0 0.141584 0.130472 0.137473 \n", + "0 0.061286 0.079614 0.056463 \n", + "0 0.041343 0.040558 0.032107 \n", + "0 0.033242 0.030365 0.022626 \n", + "0 0.000463 0.000644 0.000189 " ] }, "execution_count": 12, @@ -954,58 +774,6 @@ " \n", " \n", " 0\n", - " Ready_LightFM\n", - " 0.413969\n", - " 0.277036\n", - " 0.648029\n", - " 0.610845\n", - " 0.916225\n", - " 1.000000\n", - " 0.352814\n", - " 5.363070\n", - " 0.885116\n", - " \n", - " \n", - " 0\n", - " Ready_LightFMpureMF\n", - " 0.391316\n", - " 0.257793\n", - " 0.606204\n", - " 0.605708\n", - " 0.906681\n", - " 1.000000\n", - " 0.272006\n", - " 5.031437\n", - " 0.918177\n", - " \n", - " \n", - " 0\n", - " Self_P3\n", - " 0.339114\n", - " 0.204905\n", - " 0.572157\n", - " 0.593544\n", - " 0.875928\n", - " 1.000000\n", - " 0.077201\n", - " 3.875892\n", - " 0.974947\n", - " \n", - " \n", - " 0\n", - " Ready_ImplicitALS\n", - " 0.295331\n", - " 0.163847\n", - " 0.500282\n", - " 0.588672\n", - " 0.873807\n", - " 0.999894\n", - " 0.497835\n", - " 5.727745\n", - " 0.825683\n", - " \n", - " \n", - " 0\n", " Self_TopPop\n", " 0.214651\n", " 0.111707\n", @@ -1019,45 +787,6 @@ " \n", " \n", " 0\n", - " Ready_LightFMcontent\n", - " 0.184311\n", - " 0.091346\n", - " 0.352019\n", - " 0.547187\n", - " 0.705196\n", - " 0.979533\n", - " 0.269120\n", - " 4.940084\n", - " 0.924146\n", - " \n", - " \n", - " 0\n", - " Ready_SVD\n", - " 0.107620\n", - " 0.051155\n", - " 0.234251\n", - " 0.519361\n", - " 0.490986\n", - " 0.993425\n", - " 0.206349\n", - " 4.406898\n", - " 0.953781\n", - " \n", - " \n", - " 0\n", - " Self_SVD\n", - " 0.109138\n", - " 0.051857\n", - " 0.202054\n", - " 0.518772\n", - " 0.478261\n", - " 0.872959\n", - " 0.144300\n", - " 3.912577\n", - " 0.971609\n", - " \n", - " \n", - " 0\n", " Ready_Baseline\n", " 0.095957\n", " 0.043178\n", @@ -1071,19 +800,6 @@ " \n", " \n", " 0\n", - " Ready_SVDBiased\n", - " 0.090974\n", - " 0.041243\n", - " 0.195741\n", - " 0.514084\n", - " 0.418876\n", - " 0.998409\n", - " 0.168831\n", - " 4.152102\n", - " 0.964603\n", - " \n", - " \n", - " 0\n", " Self_GlobalAvg\n", " 0.067695\n", " 0.027470\n", @@ -1098,54 +814,15 @@ " \n", " 0\n", " Ready_Random\n", - " 0.047273\n", - " 0.017729\n", - " 0.114687\n", - " 0.506181\n", - " 0.301166\n", - " 0.986002\n", - " 0.184704\n", - " 5.093324\n", - " 0.907405\n", - " \n", - " \n", - " 0\n", - " Ready_I-KNN\n", - " 0.024214\n", - " 0.008958\n", - " 0.048068\n", - " 0.499885\n", - " 0.154825\n", - " 0.402333\n", - " 0.434343\n", - " 5.133650\n", - " 0.877999\n", - " \n", - " \n", - " 0\n", - " Ready_I-KNNBaseline\n", - " 0.003444\n", - " 0.001362\n", - " 0.011760\n", - " 0.496724\n", - " 0.021209\n", - " 0.482821\n", - " 0.059885\n", - " 2.232578\n", - " 0.994487\n", - " \n", - " \n", - " 0\n", - " Ready_U-KNN\n", - " 0.000845\n", - " 0.000274\n", - " 0.002744\n", - " 0.496441\n", - " 0.007423\n", - " 0.602121\n", - " 0.010823\n", - " 2.089186\n", - " 0.995706\n", + " 0.054166\n", + " 0.021656\n", + " 0.128378\n", + " 0.507802\n", + " 0.325557\n", + " 0.988865\n", + " 0.190476\n", + " 5.100033\n", + " 0.907724\n", " \n", " \n", " 0\n", @@ -1160,61 +837,24 @@ " 1.803126\n", " 0.996380\n", " \n", - " \n", - " 0\n", - " Self_IKNN\n", - " 0.000214\n", - " 0.000037\n", - " 0.000368\n", - " 0.496391\n", - " 0.003181\n", - " 0.392153\n", - " 0.115440\n", - " 4.174741\n", - " 0.965327\n", - " \n", " \n", "\n", "" ], "text/plain": [ - " Model NDCG mAP MRR LAUC HR \\\n", - "0 Ready_LightFM 0.413969 0.277036 0.648029 0.610845 0.916225 \n", - "0 Ready_LightFMpureMF 0.391316 0.257793 0.606204 0.605708 0.906681 \n", - "0 Self_P3 0.339114 0.204905 0.572157 0.593544 0.875928 \n", - "0 Ready_ImplicitALS 0.295331 0.163847 0.500282 0.588672 0.873807 \n", - "0 Self_TopPop 0.214651 0.111707 0.400939 0.555546 0.765642 \n", - "0 Ready_LightFMcontent 0.184311 0.091346 0.352019 0.547187 0.705196 \n", - "0 Ready_SVD 0.107620 0.051155 0.234251 0.519361 0.490986 \n", - "0 Self_SVD 0.109138 0.051857 0.202054 0.518772 0.478261 \n", - "0 Ready_Baseline 0.095957 0.043178 0.198193 0.515501 0.437964 \n", - "0 Ready_SVDBiased 0.090974 0.041243 0.195741 0.514084 0.418876 \n", - "0 Self_GlobalAvg 0.067695 0.027470 0.171187 0.509546 0.384942 \n", - "0 Ready_Random 0.047273 0.017729 0.114687 0.506181 0.301166 \n", - "0 Ready_I-KNN 0.024214 0.008958 0.048068 0.499885 0.154825 \n", - "0 Ready_I-KNNBaseline 0.003444 0.001362 0.011760 0.496724 0.021209 \n", - "0 Ready_U-KNN 0.000845 0.000274 0.002744 0.496441 0.007423 \n", - "0 Self_BaselineUI 0.000752 0.000168 0.001677 0.496424 0.009544 \n", - "0 Self_IKNN 0.000214 0.000037 0.000368 0.496391 0.003181 \n", + " Model NDCG mAP MRR LAUC HR \\\n", + "0 Self_TopPop 0.214651 0.111707 0.400939 0.555546 0.765642 \n", + "0 Ready_Baseline 0.095957 0.043178 0.198193 0.515501 0.437964 \n", + "0 Self_GlobalAvg 0.067695 0.027470 0.171187 0.509546 0.384942 \n", + "0 Ready_Random 0.054166 0.021656 0.128378 0.507802 0.325557 \n", + "0 Self_BaselineUI 0.000752 0.000168 0.001677 0.496424 0.009544 \n", "\n", " Reco in test Test coverage Shannon Gini \n", - "0 1.000000 0.352814 5.363070 0.885116 \n", - "0 1.000000 0.272006 5.031437 0.918177 \n", - "0 1.000000 0.077201 3.875892 0.974947 \n", - "0 0.999894 0.497835 5.727745 0.825683 \n", "0 1.000000 0.038961 3.159079 0.987317 \n", - "0 0.979533 0.269120 4.940084 0.924146 \n", - "0 0.993425 0.206349 4.406898 0.953781 \n", - "0 0.872959 0.144300 3.912577 0.971609 \n", "0 1.000000 0.033911 2.836513 0.991139 \n", - "0 0.998409 0.168831 4.152102 0.964603 \n", "0 1.000000 0.025974 2.711772 0.992003 \n", - "0 0.986002 0.184704 5.093324 0.907405 \n", - "0 0.402333 0.434343 5.133650 0.877999 \n", - "0 0.482821 0.059885 2.232578 0.994487 \n", - "0 0.602121 0.010823 2.089186 0.995706 \n", - "0 0.600530 0.005051 1.803126 0.996380 \n", - "0 0.392153 0.115440 4.174741 0.965327 " + "0 0.988865 0.190476 5.100033 0.907724 \n", + "0 0.600530 0.005051 1.803126 0.996380 " ] }, "execution_count": 13, @@ -1242,7 +882,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "3it [00:00, 4233.82it/s]\n" + "3it [00:00, 1941.81it/s]\n" ] }, { @@ -1558,258 +1198,6 @@ "display(estimations)" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# A/B testing" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "# Suppose we had\n", - "A_successes=1000\n", - "A_failures=9000\n", - "\n", - "B_successes=1500\n", - "B_failures=12000" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Confidence intervals" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
successesfailuresconversion
A100015000.4000
B9000120000.4286
\n", - "
" - ], - "text/plain": [ - " successes failures conversion\n", - "A 1000 1500 0.4000\n", - "B 9000 12000 0.4286" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df=pd.DataFrame({'successes': [A_successes, A_failures],'failures': [B_successes,B_failures]}, index=['A','B'])\n", - "df['conversion']=df.apply(lambda x: round(x['successes']/(x['successes']+x['failures']),4), axis=1)\n", - "df" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
successesfailuresconversionconf_interval
A100015000.4000[0.3808, 0.4194]
B9000120000.4286[0.4219, 0.4353]
\n", - "
" - ], - "text/plain": [ - " successes failures conversion conf_interval\n", - "A 1000 1500 0.4000 [0.3808, 0.4194]\n", - "B 9000 12000 0.4286 [0.4219, 0.4353]" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "significance=0.95\n", - "\n", - "from statsmodels.stats.proportion import proportion_confint\n", - "df['conf_interval']=df.apply(lambda x: [round(i,4) for i in proportion_confint(count=x['successes'], nobs=x['successes']+x['failures'], alpha=1-significance, method='binom_test')], axis=1)\n", - "df" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "p-value: 0.006729080907452261\n" - ] - } - ], - "source": [ - "from scipy.stats import chi2_contingency\n", - "cond = np.array([[A_successes, A_failures], [B_successes, B_failures]])\n", - "print(f'p-value: {chi2_contingency(cond)[1]}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### How many observations do we need? Power analysis " - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Effect size: 0.02041241452319317\n", - "Samples needed: 18837\n" - ] - } - ], - "source": [ - "# sample size calculator: https://www.evanmiller.org/ab-testing/sample-size.html \n", - "# for now let's assume conversion from control group is known\n", - "\n", - "from statsmodels.stats.power import GofChisquarePower\n", - "from statsmodels.stats.gof import chisquare_effectsize\n", - "\n", - "effect_size=chisquare_effectsize([df['conversion']['A'], 1-df['conversion']['A']], \n", - " [df['conversion']['A']+0.01, 1-df['conversion']['A']-0.01])\n", - "print(f'Effect size: {effect_size}')\n", - "print(f'Samples needed: {round(GofChisquarePower().solve_power(effect_size, power=.8, n_bins=2, alpha=0.05))}')" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Effect size: 0.07001400420140048\n", - "Samples needed: 1601\n" - ] - } - ], - "source": [ - "# for now let's assume conversion from control group is known\n", - "# it's not correct looking at https://www.evanmiller.org/ab-testing/sample-size.html\n", - "from statsmodels.stats.power import GofChisquarePower\n", - "from statsmodels.stats.gof import chisquare_effectsize\n", - "n_levels_variable_a = 1 # to verify\n", - "n_levels_variable_b = 2\n", - "\n", - "effect_size=chisquare_effectsize([0.15, 0.85], [0.125,0.875])\n", - "print(f'Effect size: {effect_size}')\n", - "print(f'Samples needed: {round(GofChisquarePower().solve_power(effect_size, power=.8, n_bins=(n_levels_variable_a)*(n_levels_variable_b), alpha=0.05))}')" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -1819,7 +1207,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -1858,148 +1246,148 @@ " \n", " \n", " \n", - " 41281\n", - " 437\n", + " 2985\n", + " 789\n", " 5\n", - " Gone with the Wind (1939)\n", - " Drama, Romance, War\n", + " Star Wars (1977)\n", + " Action, Adventure, Romance, Sci-Fi, War\n", " \n", " \n", - " 28880\n", - " 437\n", + " 25980\n", + " 789\n", " 5\n", - " Pinocchio (1940)\n", - " Animation, Children's\n", + " Dead Man Walking (1995)\n", + " Drama\n", " \n", " \n", - " 36888\n", - " 437\n", + " 9357\n", + " 789\n", " 5\n", - " Backbeat (1993)\n", - " Drama, Musical\n", + " Last Supper, The (1995)\n", + " Drama, Thriller\n", " \n", " \n", - " 36713\n", - " 437\n", + " 17306\n", + " 789\n", " 5\n", + " Leaving Las Vegas (1995)\n", + " Drama, Romance\n", + " \n", + " \n", + " 36474\n", + " 789\n", + " 5\n", + " Swingers (1996)\n", + " Comedy, Drama\n", + " \n", + " \n", + " 65139\n", + " 789\n", + " 4\n", + " Welcome to the Dollhouse (1995)\n", + " Comedy, Drama\n", + " \n", + " \n", + " 61975\n", + " 789\n", + " 4\n", + " Private Parts (1997)\n", + " Comedy, Drama\n", + " \n", + " \n", + " 56522\n", + " 789\n", + " 4\n", + " Waiting for Guffman (1996)\n", + " Comedy\n", + " \n", + " \n", + " 41414\n", + " 789\n", + " 4\n", + " Donnie Brasco (1997)\n", + " Crime, Drama\n", + " \n", + " \n", + " 36617\n", + " 789\n", + " 4\n", " Lone Star (1996)\n", " Drama, Mystery\n", " \n", " \n", - " 36122\n", - " 437\n", - " 5\n", - " Silence of the Lambs, The (1991)\n", - " Drama, Thriller\n", - " \n", - " \n", - " 32783\n", - " 437\n", - " 5\n", - " Muriel's Wedding (1994)\n", - " Comedy, Romance\n", - " \n", - " \n", - " 30950\n", - " 437\n", - " 5\n", - " Rosewood (1997)\n", + " 24501\n", + " 789\n", + " 4\n", + " People vs. Larry Flynt, The (1996)\n", " Drama\n", " \n", " \n", - " 30386\n", - " 437\n", - " 5\n", - " Manchurian Candidate, The (1962)\n", - " Film-Noir, Thriller\n", + " 20210\n", + " 789\n", + " 4\n", + " Return of the Jedi (1983)\n", + " Action, Adventure, Romance, Sci-Fi, War\n", " \n", " \n", - " 29411\n", - " 437\n", - " 5\n", - " Psycho (1960)\n", - " Horror, Romance, Thriller\n", + " 8230\n", + " 789\n", + " 3\n", + " Beautiful Girls (1996)\n", + " Drama\n", " \n", " \n", - " 27655\n", - " 437\n", - " 5\n", - " Vertigo (1958)\n", - " Mystery, Thriller\n", - " \n", - " \n", - " 14735\n", - " 437\n", - " 5\n", - " Raising Arizona (1987)\n", + " 19781\n", + " 789\n", + " 3\n", + " Liar Liar (1997)\n", " Comedy\n", " \n", " \n", - " 27563\n", - " 437\n", - " 5\n", - " Young Frankenstein (1974)\n", - " Comedy, Horror\n", - " \n", - " \n", - " 26524\n", - " 437\n", - " 5\n", - " Everyone Says I Love You (1996)\n", - " Comedy, Musical, Romance\n", - " \n", - " \n", - " 25618\n", - " 437\n", - " 5\n", - " Citizen Kane (1941)\n", - " Drama\n", - " \n", - " \n", - " 23714\n", - " 437\n", - " 5\n", - " Casablanca (1942)\n", - " Drama, Romance, War\n", + " 39387\n", + " 789\n", + " 3\n", + " Sleepers (1996)\n", + " Crime, Drama\n", " \n", " \n", "\n", "" ], "text/plain": [ - " user rating title \\\n", - "41281 437 5 Gone with the Wind (1939) \n", - "28880 437 5 Pinocchio (1940) \n", - "36888 437 5 Backbeat (1993) \n", - "36713 437 5 Lone Star (1996) \n", - "36122 437 5 Silence of the Lambs, The (1991) \n", - "32783 437 5 Muriel's Wedding (1994) \n", - "30950 437 5 Rosewood (1997) \n", - "30386 437 5 Manchurian Candidate, The (1962) \n", - "29411 437 5 Psycho (1960) \n", - "27655 437 5 Vertigo (1958) \n", - "14735 437 5 Raising Arizona (1987) \n", - "27563 437 5 Young Frankenstein (1974) \n", - "26524 437 5 Everyone Says I Love You (1996) \n", - "25618 437 5 Citizen Kane (1941) \n", - "23714 437 5 Casablanca (1942) \n", + " user rating title \\\n", + "2985 789 5 Star Wars (1977) \n", + "25980 789 5 Dead Man Walking (1995) \n", + "9357 789 5 Last Supper, The (1995) \n", + "17306 789 5 Leaving Las Vegas (1995) \n", + "36474 789 5 Swingers (1996) \n", + "65139 789 4 Welcome to the Dollhouse (1995) \n", + "61975 789 4 Private Parts (1997) \n", + "56522 789 4 Waiting for Guffman (1996) \n", + "41414 789 4 Donnie Brasco (1997) \n", + "36617 789 4 Lone Star (1996) \n", + "24501 789 4 People vs. Larry Flynt, The (1996) \n", + "20210 789 4 Return of the Jedi (1983) \n", + "8230 789 3 Beautiful Girls (1996) \n", + "19781 789 3 Liar Liar (1997) \n", + "39387 789 3 Sleepers (1996) \n", "\n", - " genres \n", - "41281 Drama, Romance, War \n", - "28880 Animation, Children's \n", - "36888 Drama, Musical \n", - "36713 Drama, Mystery \n", - "36122 Drama, Thriller \n", - "32783 Comedy, Romance \n", - "30950 Drama \n", - "30386 Film-Noir, Thriller \n", - "29411 Horror, Romance, Thriller \n", - "27655 Mystery, Thriller \n", - "14735 Comedy \n", - "27563 Comedy, Horror \n", - "26524 Comedy, Musical, Romance \n", - "25618 Drama \n", - "23714 Drama, Romance, War " + " genres \n", + "2985 Action, Adventure, Romance, Sci-Fi, War \n", + "25980 Drama \n", + "9357 Drama, Thriller \n", + "17306 Drama, Romance \n", + "36474 Comedy, Drama \n", + "65139 Comedy, Drama \n", + "61975 Comedy, Drama \n", + "56522 Comedy \n", + "41414 Crime, Drama \n", + "36617 Drama, Mystery \n", + "24501 Drama \n", + "20210 Action, Adventure, Romance, Sci-Fi, War \n", + "8230 Drama \n", + "19781 Comedy \n", + "39387 Crime, Drama " ] }, "metadata": {}, @@ -2041,106 +1429,106 @@ " \n", " \n", " \n", - " 435\n", - " 437.0\n", + " 787\n", + " 789.0\n", " 1\n", " Great Day in Harlem, A (1994)\n", " Documentary\n", " \n", " \n", - " 1377\n", - " 437.0\n", + " 1729\n", + " 789.0\n", " 2\n", " Tough and Deadly (1995)\n", " Action, Drama, Thriller\n", " \n", " \n", - " 2319\n", - " 437.0\n", + " 2671\n", + " 789.0\n", " 3\n", " Aiqing wansui (1994)\n", " Drama\n", " \n", " \n", - " 3261\n", - " 437.0\n", + " 3613\n", + " 789.0\n", " 4\n", " Delta of Venus (1994)\n", " Drama\n", " \n", " \n", - " 5145\n", - " 437.0\n", + " 4555\n", + " 789.0\n", " 5\n", + " Someone Else's America (1995)\n", + " Drama\n", + " \n", + " \n", + " 5497\n", + " 789.0\n", + " 6\n", " Saint of Fort Washington, The (1993)\n", " Drama\n", " \n", " \n", - " 6087\n", - " 437.0\n", - " 6\n", + " 6439\n", + " 789.0\n", + " 7\n", " Celestial Clockwork (1994)\n", " Comedy\n", " \n", " \n", - " 7030\n", - " 437.0\n", - " 7\n", + " 7380\n", + " 789.0\n", + " 8\n", " Some Mother's Son (1996)\n", " Drama\n", " \n", " \n", - " 8924\n", - " 437.0\n", - " 8\n", + " 9276\n", + " 789.0\n", + " 9\n", " Maya Lin: A Strong Clear Vision (1994)\n", " Documentary\n", " \n", " \n", - " 7970\n", - " 437.0\n", - " 9\n", + " 8322\n", + " 789.0\n", + " 10\n", " Prefontaine (1997)\n", " Drama\n", " \n", - " \n", - " 8485\n", - " 437.0\n", - " 10\n", - " Santa with Muscles (1996)\n", - " Comedy\n", - " \n", " \n", "\n", "" ], "text/plain": [ " user rec_nb title \\\n", - "435 437.0 1 Great Day in Harlem, A (1994) \n", - "1377 437.0 2 Tough and Deadly (1995) \n", - "2319 437.0 3 Aiqing wansui (1994) \n", - "3261 437.0 4 Delta of Venus (1994) \n", - "5145 437.0 5 Saint of Fort Washington, The (1993) \n", - "6087 437.0 6 Celestial Clockwork (1994) \n", - "7030 437.0 7 Some Mother's Son (1996) \n", - "8924 437.0 8 Maya Lin: A Strong Clear Vision (1994) \n", - "7970 437.0 9 Prefontaine (1997) \n", - "8485 437.0 10 Santa with Muscles (1996) \n", + "787 789.0 1 Great Day in Harlem, A (1994) \n", + "1729 789.0 2 Tough and Deadly (1995) \n", + "2671 789.0 3 Aiqing wansui (1994) \n", + "3613 789.0 4 Delta of Venus (1994) \n", + "4555 789.0 5 Someone Else's America (1995) \n", + "5497 789.0 6 Saint of Fort Washington, The (1993) \n", + "6439 789.0 7 Celestial Clockwork (1994) \n", + "7380 789.0 8 Some Mother's Son (1996) \n", + "9276 789.0 9 Maya Lin: A Strong Clear Vision (1994) \n", + "8322 789.0 10 Prefontaine (1997) \n", "\n", " genres \n", - "435 Documentary \n", - "1377 Action, Drama, Thriller \n", - "2319 Drama \n", - "3261 Drama \n", - "5145 Drama \n", - "6087 Comedy \n", - "7030 Drama \n", - "8924 Documentary \n", - "7970 Drama \n", - "8485 Comedy " + "787 Documentary \n", + "1729 Action, Drama, Thriller \n", + "2671 Drama \n", + "3613 Drama \n", + "4555 Drama \n", + "5497 Drama \n", + "6439 Comedy \n", + "7380 Drama \n", + "9276 Documentary \n", + "8322 Drama " ] }, - "execution_count": 22, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -2188,7 +1576,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -2200,30 +1588,18 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "943it [00:00, 5476.88it/s]\n", - "943it [00:00, 4421.14it/s]\n", - "943it [00:00, 5056.87it/s]\n", - "943it [00:00, 5642.22it/s]\n", - "943it [00:00, 2776.13it/s]\n", - "943it [00:00, 3004.22it/s]\n", - "943it [00:00, 3802.86it/s]\n", - "943it [00:00, 3421.26it/s]\n", - "943it [00:00, 5077.51it/s]\n", - "943it [00:00, 4927.51it/s]\n", - "943it [00:00, 4246.38it/s]\n", - "943it [00:00, 4295.31it/s]\n", - "943it [00:00, 4362.79it/s]\n", - "943it [00:00, 6241.10it/s]\n", - "943it [00:00, 4318.95it/s]\n", - "943it [00:00, 5054.75it/s]\n", - "943it [00:00, 3839.80it/s]\n" + "943it [00:00, 4479.94it/s]\n", + "943it [00:00, 4036.40it/s]\n", + "943it [00:00, 4598.99it/s]\n", + "943it [00:00, 5170.18it/s]\n", + "943it [00:00, 4778.23it/s]\n" ] }, { @@ -2270,90 +1646,6 @@ " \n", " \n", " 0\n", - " Ready_LightFM\n", - " 162.703697\n", - " 160.837311\n", - " 0.349523\n", - " 0.226193\n", - " 0.225202\n", - " 0.265538\n", - " 0.246459\n", - " 0.266934\n", - " 0.413969\n", - " 0.277036\n", - " 0.648029\n", - " 0.610845\n", - " 0.916225\n", - " 1.000000\n", - " 0.352814\n", - " 5.363070\n", - " 0.885116\n", - " \n", - " \n", - " 0\n", - " Ready_LightFMpureMF\n", - " 8.015665\n", - " 7.520402\n", - " 0.333934\n", - " 0.216047\n", - " 0.214731\n", - " 0.253177\n", - " 0.232725\n", - " 0.254485\n", - " 0.391316\n", - " 0.257793\n", - " 0.606204\n", - " 0.605708\n", - " 0.906681\n", - " 1.000000\n", - " 0.272006\n", - " 5.031437\n", - " 0.918177\n", - " \n", - " \n", - " 0\n", - " Self_P3\n", - " 3.702446\n", - " 3.527273\n", - " 0.282185\n", - " 0.192092\n", - " 0.186749\n", - " 0.216980\n", - " 0.204185\n", - " 0.240096\n", - " 0.339114\n", - " 0.204905\n", - " 0.572157\n", - " 0.593544\n", - " 0.875928\n", - " 1.000000\n", - " 0.077201\n", - " 3.875892\n", - " 0.974947\n", - " \n", - " \n", - " 0\n", - " Ready_ImplicitALS\n", - " 3.267237\n", - " 3.068493\n", - " 0.252068\n", - " 0.182639\n", - " 0.175182\n", - " 0.199457\n", - " 0.167167\n", - " 0.216308\n", - " 0.295331\n", - " 0.163847\n", - " 0.500282\n", - " 0.588672\n", - " 0.873807\n", - " 0.999894\n", - " 0.497835\n", - " 5.727745\n", - " 0.825683\n", - " \n", - " \n", - " 0\n", " Self_TopPop\n", " 2.508258\n", " 2.217909\n", @@ -2375,69 +1667,6 @@ " \n", " \n", " 0\n", - " Ready_LightFMcontent\n", - " 182.840876\n", - " 180.771141\n", - " 0.161294\n", - " 0.100424\n", - " 0.101736\n", - " 0.121096\n", - " 0.101395\n", - " 0.110660\n", - " 0.184311\n", - " 0.091346\n", - " 0.352019\n", - " 0.547187\n", - " 0.705196\n", - " 0.979533\n", - " 0.269120\n", - " 4.940084\n", - " 0.924146\n", - " \n", - " \n", - " 0\n", - " Ready_SVD\n", - " 0.953076\n", - " 0.750219\n", - " 0.094804\n", - " 0.045302\n", - " 0.051519\n", - " 0.065833\n", - " 0.083691\n", - " 0.074336\n", - " 0.107620\n", - " 0.051155\n", - " 0.234251\n", - " 0.519361\n", - " 0.490986\n", - " 0.993425\n", - " 0.206349\n", - " 4.406898\n", - " 0.953781\n", - " \n", - " \n", - " 0\n", - " Self_SVD\n", - " 0.913840\n", - " 0.717167\n", - " 0.105620\n", - " 0.044070\n", - " 0.053839\n", - " 0.071381\n", - " 0.096030\n", - " 0.074982\n", - " 0.109138\n", - " 0.051857\n", - " 0.202054\n", - " 0.518772\n", - " 0.478261\n", - " 0.872959\n", - " 0.144300\n", - " 3.912577\n", - " 0.971609\n", - " \n", - " \n", - " 0\n", " Ready_Baseline\n", " 0.949459\n", " 0.752487\n", @@ -2459,27 +1688,6 @@ " \n", " \n", " 0\n", - " Ready_SVDBiased\n", - " 0.941830\n", - " 0.742841\n", - " 0.083033\n", - " 0.034867\n", - " 0.041967\n", - " 0.055644\n", - " 0.072425\n", - " 0.054271\n", - " 0.090974\n", - " 0.041243\n", - " 0.195741\n", - " 0.514084\n", - " 0.418876\n", - " 0.998409\n", - " 0.168831\n", - " 4.152102\n", - " 0.964603\n", - " \n", - " \n", - " 0\n", " Self_GlobalAvg\n", " 1.125760\n", " 0.943534\n", @@ -2502,86 +1710,23 @@ " \n", " 0\n", " Ready_Random\n", - " 1.513348\n", - " 1.214309\n", - " 0.044221\n", - " 0.019366\n", - " 0.022599\n", - " 0.029593\n", - " 0.026288\n", - " 0.018226\n", - " 0.047273\n", - " 0.017729\n", - " 0.114687\n", - " 0.506181\n", - " 0.301166\n", - " 0.986002\n", - " 0.184704\n", - " 5.093324\n", - " 0.907405\n", - " \n", - " \n", - " 0\n", - " Ready_I-KNN\n", - " 1.030386\n", - " 0.813067\n", - " 0.026087\n", - " 0.006908\n", - " 0.010593\n", - " 0.016046\n", - " 0.021137\n", - " 0.009522\n", - " 0.024214\n", - " 0.008958\n", - " 0.048068\n", - " 0.499885\n", - " 0.154825\n", - " 0.402333\n", - " 0.434343\n", - " 5.133650\n", - " 0.877999\n", - " \n", - " \n", - " 0\n", - " Ready_I-KNNBaseline\n", - " 0.935327\n", - " 0.737424\n", - " 0.002545\n", - " 0.000755\n", - " 0.001105\n", - " 0.001602\n", - " 0.002253\n", - " 0.000930\n", - " 0.003444\n", - " 0.001362\n", - " 0.011760\n", - " 0.496724\n", - " 0.021209\n", - " 0.482821\n", - " 0.059885\n", - " 2.232578\n", - " 0.994487\n", - " \n", - " \n", - " 0\n", - " Ready_U-KNN\n", - " 1.023495\n", - " 0.807913\n", - " 0.000742\n", - " 0.000205\n", - " 0.000305\n", - " 0.000449\n", - " 0.000536\n", - " 0.000198\n", - " 0.000845\n", - " 0.000274\n", - " 0.002744\n", - " 0.496441\n", - " 0.007423\n", - " 0.602121\n", - " 0.010823\n", - " 2.089186\n", - " 0.995706\n", + " 1.531724\n", + " 1.230384\n", + " 0.049417\n", + " 0.022558\n", + " 0.025490\n", + " 0.033242\n", + " 0.030365\n", + " 0.022626\n", + " 0.054166\n", + " 0.021656\n", + " 0.128378\n", + " 0.507802\n", + " 0.325557\n", + " 0.988865\n", + " 0.190476\n", + " 5.100033\n", + " 0.907724\n", " \n", " \n", " 0\n", @@ -2604,110 +1749,34 @@ " 1.803126\n", " 0.996380\n", " \n", - " \n", - " 0\n", - " Self_IKNN\n", - " 1.018363\n", - " 0.808793\n", - " 0.000318\n", - " 0.000108\n", - " 0.000140\n", - " 0.000189\n", - " 0.000000\n", - " 0.000000\n", - " 0.000214\n", - " 0.000037\n", - " 0.000368\n", - " 0.496391\n", - " 0.003181\n", - " 0.392153\n", - " 0.115440\n", - " 4.174741\n", - " 0.965327\n", - " \n", " \n", "\n", "" ], "text/plain": [ - " Model RMSE MAE precision recall \\\n", - "0 Ready_LightFM 162.703697 160.837311 0.349523 0.226193 \n", - "0 Ready_LightFMpureMF 8.015665 7.520402 0.333934 0.216047 \n", - "0 Self_P3 3.702446 3.527273 0.282185 0.192092 \n", - "0 Ready_ImplicitALS 3.267237 3.068493 0.252068 0.182639 \n", - "0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 \n", - "0 Ready_LightFMcontent 182.840876 180.771141 0.161294 0.100424 \n", - "0 Ready_SVD 0.953076 0.750219 0.094804 0.045302 \n", - "0 Self_SVD 0.913840 0.717167 0.105620 0.044070 \n", - "0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 \n", - "0 Ready_SVDBiased 0.941830 0.742841 0.083033 0.034867 \n", - "0 Self_GlobalAvg 1.125760 0.943534 0.061188 0.025968 \n", - "0 Ready_Random 1.513348 1.214309 0.044221 0.019366 \n", - "0 Ready_I-KNN 1.030386 0.813067 0.026087 0.006908 \n", - "0 Ready_I-KNNBaseline 0.935327 0.737424 0.002545 0.000755 \n", - "0 Ready_U-KNN 1.023495 0.807913 0.000742 0.000205 \n", - "0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 \n", - "0 Self_IKNN 1.018363 0.808793 0.000318 0.000108 \n", + " Model RMSE MAE precision recall F_1 \\\n", + "0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 0.118732 \n", + "0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 0.046030 \n", + "0 Self_GlobalAvg 1.125760 0.943534 0.061188 0.025968 0.031383 \n", + "0 Ready_Random 1.531724 1.230384 0.049417 0.022558 0.025490 \n", + "0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 0.000278 \n", "\n", - " F_1 F_05 precision_super recall_super NDCG mAP \\\n", - "0 0.225202 0.265538 0.246459 0.266934 0.413969 0.277036 \n", - "0 0.214731 0.253177 0.232725 0.254485 0.391316 0.257793 \n", - "0 0.186749 0.216980 0.204185 0.240096 0.339114 0.204905 \n", - "0 0.175182 0.199457 0.167167 0.216308 0.295331 0.163847 \n", - "0 0.118732 0.141584 0.130472 0.137473 0.214651 0.111707 \n", - "0 0.101736 0.121096 0.101395 0.110660 0.184311 0.091346 \n", - "0 0.051519 0.065833 0.083691 0.074336 0.107620 0.051155 \n", - "0 0.053839 0.071381 0.096030 0.074982 0.109138 0.051857 \n", - "0 0.046030 0.061286 0.079614 0.056463 0.095957 0.043178 \n", - "0 0.041967 0.055644 0.072425 0.054271 0.090974 0.041243 \n", - "0 0.031383 0.041343 0.040558 0.032107 0.067695 0.027470 \n", - "0 0.022599 0.029593 0.026288 0.018226 0.047273 0.017729 \n", - "0 0.010593 0.016046 0.021137 0.009522 0.024214 0.008958 \n", - "0 0.001105 0.001602 0.002253 0.000930 0.003444 0.001362 \n", - "0 0.000305 0.000449 0.000536 0.000198 0.000845 0.000274 \n", - "0 0.000278 0.000463 0.000644 0.000189 0.000752 0.000168 \n", - "0 0.000140 0.000189 0.000000 0.000000 0.000214 0.000037 \n", + " F_05 precision_super recall_super NDCG mAP MRR \\\n", + "0 0.141584 0.130472 0.137473 0.214651 0.111707 0.400939 \n", + "0 0.061286 0.079614 0.056463 0.095957 0.043178 0.198193 \n", + "0 0.041343 0.040558 0.032107 0.067695 0.027470 0.171187 \n", + "0 0.033242 0.030365 0.022626 0.054166 0.021656 0.128378 \n", + "0 0.000463 0.000644 0.000189 0.000752 0.000168 0.001677 \n", "\n", - " MRR LAUC HR Reco in test Test coverage Shannon \\\n", - "0 0.648029 0.610845 0.916225 1.000000 0.352814 5.363070 \n", - "0 0.606204 0.605708 0.906681 1.000000 0.272006 5.031437 \n", - "0 0.572157 0.593544 0.875928 1.000000 0.077201 3.875892 \n", - "0 0.500282 0.588672 0.873807 0.999894 0.497835 5.727745 \n", - "0 0.400939 0.555546 0.765642 1.000000 0.038961 3.159079 \n", - "0 0.352019 0.547187 0.705196 0.979533 0.269120 4.940084 \n", - "0 0.234251 0.519361 0.490986 0.993425 0.206349 4.406898 \n", - "0 0.202054 0.518772 0.478261 0.872959 0.144300 3.912577 \n", - "0 0.198193 0.515501 0.437964 1.000000 0.033911 2.836513 \n", - "0 0.195741 0.514084 0.418876 0.998409 0.168831 4.152102 \n", - "0 0.171187 0.509546 0.384942 1.000000 0.025974 2.711772 \n", - "0 0.114687 0.506181 0.301166 0.986002 0.184704 5.093324 \n", - "0 0.048068 0.499885 0.154825 0.402333 0.434343 5.133650 \n", - "0 0.011760 0.496724 0.021209 0.482821 0.059885 2.232578 \n", - "0 0.002744 0.496441 0.007423 0.602121 0.010823 2.089186 \n", - "0 0.001677 0.496424 0.009544 0.600530 0.005051 1.803126 \n", - "0 0.000368 0.496391 0.003181 0.392153 0.115440 4.174741 \n", - "\n", - " Gini \n", - "0 0.885116 \n", - "0 0.918177 \n", - "0 0.974947 \n", - "0 0.825683 \n", - "0 0.987317 \n", - "0 0.924146 \n", - "0 0.953781 \n", - "0 0.971609 \n", - "0 0.991139 \n", - "0 0.964603 \n", - "0 0.992003 \n", - "0 0.907405 \n", - "0 0.877999 \n", - "0 0.994487 \n", - "0 0.995706 \n", - "0 0.996380 \n", - "0 0.965327 " + " LAUC HR Reco in test Test coverage Shannon Gini \n", + "0 0.555546 0.765642 1.000000 0.038961 3.159079 0.987317 \n", + "0 0.515501 0.437964 1.000000 0.033911 2.836513 0.991139 \n", + "0 0.509546 0.384942 1.000000 0.025974 2.711772 0.992003 \n", + "0 0.507802 0.325557 0.988865 0.190476 5.100033 0.907724 \n", + "0 0.496424 0.009544 0.600530 0.005051 1.803126 0.996380 " ] }, - "execution_count": 24, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" }