{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Self made simplified I-KNN" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import helpers\n", "import pandas as pd\n", "import numpy as np\n", "import scipy.sparse as sparse\n", "from collections import defaultdict\n", "from itertools import chain\n", "import random\n", "\n", "train_read=pd.read_csv('./Datasets/ml-100k/train.csv', sep='\\t', header=None)\n", "test_read=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n", "train_ui, test_ui, user_code_id, user_id_code, item_code_id, item_id_code = helpers.data_to_csr(train_read, test_read)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "class IKNN():\n", " \n", " def fit(self, train_ui):\n", " self.train_ui=train_ui\n", " \n", " train_iu=train_ui.transpose()\n", " norms=np.linalg.norm(train_iu.A, axis=1) # here we compute lenth of each item ratings vector\n", " norms=np.vectorize(lambda x: max(x,1))(norms[:,None]) # to avoid dividing by zero\n", "\n", " normalized_train_iu=sparse.csr_matrix(train_iu/norms)\n", "\n", " self.similarity_matrix_ii=normalized_train_iu*normalized_train_iu.transpose()\n", " \n", " self.estimations=np.array(train_ui*self.similarity_matrix_ii/((train_ui>0)*self.similarity_matrix_ii))\n", " \n", " def recommend(self, user_code_id, item_code_id, topK=10):\n", " \n", " top_k = defaultdict(list)\n", " for nb_user, user in enumerate(self.estimations):\n", " \n", " user_rated=self.train_ui.indices[self.train_ui.indptr[nb_user]:self.train_ui.indptr[nb_user+1]]\n", " for item, score in enumerate(user):\n", " if item not in user_rated and not np.isnan(score):\n", " top_k[user_code_id[nb_user]].append((item_code_id[item], score))\n", " result=[]\n", " # Let's choose k best items in the format: (user, item1, score1, item2, score2, ...)\n", " for uid, item_scores in top_k.items():\n", " item_scores.sort(key=lambda x: x[1], reverse=True)\n", " result.append([uid]+list(chain(*item_scores[:topK])))\n", " return result\n", " \n", " def estimate(self, user_code_id, item_code_id, test_ui):\n", " result=[]\n", " for user, item in zip(*test_ui.nonzero()):\n", " result.append([user_code_id[user], item_code_id[item], \n", " self.estimations[user,item] if not np.isnan(self.estimations[user,item]) else 1])\n", " return result" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "toy train ui:\n" ] }, { "data": { "text/plain": [ "array([[3, 4, 0, 0, 5, 0, 0, 4],\n", " [0, 1, 2, 3, 0, 0, 0, 0],\n", " [0, 0, 0, 5, 0, 3, 4, 0]], dtype=int64)" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "similarity matrix:\n" ] }, { "data": { "text/plain": [ "array([[1. , 0.9701425 , 0. , 0. , 1. ,\n", " 0. , 0. , 1. ],\n", " [0.9701425 , 1. , 0.24253563, 0.12478355, 0.9701425 ,\n", " 0. , 0. , 0.9701425 ],\n", " [0. , 0.24253563, 1. , 0.51449576, 0. ,\n", " 0. , 0. , 0. ],\n", " [0. , 0.12478355, 0.51449576, 1. , 0. ,\n", " 0.85749293, 0.85749293, 0. ],\n", " [1. , 0.9701425 , 0. , 0. , 1. ,\n", " 0. , 0. , 1. ],\n", " [0. , 0. , 0. , 0.85749293, 0. ,\n", " 1. , 1. , 0. ],\n", " [0. , 0. , 0. , 0.85749293, 0. ,\n", " 1. , 1. , 0. ],\n", " [1. , 0.9701425 , 0. , 0. , 1. ,\n", " 0. , 0. , 1. ]])" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "estimations matrix:\n" ] }, { "data": { "text/plain": [ "array([[4. , 4. , 4. , 4. , 4. ,\n", " nan, nan, 4. ],\n", " [1. , 1.35990333, 2.15478388, 2.53390319, 1. ,\n", " 3. , 3. , 1. ],\n", " [ nan, 5. , 5. , 4.05248907, nan,\n", " 3.95012863, 3.95012863, nan]])" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "[[0, 20, 4.0, 30, 4.0],\n", " [10, 50, 3.0, 60, 3.0, 0, 1.0, 40, 1.0, 70, 1.0],\n", " [20, 10, 5.0, 20, 5.0]]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# toy example\n", "toy_train_read=pd.read_csv('./Datasets/toy-example/train.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n", "toy_test_read=pd.read_csv('./Datasets/toy-example/test.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n", "\n", "toy_train_ui, toy_test_ui, toy_user_code_id, toy_user_id_code, \\\n", "toy_item_code_id, toy_item_id_code = helpers.data_to_csr(toy_train_read, toy_test_read)\n", "\n", "\n", "model=IKNN()\n", "model.fit(toy_train_ui)\n", "\n", "print('toy train ui:')\n", "display(toy_train_ui.A)\n", "\n", "print('similarity matrix:')\n", "display(model.similarity_matrix_ii.A)\n", "\n", "print('estimations matrix:')\n", "display(model.estimations)\n", "\n", "model.recommend(toy_user_code_id, toy_item_code_id)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "model=IKNN()\n", "model.fit(train_ui)\n", "\n", "top_n=pd.DataFrame(model.recommend(user_code_id, item_code_id, topK=10))\n", "\n", "top_n.to_csv('Recommendations generated/ml-100k/Self_IKNN_reco.csv', index=False, header=False)\n", "\n", "estimations=pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))\n", "estimations.to_csv('Recommendations generated/ml-100k/Self_IKNN_estimations.csv', index=False, header=False)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "943it [00:00, 3162.40it/s]\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RMSEMAEprecisionrecallF_1F_05precision_superrecall_superNDCGmAPMRRLAUCHRF_2Whole_averageReco in testTest coverageShannonGini
01.0183630.8087930.0003180.0001080.000140.0001890.00.00.0002140.0000370.0003680.4963910.0031810.0001180.0417550.3921530.115444.1747410.965327
\n", "
" ], "text/plain": [ " RMSE MAE precision recall F_1 F_05 \\\n", "0 1.018363 0.808793 0.000318 0.000108 0.00014 0.000189 \n", "\n", " precision_super recall_super NDCG mAP MRR LAUC \\\n", "0 0.0 0.0 0.000214 0.000037 0.000368 0.496391 \n", "\n", " HR F_2 Whole_average Reco in test Test coverage Shannon \\\n", "0 0.003181 0.000118 0.041755 0.392153 0.11544 4.174741 \n", "\n", " Gini \n", "0 0.965327 " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import evaluation_measures as ev\n", "estimations_df=pd.read_csv('Recommendations generated/ml-100k/Self_IKNN_estimations.csv', header=None)\n", "reco=np.loadtxt('Recommendations generated/ml-100k/Self_IKNN_reco.csv', delimiter=',')\n", "\n", "ev.evaluate(test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None),\n", " estimations_df=estimations_df, \n", " reco=reco,\n", " super_reactions=[4,5])" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "943it [00:00, 3730.64it/s]\n", "943it [00:00, 3921.13it/s]\n", "943it [00:00, 3732.17it/s]\n", "943it [00:00, 4078.27it/s]\n", "943it [00:00, 2833.82it/s]\n", "943it [00:00, 4027.94it/s]\n", "943it [00:00, 4634.12it/s]\n", "943it [00:00, 4453.36it/s]\n", "943it [00:00, 4301.74it/s]\n", "943it [00:00, 5008.94it/s]\n", "943it [00:00, 3542.76it/s]\n", "943it [00:00, 3280.94it/s]\n", "943it [00:00, 3370.61it/s]\n", "943it [00:00, 4467.43it/s]\n", "943it [00:00, 3794.77it/s]\n", "943it [00:00, 3759.22it/s]\n", "943it [00:00, 4144.81it/s]\n", "943it [00:00, 4232.41it/s]\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ModelRMSEMAEprecisionrecallF_1F_05precision_superrecall_superNDCGmAPMRRLAUCHRF_2Whole_averageReco in testTest coverageShannonGini
0Self_RP3Beta3.7029283.5277130.3226940.2160690.2121520.2475380.2452790.2849830.3882710.2482390.6363180.6056830.9109230.2054500.3769670.9997880.1789324.5496630.950182
0Self_P33.7024463.5272730.2821850.1920920.1867490.2169800.2041850.2400960.3391140.2049050.5721570.5935440.8759280.1817020.3408031.0000000.0772013.8758920.974947
0Self_TopPop2.5082582.2179090.1888650.1169190.1187320.1415840.1304720.1374730.2146510.1117070.4009390.5555460.7656420.1127500.2496071.0000000.0389613.1590790.987317
0Self_SVDBaseline3.6456663.4802460.1378580.0823980.0841510.1010630.1079400.1093930.1644770.0829730.3423740.5380970.6383880.0798600.2057480.9998940.2792215.1590760.907220
0Ready_SVD0.9525630.7501580.0944860.0462740.0513890.0656250.0826180.0741500.1093200.0513830.2406930.5198490.4750800.0462370.1547590.9934250.2063494.4429960.952832
0Self_SVD0.9148900.7179620.1029690.0423250.0520220.0693130.0935620.0749940.1054160.0502780.1915330.5178900.4623540.0445910.1506040.8676560.1414143.9292490.971112
0Ready_Baseline0.9494590.7524870.0914100.0376520.0460300.0612860.0796140.0564630.0959570.0431780.1981930.5155010.4379640.0395490.1419001.0000000.0339112.8365130.991139
0Self_KNNSurprisetask0.9462550.7452090.0834570.0328480.0412270.0554930.0747850.0488900.0895770.0409020.1890570.5130760.4178150.0349960.1351770.8885470.1305923.6118060.978659
0Self_TopRated2.5082582.2179090.0793210.0326670.0399830.0531700.0688840.0485820.0707660.0276020.1147900.5129430.4114530.0343850.1245461.0000000.0245312.7612380.991660
0Ready_SVDBiased0.9421410.7427600.0812300.0323440.0403020.0539320.0726390.0511260.0875520.0393460.1912850.5128180.4167550.0344050.1344780.9976670.1652244.1475790.964690
0Self_GlobalAvg1.1257600.9435340.0611880.0259680.0313830.0413430.0405580.0321070.0676950.0274700.1711870.5095460.3849420.0272130.1183831.0000000.0259742.7117720.992003
0Ready_Random1.5256331.2257140.0477200.0220490.0254940.0328450.0290770.0250150.0517570.0192420.1281810.5075430.3276780.0226280.1032690.9872750.1847045.1051220.906561
0Ready_I-KNN1.0303860.8130670.0260870.0069080.0105930.0160460.0211370.0095220.0242140.0089580.0480680.4998850.1548250.0080070.0695210.4023330.4343435.1336500.877999
0Ready_I-KNNBaseline0.9353270.7374240.0025450.0007550.0011050.0016020.0022530.0009300.0034440.0013620.0117600.4967240.0212090.0008620.0453790.4828210.0598852.2325780.994487
0Ready_U-KNN1.0234950.8079130.0007420.0002050.0003050.0004490.0005360.0001980.0008450.0002740.0027440.4964410.0074230.0002350.0425330.6021210.0108232.0891860.995706
0Self_BaselineIU0.9581360.7540510.0009540.0001880.0002980.0004810.0006440.0002230.0010430.0003350.0033480.4964330.0095440.0002200.0428090.6990460.0050511.9459100.995669
0Self_BaselineUI0.9675850.7627400.0009540.0001700.0002780.0004630.0006440.0001890.0007520.0001680.0016770.4964240.0095440.0002010.0426220.6005300.0050511.8031260.996380
0Self_IKNN1.0183630.8087930.0003180.0001080.0001400.0001890.0000000.0000000.0002140.0000370.0003680.4963910.0031810.0001180.0417550.3921530.1154404.1747410.965327
\n", "
" ], "text/plain": [ " Model RMSE MAE precision recall F_1 \\\n", "0 Self_RP3Beta 3.702928 3.527713 0.322694 0.216069 0.212152 \n", "0 Self_P3 3.702446 3.527273 0.282185 0.192092 0.186749 \n", "0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 0.118732 \n", "0 Self_SVDBaseline 3.645666 3.480246 0.137858 0.082398 0.084151 \n", "0 Ready_SVD 0.952563 0.750158 0.094486 0.046274 0.051389 \n", "0 Self_SVD 0.914890 0.717962 0.102969 0.042325 0.052022 \n", "0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 0.046030 \n", "0 Self_KNNSurprisetask 0.946255 0.745209 0.083457 0.032848 0.041227 \n", "0 Self_TopRated 2.508258 2.217909 0.079321 0.032667 0.039983 \n", "0 Ready_SVDBiased 0.942141 0.742760 0.081230 0.032344 0.040302 \n", "0 Self_GlobalAvg 1.125760 0.943534 0.061188 0.025968 0.031383 \n", "0 Ready_Random 1.525633 1.225714 0.047720 0.022049 0.025494 \n", "0 Ready_I-KNN 1.030386 0.813067 0.026087 0.006908 0.010593 \n", "0 Ready_I-KNNBaseline 0.935327 0.737424 0.002545 0.000755 0.001105 \n", "0 Ready_U-KNN 1.023495 0.807913 0.000742 0.000205 0.000305 \n", "0 Self_BaselineIU 0.958136 0.754051 0.000954 0.000188 0.000298 \n", "0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 0.000278 \n", "0 Self_IKNN 1.018363 0.808793 0.000318 0.000108 0.000140 \n", "\n", " F_05 precision_super recall_super NDCG mAP MRR \\\n", "0 0.247538 0.245279 0.284983 0.388271 0.248239 0.636318 \n", "0 0.216980 0.204185 0.240096 0.339114 0.204905 0.572157 \n", "0 0.141584 0.130472 0.137473 0.214651 0.111707 0.400939 \n", "0 0.101063 0.107940 0.109393 0.164477 0.082973 0.342374 \n", "0 0.065625 0.082618 0.074150 0.109320 0.051383 0.240693 \n", "0 0.069313 0.093562 0.074994 0.105416 0.050278 0.191533 \n", "0 0.061286 0.079614 0.056463 0.095957 0.043178 0.198193 \n", "0 0.055493 0.074785 0.048890 0.089577 0.040902 0.189057 \n", "0 0.053170 0.068884 0.048582 0.070766 0.027602 0.114790 \n", "0 0.053932 0.072639 0.051126 0.087552 0.039346 0.191285 \n", "0 0.041343 0.040558 0.032107 0.067695 0.027470 0.171187 \n", "0 0.032845 0.029077 0.025015 0.051757 0.019242 0.128181 \n", "0 0.016046 0.021137 0.009522 0.024214 0.008958 0.048068 \n", "0 0.001602 0.002253 0.000930 0.003444 0.001362 0.011760 \n", "0 0.000449 0.000536 0.000198 0.000845 0.000274 0.002744 \n", "0 0.000481 0.000644 0.000223 0.001043 0.000335 0.003348 \n", "0 0.000463 0.000644 0.000189 0.000752 0.000168 0.001677 \n", "0 0.000189 0.000000 0.000000 0.000214 0.000037 0.000368 \n", "\n", " LAUC HR F_2 Whole_average Reco in test Test coverage \\\n", "0 0.605683 0.910923 0.205450 0.376967 0.999788 0.178932 \n", "0 0.593544 0.875928 0.181702 0.340803 1.000000 0.077201 \n", "0 0.555546 0.765642 0.112750 0.249607 1.000000 0.038961 \n", "0 0.538097 0.638388 0.079860 0.205748 0.999894 0.279221 \n", "0 0.519849 0.475080 0.046237 0.154759 0.993425 0.206349 \n", "0 0.517890 0.462354 0.044591 0.150604 0.867656 0.141414 \n", "0 0.515501 0.437964 0.039549 0.141900 1.000000 0.033911 \n", "0 0.513076 0.417815 0.034996 0.135177 0.888547 0.130592 \n", "0 0.512943 0.411453 0.034385 0.124546 1.000000 0.024531 \n", "0 0.512818 0.416755 0.034405 0.134478 0.997667 0.165224 \n", "0 0.509546 0.384942 0.027213 0.118383 1.000000 0.025974 \n", "0 0.507543 0.327678 0.022628 0.103269 0.987275 0.184704 \n", "0 0.499885 0.154825 0.008007 0.069521 0.402333 0.434343 \n", "0 0.496724 0.021209 0.000862 0.045379 0.482821 0.059885 \n", "0 0.496441 0.007423 0.000235 0.042533 0.602121 0.010823 \n", "0 0.496433 0.009544 0.000220 0.042809 0.699046 0.005051 \n", "0 0.496424 0.009544 0.000201 0.042622 0.600530 0.005051 \n", "0 0.496391 0.003181 0.000118 0.041755 0.392153 0.115440 \n", "\n", " Shannon Gini \n", "0 4.549663 0.950182 \n", "0 3.875892 0.974947 \n", "0 3.159079 0.987317 \n", "0 5.159076 0.907220 \n", "0 4.442996 0.952832 \n", "0 3.929249 0.971112 \n", "0 2.836513 0.991139 \n", "0 3.611806 0.978659 \n", "0 2.761238 0.991660 \n", "0 4.147579 0.964690 \n", "0 2.711772 0.992003 \n", "0 5.105122 0.906561 \n", "0 5.133650 0.877999 \n", "0 2.232578 0.994487 \n", "0 2.089186 0.995706 \n", "0 1.945910 0.995669 \n", "0 1.803126 0.996380 \n", "0 4.174741 0.965327 " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import imp\n", "imp.reload(ev)\n", "\n", "import evaluation_measures as ev\n", "dir_path=\"Recommendations generated/ml-100k/\"\n", "super_reactions=[4,5]\n", "test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n", "\n", "ev.evaluate_all(test, dir_path, super_reactions)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Ready-made KNNs - Surprise implementation" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### I-KNN - basic" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Computing the cosine similarity matrix...\n", "Done computing similarity matrix.\n", "Generating predictions...\n", "Generating top N recommendations...\n", "Generating predictions...\n" ] } ], "source": [ "import helpers\n", "import surprise as sp\n", "import imp\n", "imp.reload(helpers)\n", "\n", "sim_options = {'name': 'cosine',\n", " 'user_based': False} # compute similarities between items\n", "algo = sp.KNNBasic(sim_options=sim_options)\n", "\n", "helpers.ready_made(algo, reco_path='Recommendations generated/ml-100k/Ready_I-KNN_reco.csv',\n", " estimations_path='Recommendations generated/ml-100k/Ready_I-KNN_estimations.csv')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### U-KNN - basic" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Computing the cosine similarity matrix...\n", "Done computing similarity matrix.\n", "Generating predictions...\n", "Generating top N recommendations...\n", "Generating predictions...\n" ] } ], "source": [ "import helpers\n", "import surprise as sp\n", "import imp\n", "imp.reload(helpers)\n", "\n", "sim_options = {'name': 'cosine',\n", " 'user_based': True} # compute similarities between users\n", "algo = sp.KNNBasic(sim_options=sim_options)\n", "\n", "helpers.ready_made(algo, reco_path='Recommendations generated/ml-100k/Ready_U-KNN_reco.csv',\n", " estimations_path='Recommendations generated/ml-100k/Ready_U-KNN_estimations.csv')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### I-KNN - on top baseline" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Estimating biases using als...\n", "Computing the msd similarity matrix...\n", "Done computing similarity matrix.\n", "Generating predictions...\n", "Generating top N recommendations...\n", "Generating predictions...\n" ] } ], "source": [ "import helpers\n", "import surprise as sp\n", "import imp\n", "imp.reload(helpers)\n", "\n", "sim_options = {'name': 'cosine',\n", " 'user_based': False} # compute similarities between items\n", "algo = sp.KNNBaseline()\n", "\n", "helpers.ready_made(algo, reco_path='Recommendations generated/ml-100k/Ready_I-KNNBaseline_reco.csv',\n", " estimations_path='Recommendations generated/ml-100k/Ready_I-KNNBaseline_estimations.csv')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# project task 4: use a version of your choice of Surprise KNNalgorithm" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Estimating biases using als...\n", "Computing the cosine similarity matrix...\n", "Done computing similarity matrix.\n", "Generating predictions...\n", "Generating top N recommendations...\n", "Generating predictions...\n" ] } ], "source": [ "# read the docs and try to find best parameter configuration (let say in terms of RMSE)\n", "# https://surprise.readthedocs.io/en/stable/knn_inspired.html##surprise.prediction_algorithms.knns.KNNBaseline\n", "# the solution here can be similar to examples above\n", "# please save the output in 'Recommendations generated/ml-100k/Self_KNNSurprisetask_reco.csv' and\n", "# 'Recommendations generated/ml-100k/Self_KNNSurprisetask_estimations.csv'\n", "\n", "## SOLUTION TASK 4\n", "\n", "import helpers\n", "import surprise as sp\n", "import imp\n", "\n", "imp.reload(helpers)\n", "\n", "sim_options = {'name': 'cosine',\n", " 'user_based': False}\n", "\n", "algo = sp.KNNBaseline(sim_options = sim_options)\n", "\n", "helpers.ready_made(algo, reco_path = 'Recommendations generated/ml-100k/Self_KNNSurprisetask_reco.csv',\n", " estimations_path = 'Recommendations generated/ml-100k/Self_KNNSurprisetask_estimations.csv')" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "943it [00:00, 2579.01it/s]\n", "943it [00:00, 2473.28it/s]\n", "943it [00:00, 2787.61it/s]\n", "943it [00:00, 2862.03it/s]\n", "943it [00:00, 2636.14it/s]\n", "943it [00:00, 2764.91it/s]\n", "943it [00:00, 2362.52it/s]\n", "943it [00:00, 2446.87it/s]\n", "943it [00:00, 2961.39it/s]\n", "943it [00:00, 2858.86it/s]\n", "943it [00:00, 2449.24it/s]\n", "943it [00:00, 2748.70it/s]\n", "943it [00:00, 2379.95it/s]\n", "943it [00:00, 2599.51it/s]\n", "943it [00:00, 2705.51it/s]\n", "943it [00:00, 2574.33it/s]\n", "943it [00:00, 2450.80it/s]\n", "943it [00:00, 2242.87it/s]\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ModelRMSEMAEprecisionrecallF_1F_05precision_superrecall_superNDCGmAPMRRLAUCHRF_2Whole_averageReco in testTest coverageShannonGini
0Self_RP3Beta3.7029283.5277130.3226940.2160690.2121520.2475380.2452790.2849830.3882710.2482390.6363180.6056830.9109230.2054500.3769670.9997880.1789324.5496630.950182
0Self_P33.7024463.5272730.2821850.1920920.1867490.2169800.2041850.2400960.3391140.2049050.5721570.5935440.8759280.1817020.3408031.0000000.0772013.8758920.974947
0Self_TopPop2.5082582.2179090.1888650.1169190.1187320.1415840.1304720.1374730.2146510.1117070.4009390.5555460.7656420.1127500.2496071.0000000.0389613.1590790.987317
0Self_SVDBaseline3.6456663.4802460.1378580.0823980.0841510.1010630.1079400.1093930.1644770.0829730.3423740.5380970.6383880.0798600.2057480.9998940.2792215.1590760.907220
0Ready_SVD0.9525630.7501580.0944860.0462740.0513890.0656250.0826180.0741500.1093200.0513830.2406930.5198490.4750800.0462370.1547590.9934250.2063494.4429960.952832
0Self_SVD0.9148900.7179620.1029690.0423250.0520220.0693130.0935620.0749940.1054160.0502780.1915330.5178900.4623540.0445910.1506040.8676560.1414143.9292490.971112
0Ready_Baseline0.9494590.7524870.0914100.0376520.0460300.0612860.0796140.0564630.0959570.0431780.1981930.5155010.4379640.0395490.1419001.0000000.0339112.8365130.991139
0Self_KNNSurprisetask0.9462550.7452090.0834570.0328480.0412270.0554930.0747850.0488900.0895770.0409020.1890570.5130760.4178150.0349960.1351770.8885470.1305923.6118060.978659
0Self_TopRated2.5082582.2179090.0793210.0326670.0399830.0531700.0688840.0485820.0707660.0276020.1147900.5129430.4114530.0343850.1245461.0000000.0245312.7612380.991660
0Ready_SVDBiased0.9421410.7427600.0812300.0323440.0403020.0539320.0726390.0511260.0875520.0393460.1912850.5128180.4167550.0344050.1344780.9976670.1652244.1475790.964690
0Self_GlobalAvg1.1257600.9435340.0611880.0259680.0313830.0413430.0405580.0321070.0676950.0274700.1711870.5095460.3849420.0272130.1183831.0000000.0259742.7117720.992003
0Ready_Random1.5256331.2257140.0477200.0220490.0254940.0328450.0290770.0250150.0517570.0192420.1281810.5075430.3276780.0226280.1032690.9872750.1847045.1051220.906561
0Ready_I-KNN1.0303860.8130670.0260870.0069080.0105930.0160460.0211370.0095220.0242140.0089580.0480680.4998850.1548250.0080070.0695210.4023330.4343435.1336500.877999
0Ready_I-KNNBaseline0.9353270.7374240.0025450.0007550.0011050.0016020.0022530.0009300.0034440.0013620.0117600.4967240.0212090.0008620.0453790.4828210.0598852.2325780.994487
0Ready_U-KNN1.0234950.8079130.0007420.0002050.0003050.0004490.0005360.0001980.0008450.0002740.0027440.4964410.0074230.0002350.0425330.6021210.0108232.0891860.995706
0Self_BaselineIU0.9581360.7540510.0009540.0001880.0002980.0004810.0006440.0002230.0010430.0003350.0033480.4964330.0095440.0002200.0428090.6990460.0050511.9459100.995669
0Self_BaselineUI0.9675850.7627400.0009540.0001700.0002780.0004630.0006440.0001890.0007520.0001680.0016770.4964240.0095440.0002010.0426220.6005300.0050511.8031260.996380
0Self_IKNN1.0183630.8087930.0003180.0001080.0001400.0001890.0000000.0000000.0002140.0000370.0003680.4963910.0031810.0001180.0417550.3921530.1154404.1747410.965327
\n", "
" ], "text/plain": [ " Model RMSE MAE precision recall F_1 \\\n", "0 Self_RP3Beta 3.702928 3.527713 0.322694 0.216069 0.212152 \n", "0 Self_P3 3.702446 3.527273 0.282185 0.192092 0.186749 \n", "0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 0.118732 \n", "0 Self_SVDBaseline 3.645666 3.480246 0.137858 0.082398 0.084151 \n", "0 Ready_SVD 0.952563 0.750158 0.094486 0.046274 0.051389 \n", "0 Self_SVD 0.914890 0.717962 0.102969 0.042325 0.052022 \n", "0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 0.046030 \n", "0 Self_KNNSurprisetask 0.946255 0.745209 0.083457 0.032848 0.041227 \n", "0 Self_TopRated 2.508258 2.217909 0.079321 0.032667 0.039983 \n", "0 Ready_SVDBiased 0.942141 0.742760 0.081230 0.032344 0.040302 \n", "0 Self_GlobalAvg 1.125760 0.943534 0.061188 0.025968 0.031383 \n", "0 Ready_Random 1.525633 1.225714 0.047720 0.022049 0.025494 \n", "0 Ready_I-KNN 1.030386 0.813067 0.026087 0.006908 0.010593 \n", "0 Ready_I-KNNBaseline 0.935327 0.737424 0.002545 0.000755 0.001105 \n", "0 Ready_U-KNN 1.023495 0.807913 0.000742 0.000205 0.000305 \n", "0 Self_BaselineIU 0.958136 0.754051 0.000954 0.000188 0.000298 \n", "0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 0.000278 \n", "0 Self_IKNN 1.018363 0.808793 0.000318 0.000108 0.000140 \n", "\n", " F_05 precision_super recall_super NDCG mAP MRR \\\n", "0 0.247538 0.245279 0.284983 0.388271 0.248239 0.636318 \n", "0 0.216980 0.204185 0.240096 0.339114 0.204905 0.572157 \n", "0 0.141584 0.130472 0.137473 0.214651 0.111707 0.400939 \n", "0 0.101063 0.107940 0.109393 0.164477 0.082973 0.342374 \n", "0 0.065625 0.082618 0.074150 0.109320 0.051383 0.240693 \n", "0 0.069313 0.093562 0.074994 0.105416 0.050278 0.191533 \n", "0 0.061286 0.079614 0.056463 0.095957 0.043178 0.198193 \n", "0 0.055493 0.074785 0.048890 0.089577 0.040902 0.189057 \n", "0 0.053170 0.068884 0.048582 0.070766 0.027602 0.114790 \n", "0 0.053932 0.072639 0.051126 0.087552 0.039346 0.191285 \n", "0 0.041343 0.040558 0.032107 0.067695 0.027470 0.171187 \n", "0 0.032845 0.029077 0.025015 0.051757 0.019242 0.128181 \n", "0 0.016046 0.021137 0.009522 0.024214 0.008958 0.048068 \n", "0 0.001602 0.002253 0.000930 0.003444 0.001362 0.011760 \n", "0 0.000449 0.000536 0.000198 0.000845 0.000274 0.002744 \n", "0 0.000481 0.000644 0.000223 0.001043 0.000335 0.003348 \n", "0 0.000463 0.000644 0.000189 0.000752 0.000168 0.001677 \n", "0 0.000189 0.000000 0.000000 0.000214 0.000037 0.000368 \n", "\n", " LAUC HR F_2 Whole_average Reco in test Test coverage \\\n", "0 0.605683 0.910923 0.205450 0.376967 0.999788 0.178932 \n", "0 0.593544 0.875928 0.181702 0.340803 1.000000 0.077201 \n", "0 0.555546 0.765642 0.112750 0.249607 1.000000 0.038961 \n", "0 0.538097 0.638388 0.079860 0.205748 0.999894 0.279221 \n", "0 0.519849 0.475080 0.046237 0.154759 0.993425 0.206349 \n", "0 0.517890 0.462354 0.044591 0.150604 0.867656 0.141414 \n", "0 0.515501 0.437964 0.039549 0.141900 1.000000 0.033911 \n", "0 0.513076 0.417815 0.034996 0.135177 0.888547 0.130592 \n", "0 0.512943 0.411453 0.034385 0.124546 1.000000 0.024531 \n", "0 0.512818 0.416755 0.034405 0.134478 0.997667 0.165224 \n", "0 0.509546 0.384942 0.027213 0.118383 1.000000 0.025974 \n", "0 0.507543 0.327678 0.022628 0.103269 0.987275 0.184704 \n", "0 0.499885 0.154825 0.008007 0.069521 0.402333 0.434343 \n", "0 0.496724 0.021209 0.000862 0.045379 0.482821 0.059885 \n", "0 0.496441 0.007423 0.000235 0.042533 0.602121 0.010823 \n", "0 0.496433 0.009544 0.000220 0.042809 0.699046 0.005051 \n", "0 0.496424 0.009544 0.000201 0.042622 0.600530 0.005051 \n", "0 0.496391 0.003181 0.000118 0.041755 0.392153 0.115440 \n", "\n", " Shannon Gini \n", "0 4.549663 0.950182 \n", "0 3.875892 0.974947 \n", "0 3.159079 0.987317 \n", "0 5.159076 0.907220 \n", "0 4.442996 0.952832 \n", "0 3.929249 0.971112 \n", "0 2.836513 0.991139 \n", "0 3.611806 0.978659 \n", "0 2.761238 0.991660 \n", "0 4.147579 0.964690 \n", "0 2.711772 0.992003 \n", "0 5.105122 0.906561 \n", "0 5.133650 0.877999 \n", "0 2.232578 0.994487 \n", "0 2.089186 0.995706 \n", "0 1.945910 0.995669 \n", "0 1.803126 0.996380 \n", "0 4.174741 0.965327 " ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import imp\n", "imp.reload(ev)\n", "\n", "import evaluation_measures as ev\n", "dir_path = \"Recommendations generated/ml-100k/\"\n", "super_reactions = [4, 5]\n", "test = pd.read_csv('./Datasets/ml-100k/test.csv', sep = '\\t', header = None)\n", "\n", "ev.evaluate_all(test, dir_path, super_reactions)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.9" } }, "nbformat": 4, "nbformat_minor": 4 }