Systemy-rekomedacyjne-praca.../P4. Matrix Factorization.ipynb
2020-06-05 16:01:34 +02:00

106 KiB

Self-made SVD

import helpers
import pandas as pd
import numpy as np
import scipy.sparse as sparse
from collections import defaultdict
from itertools import chain
import random

# Load the tab-separated, header-less train/test rating files.
train_read=pd.read_csv('./Datasets/ml-100k/train.csv', sep='\t', header=None)
test_read=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\t', header=None)
# helpers.data_to_csr is project-local. Later cells use train_ui/test_ui as sparse
# matrices (.indices/.indptr/.nonzero()) and index user_code_id/item_code_id by
# matrix row/column code. Presumably the *_code_id / *_id_code objects are
# code<->original-id lookups - confirm in helpers.
train_ui, test_ui, user_code_id, user_id_code, item_code_id, item_id_code = helpers.data_to_csr(train_read, test_read)
# Done similarly to https://github.com/albertauyeung/matrix-factorization-in-python
from tqdm import tqdm

class SVD():
    """Unbiased matrix factorization ("SVD"-style) recommender trained with SGD.

    The rating of user ``u`` for item ``i`` is estimated as the dot product
    ``Pu[u] . Qi[i]`` of two latent-factor vectors. Users and items are
    addressed by their row/column codes in ``train_ui``.
    """

    def __init__(self, train_ui, learning_rate, regularization, nb_factors, iterations):
        """Store hyperparameters and randomly initialise the factor matrices.

        Args:
            train_ui: scipy sparse user-item rating matrix. ``recommend`` reads
                ``indices``/``indptr``, so a CSR matrix is expected.
            learning_rate: SGD step size.
            regularization: L2 penalty weight applied to both factor matrices.
            nb_factors: number of latent factors per user/item.
            iterations: number of training epochs run by ``train``.
        """
        self.train_ui=train_ui
        self.uir=self._to_uir(train_ui)

        self.learning_rate=learning_rate
        self.regularization=regularization
        self.iterations=iterations
        self.nb_users, self.nb_items=train_ui.shape
        self.nb_ratings=train_ui.nnz
        self.nb_factors=nb_factors

        self.Pu=np.random.normal(loc=0, scale=1./self.nb_factors, size=(self.nb_users, self.nb_factors))
        self.Qi=np.random.normal(loc=0, scale=1./self.nb_factors, size=(self.nb_items, self.nb_factors))

    @staticmethod
    def _to_uir(matrix):
        """Return the non-zero entries of a sparse matrix as (user, item, rating) triples."""
        coo = matrix.tocoo()
        # Filter rows, columns and values with the SAME mask. Zipping nonzero()
        # with the raw .data array misaligns the triples as soon as the matrix
        # stores an explicit zero.
        stored = coo.data != 0
        return list(zip(coo.row[stored], coo.col[stored], coo.data[stored]))

    def train(self, test_ui=None):
        """Run ``iterations`` SGD epochs and record the RMSE after each one.

        ``learning_process`` gets one ``[epoch, train_rmse]`` row per epoch, or
        ``[epoch, train_rmse, test_rmse]`` when ``test_ui`` is given.
        """
        # Identity check: ==/!= on a sparse matrix is an element-wise operation.
        track_test = test_ui is not None
        if track_test:
            self.test_uir=self._to_uir(test_ui)

        self.learning_process=[]
        pbar = tqdm(range(self.iterations))
        for i in pbar:
            pbar.set_description(f'Epoch {i} RMSE: {self.learning_process[-1][1] if i>0 else 0}. Training epoch {i+1}...')
            np.random.shuffle(self.uir)
            self.sgd(self.uir)
            epoch_row = [i+1, self.RMSE_total(self.uir)]
            if track_test:
                epoch_row.append(self.RMSE_total(self.test_uir))
            self.learning_process.append(epoch_row)

    def sgd(self, uir):
        """Perform one pass of stochastic gradient descent over ``uir`` triples."""
        for u, i, score in uir:
            # Compute prediction and error
            prediction = self.get_rating(u,i)
            e = (score - prediction)

            # Both updates are computed from the pre-update vectors, then applied.
            Pu_update=self.learning_rate * (e * self.Qi[i] - self.regularization * self.Pu[u])
            Qi_update=self.learning_rate * (e * self.Pu[u] - self.regularization * self.Qi[i])

            self.Pu[u] += Pu_update
            self.Qi[i] += Qi_update

    def get_rating(self, u, i):
        """Return the estimated rating of user code ``u`` for item code ``i``."""
        prediction = self.Pu[u].dot(self.Qi[i].T)
        return prediction

    def RMSE_total(self, uir):
        """Return the root-mean-square error over ``uir`` triples (NaN if empty)."""
        if len(uir) == 0:
            return np.nan
        squared_error=0
        for u,i, score in uir:
            prediction = self.get_rating(u,i)
            squared_error+=(score - prediction)**2
        return np.sqrt(squared_error/len(uir))

    def estimations(self):
        """Compute the dense users x items matrix of estimated ratings.

        NOTE: kept for backward compatibility - the result is stored in the
        instance attribute ``estimations``, which shadows this method, so it
        can be called only once per instance.
        """
        self.estimations=\
        np.dot(self.Pu,self.Qi.T)

    def _estimation_matrix(self):
        """Return the dense estimation matrix, computing it on first use."""
        if callable(self.estimations):
            # Still the bound method, i.e. estimations() has not been called yet.
            self.estimations()
        return self.estimations

    def recommend(self, user_code_id, item_code_id, topK=10):
        """Return top-``topK`` unseen items per user.

        Each row has the format ``[user_id, item1, score1, item2, score2, ...]``.
        Items stored in the user's row of ``train_ui`` and NaN scores are skipped.
        """
        estimations = self._estimation_matrix()
        indptr, indices = self.train_ui.indptr, self.train_ui.indices

        top_k = defaultdict(list)
        for nb_user, user in enumerate(estimations):
            # A set gives O(1) membership tests in the inner loop.
            user_rated=set(indices[indptr[nb_user]:indptr[nb_user+1]].tolist())
            for item, score in enumerate(user):
                if item not in user_rated and not np.isnan(score):
                    top_k[user_code_id[nb_user]].append((item_code_id[item], score))
        result=[]
        # Let's choose k best items in the format: (user, item1, score1, item2, score2, ...)
        for uid, item_scores in top_k.items():
            item_scores.sort(key=lambda x: x[1], reverse=True)
            result.append([uid]+list(chain(*item_scores[:topK])))
        return result

    def estimate(self, user_code_id, item_code_id, test_ui):
        """Return ``[user_id, item_id, estimated_rating]`` for every non-zero entry of ``test_ui``.

        A NaN estimate is replaced by 1.
        """
        estimations = self._estimation_matrix()
        result=[]
        for user, item in zip(*test_ui.nonzero()):
            score = estimations[user,item]
            result.append([user_code_id[user], item_code_id[item],
                           score if not np.isnan(score) else 1])
        return result
# Train the unbiased SVD; passing test_ui makes train() also record the test RMSE per epoch.
model=SVD(train_ui, learning_rate=0.005, regularization=0.02, nb_factors=100, iterations=40)
model.train(test_ui)
Epoch 39 RMSE: 0.7469054750619549. Training epoch 40...: 100%|█████████████████████████| 40/40 [01:10<00:00,  1.76s/it]
import matplotlib.pyplot as plt

# Keep only the first two columns of the training log (epoch, train RMSE) and plot them.
df=pd.DataFrame(model.learning_process).iloc[:,:2]
df.columns=['epoch', 'train_RMSE']
plt.plot('epoch', 'train_RMSE', data=df, color='blue')
plt.legend()
<matplotlib.legend.Legend at 0x19ac6c66f08>
import matplotlib.pyplot as plt

# Skip the first 10 logged epochs, then compare train and test RMSE.
df=pd.DataFrame(model.learning_process[10:], columns=['epoch', 'train_RMSE', 'test_RMSE'])
plt.plot('epoch', 'train_RMSE', data=df, color='blue')
plt.plot('epoch', 'test_RMSE', data=df, color='yellow', linestyle='dashed')
plt.legend()
<matplotlib.legend.Legend at 0x19ac1ce8308>

Saving and evaluating recommendations

# Materialise the dense estimation matrix (stored on the model as `model.estimations`).
model.estimations()

# Top-10 recommendations per user, one row per user: user, item1, score1, item2, score2, ...
top_n=pd.DataFrame(model.recommend(user_code_id, item_code_id, topK=10))

top_n.to_csv('Recommendations generated/ml-100k/Self_SVD_reco.csv', index=False, header=False)

# Estimated ratings for every (user, item) pair present in the test matrix.
estimations=pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))
estimations.to_csv('Recommendations generated/ml-100k/Self_SVD_estimations.csv', index=False, header=False)
import evaluation_measures as ev

# Read back the files written for Self_SVD and score them against the test set.
estimations_df=pd.read_csv('Recommendations generated/ml-100k/Self_SVD_estimations.csv', header=None)
reco=np.loadtxt('Recommendations generated/ml-100k/Self_SVD_reco.csv', delimiter=',')

# ev.evaluate is project-local; super_reactions=[4,5] presumably marks the ratings
# counted as strongly positive - confirm in evaluation_measures.
ev.evaluate(test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\t', header=None),
            estimations_df=estimations_df, 
            reco=reco,
            super_reactions=[4,5])
943it [00:00, 11351.12it/s]
RMSE MAE precision recall F_1 F_05 precision_super recall_super NDCG mAP MRR LAUC HR F_2 Whole_average Reco in test Test coverage Shannon Gini
0 0.91489 0.717962 0.102969 0.042325 0.052022 0.069313 0.093562 0.074994 0.105416 0.050278 0.191533 0.51789 0.462354 0.044591 0.150604 0.867656 0.141414 3.929249 0.971112
import importlib

import evaluation_measures as ev

# `imp` is deprecated and was removed in Python 3.12; importlib.reload is the
# supported replacement. Importing before reloading also lets this cell run in
# a fresh kernel, where `ev` would otherwise be undefined at reload time.
importlib.reload(ev)

dir_path="Recommendations generated/ml-100k/"
super_reactions=[4,5]
test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\t', header=None)

# Evaluate the models whose outputs are stored under dir_path against the test set.
ev.evaluate_all(test, dir_path, super_reactions)
943it [00:00, 11925.95it/s]
943it [00:00, 12314.60it/s]
943it [00:00, 10828.94it/s]
943it [00:00, 11925.91it/s]
943it [00:00, 11489.44it/s]
943it [00:00, 11489.57it/s]
943it [00:00, 10353.22it/s]
943it [00:00, 11925.88it/s]
943it [00:00, 11631.38it/s]
943it [00:00, 11631.42it/s]
943it [00:00, 10585.78it/s]
943it [00:00, 11215.99it/s]
943it [00:00, 11215.93it/s]
943it [00:00, 11776.84it/s]
943it [00:00, 12396.27it/s]
943it [00:00, 10468.25it/s]
943it [00:00, 11972.70it/s]
943it [00:00, 11925.84it/s]
Model RMSE MAE precision recall F_1 F_05 precision_super recall_super NDCG mAP MRR LAUC HR F_2 Whole_average Reco in test Test coverage Shannon Gini
0 Self_RP3Beta 3.702928 3.527713 0.322694 0.216069 0.212152 0.247538 0.245279 0.284983 0.388271 0.248239 0.636318 0.605683 0.910923 0.205450 0.376967 0.999788 0.178932 4.549663 0.950182
0 Self_P3 3.702446 3.527273 0.282185 0.192092 0.186749 0.216980 0.204185 0.240096 0.339114 0.204905 0.572157 0.593544 0.875928 0.181702 0.340803 1.000000 0.077201 3.875892 0.974947
0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 0.118732 0.141584 0.130472 0.137473 0.214651 0.111707 0.400939 0.555546 0.765642 0.112750 0.249607 1.000000 0.038961 3.159079 0.987317
0 Self_SVDBaseline 3.645871 3.480308 0.135949 0.078868 0.082011 0.099188 0.106974 0.103767 0.159486 0.079783 0.328576 0.536311 0.632025 0.077145 0.201674 0.999894 0.281385 5.140721 0.909056
0 Ready_SVD 0.950835 0.748676 0.097879 0.048335 0.053780 0.068420 0.086159 0.080289 0.113553 0.054094 0.249037 0.520893 0.498409 0.048439 0.159941 0.997985 0.204906 4.395721 0.954872
0 Self_SVD 0.914890 0.717962 0.102969 0.042325 0.052022 0.069313 0.093562 0.074994 0.105416 0.050278 0.191533 0.517890 0.462354 0.044591 0.150604 0.867656 0.141414 3.929249 0.971112
0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 0.046030 0.061286 0.079614 0.056463 0.095957 0.043178 0.198193 0.515501 0.437964 0.039549 0.141900 1.000000 0.033911 2.836513 0.991139
0 Ready_SVDBiased 0.943277 0.743628 0.080912 0.033048 0.040445 0.053881 0.070815 0.049631 0.090496 0.041928 0.200192 0.513176 0.411453 0.034776 0.135063 0.998727 0.168110 4.165618 0.964211
0 Self_KNNSurprisetask 0.946255 0.745209 0.083457 0.032848 0.041227 0.055493 0.074785 0.048890 0.089577 0.040902 0.189057 0.513076 0.417815 0.034996 0.135177 0.888547 0.130592 3.611806 0.978659
0 Self_TopRated 2.508258 2.217909 0.079321 0.032667 0.039983 0.053170 0.068884 0.048582 0.070766 0.027602 0.114790 0.512943 0.411453 0.034385 0.124546 1.000000 0.024531 2.761238 0.991660
0 Self_GlobalAvg 1.125760 0.943534 0.061188 0.025968 0.031383 0.041343 0.040558 0.032107 0.067695 0.027470 0.171187 0.509546 0.384942 0.027213 0.118383 1.000000 0.025974 2.711772 0.992003
0 Ready_Random 1.514265 1.215956 0.048780 0.021007 0.024667 0.032495 0.031867 0.023414 0.052904 0.020511 0.126790 0.507024 0.322375 0.021635 0.102789 0.988017 0.183983 5.100443 0.906900
0 Ready_I-KNN 1.030386 0.813067 0.026087 0.006908 0.010593 0.016046 0.021137 0.009522 0.024214 0.008958 0.048068 0.499885 0.154825 0.008007 0.069521 0.402333 0.434343 5.133650 0.877999
0 Ready_I-KNNBaseline 0.935327 0.737424 0.002545 0.000755 0.001105 0.001602 0.002253 0.000930 0.003444 0.001362 0.011760 0.496724 0.021209 0.000862 0.045379 0.482821 0.059885 2.232578 0.994487
0 Ready_U-KNN 1.023495 0.807913 0.000742 0.000205 0.000305 0.000449 0.000536 0.000198 0.000845 0.000274 0.002744 0.496441 0.007423 0.000235 0.042533 0.602121 0.010823 2.089186 0.995706
0 Self_BaselineIU 0.958136 0.754051 0.000954 0.000188 0.000298 0.000481 0.000644 0.000223 0.001043 0.000335 0.003348 0.496433 0.009544 0.000220 0.042809 0.699046 0.005051 1.945910 0.995669
0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 0.000278 0.000463 0.000644 0.000189 0.000752 0.000168 0.001677 0.496424 0.009544 0.000201 0.042622 0.600530 0.005051 1.803126 0.996380
0 Self_IKNN 1.018363 0.808793 0.000318 0.000108 0.000140 0.000189 0.000000 0.000000 0.000214 0.000037 0.000368 0.496391 0.003181 0.000118 0.041755 0.392153 0.115440 4.174741 0.965327

Embeddings

# Toy example: divide every row by its Euclidean norm so each row has unit length.
x=np.array([[1,2],[3,4]])
display(x)
# [:,None] turns the per-row norms into a column so the division broadcasts row-wise.
x/np.linalg.norm(x, axis=1)[:,None]
array([[1, 2],
       [3, 4]])
array([[0.4472136 , 0.89442719],
       [0.6       , 0.8       ]])
# Pick a random item code among those that occur in the training matrix.
item=random.choice(list(set(train_ui.indices)))

# Scale every item embedding to unit length, so the dot products below are cosine similarities.
embeddings_norm=model.Qi/np.linalg.norm(model.Qi, axis=1)[:,None] # we do not mean-center here
# omitting normalization also makes sense, but items with a greater magnitude will be recommended more often

# Similarity of every item to the chosen one; keep the 10 highest (the item itself scores 1.0).
similarity_scores=np.dot(embeddings_norm,embeddings_norm[item].T)
top_similar_items=pd.DataFrame(enumerate(similarity_scores), columns=['code', 'score'])\
.sort_values(by=['score'], ascending=[False])[:10]

# Translate internal item codes to dataset item ids.
top_similar_items['item_id']=top_similar_items['code'].apply(lambda x: item_code_id[x])

items=pd.read_csv('./Datasets/ml-100k/movies.csv')

# Attach the movie metadata by joining item_id with the `id` column of movies.csv.
result=pd.merge(top_similar_items, items, left_on='item_id', right_on='id')

result
code score item_id id title genres
0 423 1.000000 424 424 Children of the Corn: The Gathering (1996) Horror
1 984 0.980977 985 985 Blood & Wine (1997) Drama
2 1458 0.980754 1459 1459 Madame Butterfly (1995) Musical
3 1278 0.980699 1279 1279 Wild America (1997) Adventure, Children's
4 1380 0.980041 1381 1381 Losing Chase (1996) Drama
5 744 0.979974 745 745 Ruling Class, The (1972) Comedy
6 705 0.979904 706 706 Bad Moon (1996) Horror
7 1085 0.979812 1086 1086 It's My Party (1995) Drama
8 1237 0.979443 1238 1238 Full Speed (1996) Drama
9 1190 0.979429 1191 1191 Letter From Death Row, A (1998) Crime, Drama

Project task 5: implement SVD on top of the baseline (as it is in the Surprise library)

# making changes to our implementation by considering additional parameters in the gradient descent procedure 
# seems to be the fastest option
# please save the output in 'Recommendations generated/ml-100k/Self_SVDBaseline_reco.csv' and
# 'Recommendations generated/ml-100k/Self_SVDBaseline_estimations.csv'
# Done similarly to https://github.com/albertauyeung/matrix-factorization-in-python
from tqdm import tqdm

class SVDboosted():
    """Biased matrix factorization (SVD on top of a baseline) trained with SGD.

    The rating of user ``u`` for item ``i`` is estimated as
    ``global_mean + Bu[u] + Bi[i] + Pu[u] . Qi[i]``: the global rating mean,
    a scalar bias per user and per item, and the latent-factor interaction.
    Users and items are addressed by their row/column codes in ``train_ui``.
    """

    def __init__(self, train_ui, learning_rate, regularization, nb_factors, iterations):
        """Store hyperparameters and initialise biases and factor matrices.

        Args:
            train_ui: scipy sparse user-item rating matrix. ``recommend`` reads
                ``indices``/``indptr``, so a CSR matrix is expected.
            learning_rate: SGD step size.
            regularization: L2 penalty weight applied to biases and factors.
            nb_factors: number of latent factors per user/item.
            iterations: number of training epochs run by ``train``.
        """
        self.train_ui=train_ui
        self.uir=self._to_uir(train_ui)

        self.learning_rate=learning_rate
        self.regularization=regularization
        self.iterations=iterations
        self.nb_users, self.nb_items=train_ui.shape
        self.nb_ratings=train_ui.nnz
        self.nb_factors=nb_factors

        # Baseline part: global mean plus ONE scalar bias per user and per item.
        # (Biases shaped (n, nb_factors) turn every prediction into a vector.)
        self.global_mean=float(np.mean([score for _, _, score in self.uir])) if self.uir else 0.0
        self.Bu=np.zeros(self.nb_users)
        self.Bi=np.zeros(self.nb_items)

        self.Pu=np.random.normal(loc=0, scale=1./self.nb_factors, size=(self.nb_users, self.nb_factors))
        self.Qi=np.random.normal(loc=0, scale=1./self.nb_factors, size=(self.nb_items, self.nb_factors))

    @staticmethod
    def _to_uir(matrix):
        """Return the non-zero entries of a sparse matrix as (user, item, rating) triples."""
        coo = matrix.tocoo()
        # Filter rows, columns and values with the SAME mask. Zipping nonzero()
        # with the raw .data array misaligns the triples as soon as the matrix
        # stores an explicit zero.
        stored = coo.data != 0
        return list(zip(coo.row[stored], coo.col[stored], coo.data[stored]))

    def train(self, test_ui=None):
        """Run ``iterations`` SGD epochs and record the RMSE after each one.

        ``learning_process`` gets one ``[epoch, train_rmse]`` row per epoch, or
        ``[epoch, train_rmse, test_rmse]`` when ``test_ui`` is given.
        """
        # Identity check: ==/!= on a sparse matrix is an element-wise operation.
        track_test = test_ui is not None
        if track_test:
            self.test_uir=self._to_uir(test_ui)

        self.learning_process=[]
        pbar = tqdm(range(self.iterations))
        for i in pbar:
            pbar.set_description(f'Epoch {i} RMSE: {self.learning_process[-1][1] if i>0 else 0}. Training epoch {i+1}...')
            np.random.shuffle(self.uir)
            self.sgd(self.uir)
            epoch_row = [i+1, self.RMSE_total(self.uir)]
            if track_test:
                epoch_row.append(self.RMSE_total(self.test_uir))
            self.learning_process.append(epoch_row)

    def sgd(self, uir):
        """Perform one pass of stochastic gradient descent over ``uir`` triples."""
        for u, i, score in uir:
            # Compute prediction and error
            prediction = self.get_rating(u,i)
            e = (score - prediction)

            # All updates are computed from the pre-update parameters, then applied.
            Bu_update=self.learning_rate * (e - self.regularization * self.Bu[u])
            Bi_update=self.learning_rate * (e - self.regularization * self.Bi[i])
            Pu_update=self.learning_rate * (e * self.Qi[i] - self.regularization * self.Pu[u])
            Qi_update=self.learning_rate * (e * self.Pu[u] - self.regularization * self.Qi[i])

            self.Bu[u] += Bu_update
            self.Bi[i] += Bi_update
            self.Pu[u] += Pu_update
            self.Qi[i] += Qi_update

    def get_rating(self, u, i):
        """Return the estimated (scalar) rating of user code ``u`` for item code ``i``."""
        prediction = self.global_mean + self.Bu[u] + self.Bi[i] + self.Pu[u].dot(self.Qi[i].T)
        return prediction

    def RMSE_total(self, uir):
        """Return the root-mean-square error over ``uir`` triples (NaN if empty)."""
        if len(uir) == 0:
            return np.nan
        squared_error=0
        for u,i, score in uir:
            prediction = self.get_rating(u,i)
            squared_error+=(score - prediction)**2
        return np.sqrt(squared_error/len(uir))

    def estimations(self):
        """Compute the dense users x items matrix of estimated ratings.

        The matrix includes the global mean and both bias terms, so it agrees
        with ``get_rating``.

        NOTE: kept for backward compatibility - the result is stored in the
        instance attribute ``estimations``, which shadows this method, so it
        can be called only once per instance.
        """
        self.estimations=\
        self.global_mean + self.Bu[:,None] + self.Bi[None,:] + np.dot(self.Pu,self.Qi.T)

    def _estimation_matrix(self):
        """Return the dense estimation matrix, computing it on first use."""
        if callable(self.estimations):
            # Still the bound method, i.e. estimations() has not been called yet.
            self.estimations()
        return self.estimations

    def recommend(self, user_code_id, item_code_id, topK=10):
        """Return top-``topK`` unseen items per user.

        Each row has the format ``[user_id, item1, score1, item2, score2, ...]``.
        Items stored in the user's row of ``train_ui`` and NaN scores are skipped.
        """
        estimations = self._estimation_matrix()
        indptr, indices = self.train_ui.indptr, self.train_ui.indices

        top_k = defaultdict(list)
        for nb_user, user in enumerate(estimations):
            # A set gives O(1) membership tests in the inner loop.
            user_rated=set(indices[indptr[nb_user]:indptr[nb_user+1]].tolist())
            for item, score in enumerate(user):
                if item not in user_rated and not np.isnan(score):
                    top_k[user_code_id[nb_user]].append((item_code_id[item], score))
        result=[]
        # Let's choose k best items in the format: (user, item1, score1, item2, score2, ...)
        for uid, item_scores in top_k.items():
            item_scores.sort(key=lambda x: x[1], reverse=True)
            result.append([uid]+list(chain(*item_scores[:topK])))
        return result

    def estimate(self, user_code_id, item_code_id, test_ui):
        """Return ``[user_id, item_id, estimated_rating]`` for every non-zero entry of ``test_ui``.

        A NaN estimate is replaced by 1.
        """
        estimations = self._estimation_matrix()
        result=[]
        for user, item in zip(*test_ui.nonzero()):
            score = estimations[user,item]
            result.append([user_code_id[user], item_code_id[item],
                           score if not np.isnan(score) else 1])
        return result
# Train the bias-augmented model; passing test_ui makes train() also record the test RMSE per epoch.
model=SVDboosted(train_ui, learning_rate=0.005, regularization=0.02, nb_factors=100, iterations=40)
model.train(test_ui)
Epoch 1 RMSE: [1.56545038 1.56550953 1.56561831 1.56506255 1.56544717 1.56542472        | 1/40 [00:02<01:50,  2.83s/it]
Epoch 1 RMSE: [1.56545038 1.56550953 1.56561831 1.56506255 1.56544717 1.56542472
Epoch 2 RMSE: [1.24195612 1.24198904 1.24211143 1.24181535 1.24194284 1.24200128
Epoch 2 RMSE: [1.24195612 1.24198904 1.24211143 1.24181535 1.24194284 1.24200128
Epoch 3 RMSE: [1.13247887 1.13249914 1.13258642 1.13239893 1.13245611 1.13250907
Epoch 3 RMSE: [1.13247887 1.13249914 1.13258642 1.13239893 1.13245611 1.13250907
Epoch 4 RMSE: [1.07474573 1.07476035 1.07482625 1.07468999 1.07472024 1.07476528
Epoch 4 RMSE: [1.07474573 1.07476035 1.07482625 1.07468999 1.07472024 1.07476528
Epoch 5 RMSE: [1.03819311 1.03820533 1.03825798 1.03815174 1.03816959 1.03820895
Epoch 5 RMSE: [1.03819311 1.03820533 1.03825798 1.03815174 1.03816959 1.03820895
Epoch 6 RMSE: [1.01305431 1.01306406 1.0131073  1.0130215  1.01303201 1.01306765
Epoch 6 RMSE: [1.01305431 1.01306406 1.0131073  1.0130215  1.01303201 1.01306765
Epoch 7 RMSE: [0.99453145 0.99453934 0.99457617 0.99450541 0.99451055 0.99454367
Epoch 7 RMSE: [0.99453145 0.99453934 0.99457617 0.99450541 0.99451055 0.99454367
Epoch 8 RMSE: [0.98059211 0.98059798 0.9806297  0.98057082 0.980572   0.98060328
Epoch 8 RMSE: [0.98059211 0.98059798 0.9806297  0.98057082 0.980572   0.98060328
Epoch 9 RMSE: [0.96964915 0.96965335 0.96968135 0.96963161 0.96962996 0.96965908
Epoch 9 RMSE: [0.96964915 0.96965335 0.96968135 0.96963161 0.96962996 0.96965908
Epoch 10 RMSE: [0.96082121 0.96082423 0.96084907 0.96080661 0.96080248 0.9608298
Epoch 10 RMSE: [0.96082121 0.96082423 0.96084907 0.96080661 0.96080248 0.9608298
Epoch 11 RMSE: [0.95365923 0.95366146 0.95368372 0.95364696 0.95364089 0.953667
Epoch 11 RMSE: [0.95365923 0.95366146 0.95368372 0.95364696 0.95364089 0.953667
Epoch 12 RMSE: [0.94766016 0.94766156 0.94768159 0.94764986 0.947642   0.94766674
Epoch 12 RMSE: [0.94766016 0.94766156 0.94768159 0.94764986 0.947642   0.94766674
Epoch 13 RMSE: [0.94261767 0.94261834 0.94263624 0.94260867 0.94259966 0.94262314
Epoch 13 RMSE: [0.94261767 0.94261834 0.94263624 0.94260867 0.94259966 0.94262314
Epoch 14 RMSE: [0.938169   0.9381694  0.93818542 0.93816133 0.93815137 0.93817355
Epoch 14 RMSE: [0.938169   0.9381694  0.93818542 0.93816133 0.93815137 0.93817355
Epoch 15 RMSE: [0.93426778 0.9342676  0.9342824  0.93426131 0.93425033 0.93427147
Epoch 15 RMSE: [0.93426778 0.9342676  0.9342824  0.93426131 0.93425033 0.93427147
Epoch 16 RMSE: [0.93070807 0.93070774 0.93072136 0.93070245 0.93069117 0.93071116
Epoch 16 RMSE: [0.93070807 0.93070774 0.93072136 0.93070245 0.93069117 0.93071116
Epoch 17 RMSE: [0.92736841 0.92736783 0.9273801  0.92736348 0.92735194 0.92737089
Epoch 17 RMSE: [0.92736841 0.92736783 0.9273801  0.92736348 0.92735194 0.92737089
Epoch 18 RMSE: [0.92401862 0.92401778 0.92402901 0.92401417 0.92400251 0.92402036
Epoch 18 RMSE: [0.92401862 0.92401778 0.92402901 0.92401417 0.92400251 0.92402036
Epoch 19 RMSE: [0.92070555 0.92070466 0.92071497 0.92070168 0.92069002 0.92070695
Epoch 19 RMSE: [0.92070555 0.92070466 0.92071497 0.92070168 0.92069002 0.92070695
Epoch 20 RMSE: [0.91707281 0.91707182 0.91708121 0.91706927 0.91705756 0.91707364
Epoch 20 RMSE: [0.91707281 0.91707182 0.91708121 0.91706927 0.91705756 0.91707364
Epoch 21 RMSE: [0.91348792 0.91348679 0.91349536 0.91348474 0.91347301 0.91348798
Epoch 21 RMSE: [0.91348792 0.91348679 0.91349536 0.91348474 0.91347301 0.91348798
Epoch 22 RMSE: [0.90957269 0.90957161 0.90957937 0.90956986 0.9095583  0.90957263
Epoch 22 RMSE: [0.90957269 0.90957161 0.90957937 0.90956986 0.9095583  0.90957263
Epoch 23 RMSE: [0.90514652 0.90514525 0.90515258 0.90514401 0.90513237 0.90514616
Epoch 23 RMSE: [0.90514652 0.90514525 0.90515258 0.90514401 0.90513237 0.90514616
Epoch 24 RMSE: [0.90024301 0.90024177 0.90024834 0.90024062 0.90022924 0.90024226
Epoch 24 RMSE: [0.90024301 0.90024177 0.90024834 0.90024062 0.90022924 0.90024226
Epoch 25 RMSE: [0.89486164 0.89486046 0.89486646 0.89485941 0.89484821 0.89486074
Epoch 25 RMSE: [0.89486164 0.89486046 0.89486646 0.89485941 0.89484821 0.89486074
Epoch 26 RMSE: [0.88903799 0.88903702 0.88904243 0.88903603 0.88902515 0.88903703
Epoch 26 RMSE: [0.88903799 0.88903702 0.88904243 0.88903603 0.88902515 0.88903703
Epoch 27 RMSE: [0.88287967 0.88287863 0.88288359 0.88287782 0.88286725 0.88287848
Epoch 27 RMSE: [0.88287967 0.88287863 0.88288359 0.88287782 0.88286725 0.88287848
Epoch 28 RMSE: [0.87627836 0.87627739 0.87628186 0.8762767  0.87626624 0.8762769
Epoch 28 RMSE: [0.87627836 0.87627739 0.87628186 0.8762767  0.87626624 0.8762769
Epoch 29 RMSE: [0.86929878 0.86929781 0.86930189 0.86929724 0.86928706 0.86929718
Epoch 29 RMSE: [0.86929878 0.86929781 0.86930189 0.86929724 0.86928706 0.86929718
Epoch 30 RMSE: [0.8620862  0.86208531 0.86208899 0.86208476 0.86207471 0.86208446
Epoch 30 RMSE: [0.8620862  0.86208531 0.86208899 0.86208476 0.86207471 0.86208446
Epoch 31 RMSE: [0.85452681 0.85452603 0.85452921 0.85452551 0.85451573 0.85452503
Epoch 31 RMSE: [0.85452681 0.85452603 0.85452921 0.85452551 0.85451573 0.85452503
Epoch 32 RMSE: [0.84666254 0.84666179 0.84666469 0.84666136 0.8466519  0.84666071
Epoch 32 RMSE: [0.84666254 0.84666179 0.84666469 0.84666136 0.8466519  0.84666071
Epoch 33 RMSE: [0.83814164 0.83814095 0.83814355 0.83814049 0.83813121 0.83813985
Epoch 33 RMSE: [0.83814164 0.83814095 0.83814355 0.83814049 0.83813121 0.83813985
Epoch 34 RMSE: [0.8295605  0.82955997 0.82956222 0.82955955 0.8295505  0.82955863
Epoch 34 RMSE: [0.8295605  0.82955997 0.82956222 0.82955955 0.8295505  0.82955863
Epoch 35 RMSE: [0.82044053 0.82044008 0.82044203 0.82043968 0.82043093 0.82043865
Epoch 35 RMSE: [0.82044053 0.82044008 0.82044203 0.82043968 0.82043093 0.82043865
Epoch 36 RMSE: [0.81096402 0.81096361 0.81096533 0.81096327 0.8109547  0.81096221
Epoch 36 RMSE: [0.81096402 0.81096361 0.81096533 0.81096327 0.8109547  0.81096221
Epoch 37 RMSE: [0.80132847 0.80132813 0.80132952 0.80132774 0.80131947 0.80132666
Epoch 37 RMSE: [0.80132847 0.80132813 0.80132952 0.80132774 0.80131947 0.80132666
Epoch 38 RMSE: [0.79114637 0.79114614 0.79114736 0.79114574 0.79113763 0.79114471
Epoch 38 RMSE: [0.79114637 0.79114614 0.79114736 0.79114574 0.79113763 0.79114471
Epoch 39 RMSE: [0.78071211 0.78071175 0.78071286 0.78071155 0.7807037  0.78071049
Epoch 39 RMSE: [0.78071211 0.78071175 0.78071286 0.78071155 0.7807037  0.78071049
Epoch 39 RMSE: [0.78071211 0.78071175 0.78071286 0.78071155 0.7807037  0.78071049
 0.78071326 0.78071879 0.78071336 0.7
# Materialise the dense estimation matrix (stored on the model as `model.estimations`).
model.estimations()

# Top-10 recommendations per user, one row per user: user, item1, score1, item2, score2, ...
top_n=pd.DataFrame(model.recommend(user_code_id, item_code_id, topK=10))

top_n.to_csv('Recommendations generated/ml-100k/Self_SVDBaseline_reco.csv', index=False, header=False)

# Estimated ratings for every (user, item) pair present in the test matrix.
estimations=pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))
estimations.to_csv('Recommendations generated/ml-100k/Self_SVDBaseline_estimations.csv', index=False, header=False)
import importlib

import evaluation_measures as ev

# `imp` is deprecated and was removed in Python 3.12; importlib.reload is the
# supported replacement. Importing before reloading also lets this cell run in
# a fresh kernel, where `ev` would otherwise be undefined at reload time.
importlib.reload(ev)

dir_path="Recommendations generated/ml-100k/"
super_reactions=[4,5]
test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\t', header=None)

# Evaluate the models whose outputs are stored under dir_path against the test set.
ev.evaluate_all(test, dir_path, super_reactions)
943it [00:00, 11925.62it/s]
943it [00:00, 12690.64it/s]
943it [00:00, 11084.16it/s]
943it [00:00, 12561.94it/s]
943it [00:00, 11489.57it/s]
943it [00:00, 11776.67it/s]
943it [00:00, 10955.28it/s]
943it [00:00, 12078.77it/s]
943it [00:00, 11776.77it/s]
943it [00:00, 11631.38it/s]
943it [00:00, 11631.38it/s]
943it [00:00, 12235.67it/s]
943it [00:00, 11350.01it/s]
943it [00:00, 11702.10it/s]
943it [00:00, 12645.60it/s]
943it [00:00, 10586.01it/s]
943it [00:00, 11631.01it/s]
943it [00:00, 11631.32it/s]
Model RMSE MAE precision recall F_1 F_05 precision_super recall_super NDCG mAP MRR LAUC HR F_2 Whole_average Reco in test Test coverage Shannon Gini
0 Self_RP3Beta 3.702928 3.527713 0.322694 0.216069 0.212152 0.247538 0.245279 0.284983 0.388271 0.248239 0.636318 0.605683 0.910923 0.205450 0.376967 0.999788 0.178932 4.549663 0.950182
0 Self_P3 3.702446 3.527273 0.282185 0.192092 0.186749 0.216980 0.204185 0.240096 0.339114 0.204905 0.572157 0.593544 0.875928 0.181702 0.340803 1.000000 0.077201 3.875892 0.974947
0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 0.118732 0.141584 0.130472 0.137473 0.214651 0.111707 0.400939 0.555546 0.765642 0.112750 0.249607 1.000000 0.038961 3.159079 0.987317
0 Self_SVDBaseline 3.645666 3.480246 0.137858 0.082398 0.084151 0.101063 0.107940 0.109393 0.164477 0.082973 0.342374 0.538097 0.638388 0.079860 0.205748 0.999894 0.279221 5.159076 0.907220
0 Ready_SVD 0.950835 0.748676 0.097879 0.048335 0.053780 0.068420 0.086159 0.080289 0.113553 0.054094 0.249037 0.520893 0.498409 0.048439 0.159941 0.997985 0.204906 4.395721 0.954872
0 Self_SVD 0.914890 0.717962 0.102969 0.042325 0.052022 0.069313 0.093562 0.074994 0.105416 0.050278 0.191533 0.517890 0.462354 0.044591 0.150604 0.867656 0.141414 3.929249 0.971112
0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 0.046030 0.061286 0.079614 0.056463 0.095957 0.043178 0.198193 0.515501 0.437964 0.039549 0.141900 1.000000 0.033911 2.836513 0.991139
0 Ready_SVDBiased 0.943277 0.743628 0.080912 0.033048 0.040445 0.053881 0.070815 0.049631 0.090496 0.041928 0.200192 0.513176 0.411453 0.034776 0.135063 0.998727 0.168110 4.165618 0.964211
0 Self_KNNSurprisetask 0.946255 0.745209 0.083457 0.032848 0.041227 0.055493 0.074785 0.048890 0.089577 0.040902 0.189057 0.513076 0.417815 0.034996 0.135177 0.888547 0.130592 3.611806 0.978659
0 Self_TopRated 2.508258 2.217909 0.079321 0.032667 0.039983 0.053170 0.068884 0.048582 0.070766 0.027602 0.114790 0.512943 0.411453 0.034385 0.124546 1.000000 0.024531 2.761238 0.991660
0 Self_GlobalAvg 1.125760 0.943534 0.061188 0.025968 0.031383 0.041343 0.040558 0.032107 0.067695 0.027470 0.171187 0.509546 0.384942 0.027213 0.118383 1.000000 0.025974 2.711772 0.992003
0 Ready_Random 1.514265 1.215956 0.048780 0.021007 0.024667 0.032495 0.031867 0.023414 0.052904 0.020511 0.126790 0.507024 0.322375 0.021635 0.102789 0.988017 0.183983 5.100443 0.906900
0 Ready_I-KNN 1.030386 0.813067 0.026087 0.006908 0.010593 0.016046 0.021137 0.009522 0.024214 0.008958 0.048068 0.499885 0.154825 0.008007 0.069521 0.402333 0.434343 5.133650 0.877999
0 Ready_I-KNNBaseline 0.935327 0.737424 0.002545 0.000755 0.001105 0.001602 0.002253 0.000930 0.003444 0.001362 0.011760 0.496724 0.021209 0.000862 0.045379 0.482821 0.059885 2.232578 0.994487
0 Ready_U-KNN 1.023495 0.807913 0.000742 0.000205 0.000305 0.000449 0.000536 0.000198 0.000845 0.000274 0.002744 0.496441 0.007423 0.000235 0.042533 0.602121 0.010823 2.089186 0.995706
0 Self_BaselineIU 0.958136 0.754051 0.000954 0.000188 0.000298 0.000481 0.000644 0.000223 0.001043 0.000335 0.003348 0.496433 0.009544 0.000220 0.042809 0.699046 0.005051 1.945910 0.995669
0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 0.000278 0.000463 0.000644 0.000189 0.000752 0.000168 0.001677 0.496424 0.009544 0.000201 0.042622 0.600530 0.005051 1.803126 0.996380
0 Self_IKNN 1.018363 0.808793 0.000318 0.000108 0.000140 0.000189 0.000000 0.000000 0.000214 0.000037 0.000368 0.496391 0.003181 0.000118 0.041755 0.392153 0.115440 4.174741 0.965327

Ready-made SVD - Surprise implementation

SVD

import importlib

import helpers
import surprise as sp

# `imp` is deprecated and was removed in Python 3.12; importlib.reload is the
# supported replacement for picking up edits made to helpers.py.
importlib.reload(helpers)

algo = sp.SVD(biased=False) # to use unbiased version

# helpers.ready_made is project-local; it is given the algorithm and the two output paths.
helpers.ready_made(algo, reco_path='Recommendations generated/ml-100k/Ready_SVD_reco.csv',
          estimations_path='Recommendations generated/ml-100k/Ready_SVD_estimations.csv')
Generating predictions...
Generating top N recommendations...
Generating predictions...

Biased SVD - on top of the baseline

import importlib

import helpers
import surprise as sp

# `imp` is deprecated and was removed in Python 3.12; importlib.reload is the
# supported replacement for picking up edits made to helpers.py.
importlib.reload(helpers)

algo = sp.SVD() # default is biased=True

# helpers.ready_made is project-local; it is given the algorithm and the two output paths.
helpers.ready_made(algo, reco_path='Recommendations generated/ml-100k/Ready_SVDBiased_reco.csv',
          estimations_path='Recommendations generated/ml-100k/Ready_SVDBiased_estimations.csv')
Generating predictions...
Generating top N recommendations...
Generating predictions...
import importlib

import evaluation_measures as ev

# `imp` is deprecated and was removed in Python 3.12; importlib.reload is the
# supported replacement. Importing before reloading also lets this cell run in
# a fresh kernel, where `ev` would otherwise be undefined at reload time.
importlib.reload(ev)

dir_path="Recommendations generated/ml-100k/"
super_reactions=[4,5]
test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\t', header=None)

# Evaluate the models whose outputs are stored under dir_path against the test set.
ev.evaluate_all(test, dir_path, super_reactions)
943it [00:00, 11925.95it/s]
943it [00:00, 12314.95it/s]
943it [00:00, 10706.18it/s]
943it [00:00, 11925.84it/s]
943it [00:00, 11349.81it/s]
943it [00:00, 11925.98it/s]
943it [00:00, 11018.70it/s]
943it [00:00, 12396.58it/s]
943it [00:00, 12562.02it/s]
943it [00:00, 11489.71it/s]
943it [00:00, 11631.38it/s]
943it [00:00, 12210.06it/s]
943it [00:00, 11629.33it/s]