Systemy-rekomedacyjne-praca.../P4. Matrix Factorization.ipynb

106 KiB

Self-made SVD

import helpers
import pandas as pd
import numpy as np
import scipy.sparse as sparse
from collections import defaultdict
from itertools import chain
import random

# Load the train/test splits: tab-separated files without a header row.
train_read=pd.read_csv('./Datasets/ml-100k/train.csv', sep='\t', header=None)
test_read=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\t', header=None)
# helpers.data_to_csr returns the train/test matrices plus four lookup tables.
# Presumably the matrices are CSR user x item rating matrices and the tables map
# internal codes <-> original ids (suggested by the names and by how they are
# indexed further down) -- confirm in helpers.py.
train_ui, test_ui, user_code_id, user_id_code, item_code_id, item_id_code = helpers.data_to_csr(train_read, test_read)
# Done similarly to https://github.com/albertauyeung/matrix-factorization-in-python
from tqdm import tqdm

class SVD():
    """Plain (unbiased) matrix-factorization recommender trained with SGD.

    The predicted rating of user ``u`` for item ``i`` is the dot product of
    the latent vectors ``Pu[u]`` and ``Qi[i]``.  Users and items are addressed
    by their row / column position ("code") in ``train_ui``.

    Typical use::

        model = SVD(train_ui, learning_rate=0.005, regularization=0.02,
                    nb_factors=100, iterations=40)
        model.train(test_ui)
        model.estimations()
        reco = model.recommend(user_code_id, item_code_id, topK=10)
    """

    def __init__(self, train_ui, learning_rate, regularization, nb_factors, iterations):
        """Store the hyper-parameters and randomly initialise the factors.

        Args:
            train_ui: scipy sparse user x item rating matrix.  ``recommend``
                reads its ``indices`` / ``indptr`` arrays, so a CSR layout is
                expected.
            learning_rate: SGD step size.
            regularization: L2 penalty applied to both factor matrices.
            nb_factors: number of latent dimensions.
            iterations: number of passes (epochs) over the training ratings.
        """
        self.train_ui = train_ui
        self.uir = self._triplets(train_ui)

        self.learning_rate = learning_rate
        self.regularization = regularization
        self.iterations = iterations
        self.nb_users, self.nb_items = train_ui.shape
        self.nb_ratings = train_ui.nnz
        self.nb_factors = nb_factors

        # Small random start values, drawn with standard deviation 1/nb_factors.
        self.Pu = np.random.normal(loc=0, scale=1./self.nb_factors, size=(self.nb_users, self.nb_factors))
        self.Qi = np.random.normal(loc=0, scale=1./self.nb_factors, size=(self.nb_items, self.nb_factors))

    @staticmethod
    def _triplets(matrix):
        """Return the non-zero entries of ``matrix`` as ``(user, item, rating)`` tuples.

        Rows, columns and values are taken from one COO view and filtered with
        the same mask.  Zipping ``matrix.nonzero()`` with ``matrix.data`` would
        pair coordinates with the wrong ratings whenever the matrix stores
        explicit zeros, because ``nonzero()`` drops them and ``data`` does not.
        """
        coo = matrix.tocoo()
        keep = coo.data != 0
        return list(zip(coo.row[keep], coo.col[keep], coo.data[keep]))

    def train(self, test_ui=None):
        """Run ``iterations`` epochs of SGD and record the RMSE after each one.

        Args:
            test_ui: optional sparse matrix of held-out ratings.  When given,
                its RMSE is tracked next to the training RMSE.

        After the call ``self.learning_process`` holds one entry per epoch:
        ``[epoch, train_rmse]`` or ``[epoch, train_rmse, test_rmse]``.
        """
        # Identity check: `!= None` on a sparse matrix goes through SciPy's
        # comparison operators instead of testing for "no argument given".
        track_test = test_ui is not None
        if track_test:
            self.test_uir = self._triplets(test_ui)

        self.learning_process = []
        pbar = tqdm(range(self.iterations))
        for i in pbar:
            previous_rmse = self.learning_process[-1][1] if i > 0 else 0
            pbar.set_description(f'Epoch {i} RMSE: {previous_rmse}. Training epoch {i+1}...')
            np.random.shuffle(self.uir)
            self.sgd(self.uir)

            epoch_stats = [i + 1, self.RMSE_total(self.uir)]
            if track_test:
                epoch_stats.append(self.RMSE_total(self.test_uir))
            self.learning_process.append(epoch_stats)

    def sgd(self, uir):
        """Perform one SGD pass over the ``(user, item, rating)`` triplets."""
        for u, i, score in uir:
            # Compute prediction and error
            e = score - self.get_rating(u, i)

            # Both steps are computed from the *current* vectors before either
            # one is modified, so the item update does not see the new Pu[u].
            Pu_update = self.learning_rate * (e * self.Qi[i] - self.regularization * self.Pu[u])
            Qi_update = self.learning_rate * (e * self.Pu[u] - self.regularization * self.Qi[i])

            self.Pu[u] += Pu_update
            self.Qi[i] += Qi_update

    def get_rating(self, u, i):
        """Return the predicted rating of user code ``u`` for item code ``i``."""
        return self.Pu[u].dot(self.Qi[i])

    def RMSE_total(self, uir):
        """Return the root-mean-square error over ``uir`` (NaN if it is empty)."""
        if len(uir) == 0:
            # Nothing to average over; avoid a ZeroDivisionError.
            return np.nan
        squared_error = 0
        for u, i, score in uir:
            squared_error += (score - self.get_rating(u, i))**2
        return np.sqrt(squared_error / len(uir))

    def estimations(self):
        """Compute the dense user x item matrix of predicted ratings.

        NOTE: the result is stored in ``self.estimations``, i.e. the instance
        attribute replaces this method.  ``recommend`` and ``estimate`` read
        that attribute, so call this exactly once, after training.
        """
        self.estimations = np.dot(self.Pu, self.Qi.T)

    def recommend(self, user_code_id, item_code_id, topK=10):
        """Return the ``topK`` best unseen items for every user.

        Requires ``estimations()`` to have been called.

        Args:
            user_code_id: mapping from user code (row index) to user id.
            item_code_id: mapping from item code (column index) to item id.
            topK: maximum number of items per user.

        Returns:
            A list of rows ``[user_id, item1, score1, item2, score2, ...]``
            sorted by descending score.  Users without any candidate item are
            omitted.
        """
        indptr = self.train_ui.indptr
        indices = self.train_ui.indices

        top_k = defaultdict(list)
        for nb_user, user_scores in enumerate(self.estimations):
            # Build the "already rated" lookup once per user: set membership is
            # O(1), whereas `item in ndarray` rescans the array for every item.
            rated = set(indices[indptr[nb_user]:indptr[nb_user + 1]].tolist())
            for item, score in enumerate(user_scores):
                if item in rated or np.isnan(score):
                    continue
                top_k[user_code_id[nb_user]].append((item_code_id[item], score))

        result = []
        # Let's choose k best items in the format: (user, item1, score1, item2, score2, ...)
        for uid, item_scores in top_k.items():
            item_scores.sort(key=lambda pair: pair[1], reverse=True)
            result.append([uid] + list(chain.from_iterable(item_scores[:topK])))
        return result

    def estimate(self, user_code_id, item_code_id, test_ui):
        """Return ``[user_id, item_id, predicted_rating]`` for every entry of ``test_ui``.

        Requires ``estimations()`` to have been called.  A NaN prediction is
        replaced by 1.
        """
        result = []
        for user, item in zip(*test_ui.nonzero()):
            predicted = self.estimations[user, item]
            result.append([user_code_id[user], item_code_id[item],
                           predicted if not np.isnan(predicted) else 1])
        return result
# Fit the self-made SVD; passing test_ui makes train() record the test RMSE per epoch as well.
model=SVD(train_ui, learning_rate=0.005, regularization=0.02, nb_factors=100, iterations=40)
model.train(test_ui)
Epoch 39 RMSE: 0.7469054750619549. Training epoch 40...: 100%|█████████████████████████| 40/40 [01:10<00:00,  1.76s/it]
import matplotlib.pyplot as plt

# Training curve: keep only the first two columns of learning_process (epoch number, train RMSE).
df=pd.DataFrame(model.learning_process).iloc[:,:2]
df.columns=['epoch', 'train_RMSE']
plt.plot('epoch', 'train_RMSE', data=df, color='blue')
plt.legend()
<matplotlib.legend.Legend at 0x19ac6c66f08>
import matplotlib.pyplot as plt

# Train vs. test RMSE. The first 10 epochs are skipped -- presumably so the steep
# initial drop does not dominate the y-axis (assumption; adjust the slice if needed).
df=pd.DataFrame(model.learning_process[10:], columns=['epoch', 'train_RMSE', 'test_RMSE'])
plt.plot('epoch', 'train_RMSE', data=df, color='blue')
plt.plot('epoch', 'test_RMSE', data=df, color='yellow', linestyle='dashed')
plt.legend()
<matplotlib.legend.Legend at 0x19ac1ce8308>

Saving and evaluating recommendations

# Materialise the dense prediction matrix; recommend() and estimate() read it from the model.
model.estimations()

# One row per user: user id followed by up to 10 (item id, score) pairs.
top_n=pd.DataFrame(model.recommend(user_code_id, item_code_id, topK=10))

top_n.to_csv('Recommendations generated/ml-100k/Self_SVD_reco.csv', index=False, header=False)

# Predicted rating for every (user, item) pair present in the test matrix.
estimations=pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))
estimations.to_csv('Recommendations generated/ml-100k/Self_SVD_estimations.csv', index=False, header=False)
import evaluation_measures as ev

# Read the two files back and evaluate them against the test split.
estimations_df=pd.read_csv('Recommendations generated/ml-100k/Self_SVD_estimations.csv', header=None)
reco=np.loadtxt('Recommendations generated/ml-100k/Self_SVD_reco.csv', delimiter=',')

# super_reactions: presumably the rating values counted as strongly positive by the
# *_super metrics -- confirm in evaluation_measures.
ev.evaluate(test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\t', header=None),
            estimations_df=estimations_df, 
            reco=reco,
            super_reactions=[4,5])
943it [00:00, 11351.12it/s]
RMSE MAE precision recall F_1 F_05 precision_super recall_super NDCG mAP MRR LAUC HR F_2 Whole_average Reco in test Test coverage Shannon Gini
0 0.91489 0.717962 0.102969 0.042325 0.052022 0.069313 0.093562 0.074994 0.105416 0.050278 0.191533 0.51789 0.462354 0.044591 0.150604 0.867656 0.141414 3.929249 0.971112
import importlib

import evaluation_measures as ev

# Reload so that edits made to evaluation_measures.py after the kernel first
# imported it take effect.  The module is imported *before* it is reloaded, so
# the cell also works in a fresh kernel, and importlib.reload replaces
# imp.reload (the imp module is deprecated and was removed in Python 3.12).
importlib.reload(ev)

dir_path="Recommendations generated/ml-100k/"
super_reactions=[4,5]
test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\t', header=None)

# Judging by the output table, this evaluates every model whose files are
# stored under dir_path -- confirm in evaluation_measures.
ev.evaluate_all(test, dir_path, super_reactions)
943it [00:00, 11925.95it/s]
943it [00:00, 12314.60it/s]
943it [00:00, 10828.94it/s]
943it [00:00, 11925.91it/s]
943it [00:00, 11489.44it/s]
943it [00:00, 11489.57it/s]
943it [00:00, 10353.22it/s]
943it [00:00, 11925.88it/s]
943it [00:00, 11631.38it/s]
943it [00:00, 11631.42it/s]
943it [00:00, 10585.78it/s]
943it [00:00, 11215.99it/s]
943it [00:00, 11215.93it/s]
943it [00:00, 11776.84it/s]
943it [00:00, 12396.27it/s]
943it [00:00, 10468.25it/s]
943it [00:00, 11972.70it/s]
943it [00:00, 11925.84it/s]
Model RMSE MAE precision recall F_1 F_05 precision_super recall_super NDCG mAP MRR LAUC HR F_2 Whole_average Reco in test Test coverage Shannon Gini
0 Self_RP3Beta 3.702928 3.527713 0.322694 0.216069 0.212152 0.247538 0.245279 0.284983 0.388271 0.248239 0.636318 0.605683 0.910923 0.205450 0.376967 0.999788 0.178932 4.549663 0.950182
0 Self_P3 3.702446 3.527273 0.282185 0.192092 0.186749 0.216980 0.204185 0.240096 0.339114 0.204905 0.572157 0.593544 0.875928 0.181702 0.340803 1.000000 0.077201 3.875892 0.974947
0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 0.118732 0.141584 0.130472 0.137473 0.214651 0.111707 0.400939 0.555546 0.765642 0.112750 0.249607 1.000000 0.038961 3.159079 0.987317
0 Self_SVDBaseline 3.645871 3.480308 0.135949 0.078868 0.082011 0.099188 0.106974 0.103767 0.159486 0.079783 0.328576 0.536311 0.632025 0.077145 0.201674 0.999894 0.281385 5.140721 0.909056
0 Ready_SVD 0.950835 0.748676 0.097879 0.048335 0.053780 0.068420 0.086159 0.080289 0.113553 0.054094 0.249037 0.520893 0.498409 0.048439 0.159941 0.997985 0.204906 4.395721 0.954872
0 Self_SVD 0.914890 0.717962 0.102969 0.042325 0.052022 0.069313 0.093562 0.074994 0.105416 0.050278 0.191533 0.517890 0.462354 0.044591 0.150604 0.867656 0.141414 3.929249 0.971112
0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 0.046030 0.061286 0.079614 0.056463 0.095957 0.043178 0.198193 0.515501 0.437964 0.039549 0.141900 1.000000 0.033911 2.836513 0.991139
0 Ready_SVDBiased 0.943277 0.743628 0.080912 0.033048 0.040445 0.053881 0.070815 0.049631 0.090496 0.041928 0.200192 0.513176 0.411453 0.034776 0.135063 0.998727 0.168110 4.165618 0.964211
0 Self_KNNSurprisetask 0.946255 0.745209 0.083457 0.032848 0.041227 0.055493 0.074785 0.048890 0.089577 0.040902 0.189057 0.513076 0.417815 0.034996 0.135177 0.888547 0.130592 3.611806 0.978659
0 Self_TopRated 2.508258 2.217909 0.079321 0.032667 0.039983 0.053170 0.068884 0.048582 0.070766 0.027602 0.114790 0.512943 0.411453 0.034385 0.124546 1.000000 0.024531 2.761238 0.991660
0 Self_GlobalAvg 1.125760 0.943534 0.061188 0.025968 0.031383 0.041343 0.040558 0.032107 0.067695 0.027470 0.171187 0.509546 0.384942 0.027213 0.118383 1.000000 0.025974 2.711772 0.992003
0 Ready_Random 1.514265 1.215956 0.048780 0.021007 0.024667 0.032495 0.031867 0.023414 0.052904 0.020511 0.126790 0.507024 0.322375 0.021635 0.102789 0.988017 0.183983 5.100443 0.906900
0 Ready_I-KNN 1.030386 0.813067 0.026087 0.006908 0.010593 0.016046 0.021137 0.009522 0.024214 0.008958 0.048068 0.499885 0.154825 0.008007 0.069521 0.402333 0.434343 5.133650 0.877999
0 Ready_I-KNNBaseline 0.935327 0.737424 0.002545 0.000755 0.001105 0.001602 0.002253 0.000930 0.003444 0.001362 0.011760 0.496724 0.021209 0.000862 0.045379 0.482821 0.059885 2.232578 0.994487
0 Ready_U-KNN 1.023495 0.807913 0.000742 0.000205 0.000305 0.000449 0.000536 0.000198 0.000845 0.000274 0.002744 0.496441 0.007423 0.000235 0.042533 0.602121 0.010823 2.089186 0.995706
0 Self_BaselineIU 0.958136 0.754051 0.000954 0.000188 0.000298 0.000481 0.000644 0.000223 0.001043 0.000335 0.003348 0.496433 0.009544 0.000220 0.042809 0.699046 0.005051 1.945910 0.995669
0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 0.000278 0.000463 0.000644 0.000189 0.000752 0.000168 0.001677 0.496424 0.009544 0.000201 0.042622 0.600530 0.005051 1.803126 0.996380
0 Self_IKNN 1.018363 0.808793 0.000318 0.000108 0.000140 0.000189 0.000000 0.000000 0.000214 0.000037 0.000368 0.496391 0.003181 0.000118 0.041755 0.392153 0.115440 4.174741 0.965327

Embeddings

# Toy example of the row normalisation used for the embeddings below:
# each row is divided by its own L2 norm ([:,None] turns the norms into a column for broadcasting).
x=np.array([[1,2],[3,4]])
display(x)
x/np.linalg.norm(x, axis=1)[:,None]
array([[1, 2],
       [3, 4]])
array([[0.4472136 , 0.89442719],
       [0.6       , 0.8       ]])
# Pick a random item code among those stored in the training matrix
# (train_ui.indices holds the column indices of the stored entries, assuming a CSR matrix).
item=random.choice(list(set(train_ui.indices)))

# Scale every item embedding (row of Qi) to unit length, so that the dot products
# below are cosine similarities.
embeddings_norm=model.Qi/np.linalg.norm(model.Qi, axis=1)[:,None] # we do not mean-center here
# omitting normalization also makes sense, but items with a greater magnitude will be recommended more often

# Similarity of every item to the chosen one; keep the 10 highest scores
# (the chosen item itself is among them).
similarity_scores=np.dot(embeddings_norm,embeddings_norm[item].T)
top_similar_items=pd.DataFrame(enumerate(similarity_scores), columns=['code', 'score'])\
.sort_values(by=['score'], ascending=[False])[:10]

# Translate internal item codes back to the dataset's item ids.
top_similar_items['item_id']=top_similar_items['code'].apply(lambda x: item_code_id[x])

items=pd.read_csv('./Datasets/ml-100k/movies.csv')

# Attach the movie metadata to the selected items by joining on the item id.
result=pd.merge(top_similar_items, items, left_on='item_id', right_on='id')

result
code score item_id id title genres
0 423 1.000000 424 424 Children of the Corn: The Gathering (1996) Horror
1 984 0.980977 985 985 Blood & Wine (1997) Drama
2 1458 0.980754 1459 1459 Madame Butterfly (1995) Musical
3 1278 0.980699 1279 1279 Wild America (1997) Adventure, Children's
4 1380 0.980041 1381 1381 Losing Chase (1996) Drama
5 744 0.979974 745 745 Ruling Class, The (1972) Comedy
6 705 0.979904 706 706 Bad Moon (1996) Horror
7 1085 0.979812 1086 1086 It's My Party (1995) Drama
8 1237 0.979443 1238 1238 Full Speed (1996) Drama
9 1190 0.979429 1191 1191 Letter From Death Row, A (1998) Crime, Drama

Project task 5: implement SVD on top of the baseline model (as it is done in the Surprise library)

# Extending our implementation above with additional parameters in the gradient descent procedure
# seems to be the fastest option.
# please save the output in 'Recommendations generated/ml-100k/Self_SVDBaseline_reco.csv' and
# 'Recommendations generated/ml-100k/Self_SVDBaseline_estimations.csv'
# Done similarly to https://github.com/albertauyeung/matrix-factorization-in-python
from tqdm import tqdm

class SVDboosted():
    """Biased matrix factorization ("SVD on top of a baseline") trained with SGD.

    The predicted rating of user ``u`` for item ``i`` is::

        global_mean + Bu[u] + Bi[i] + Pu[u] . Qi[i]

    i.e. the baseline estimate (global mean plus one scalar bias per user and
    per item) corrected by the latent-factor interaction, as in the biased
    ``SVD`` algorithm of the Surprise library.  Users and items are addressed
    by their row / column position ("code") in ``train_ui``.
    """

    def __init__(self, train_ui, learning_rate, regularization, nb_factors, iterations):
        """Store the hyper-parameters and initialise biases and factors.

        Args:
            train_ui: scipy sparse user x item rating matrix.  ``recommend``
                reads its ``indices`` / ``indptr`` arrays, so a CSR layout is
                expected.
            learning_rate: SGD step size (shared by biases and factors).
            regularization: L2 penalty (shared by biases and factors).
            nb_factors: number of latent dimensions.
            iterations: number of passes (epochs) over the training ratings.
        """
        self.train_ui = train_ui
        self.uir = self._triplets(train_ui)

        self.learning_rate = learning_rate
        self.regularization = regularization
        self.iterations = iterations
        self.nb_users, self.nb_items = train_ui.shape
        self.nb_ratings = train_ui.nnz
        self.nb_factors = nb_factors

        # Baseline part of the model.  A bias is ONE scalar per user / item,
        # so these are 1-D vectors (not (n, nb_factors) matrices, which would
        # turn every prediction and the RMSE into a vector).  They start at
        # zero, i.e. the first predictions equal the global mean.
        self.global_mean = float(np.mean([score for _, _, score in self.uir])) if self.uir else 0.0
        self.Bu = np.zeros(self.nb_users)
        self.Bi = np.zeros(self.nb_items)

        # Small random start values, drawn with standard deviation 1/nb_factors.
        self.Pu = np.random.normal(loc=0, scale=1./self.nb_factors, size=(self.nb_users, self.nb_factors))
        self.Qi = np.random.normal(loc=0, scale=1./self.nb_factors, size=(self.nb_items, self.nb_factors))

    @staticmethod
    def _triplets(matrix):
        """Return the non-zero entries of ``matrix`` as ``(user, item, rating)`` tuples.

        Rows, columns and values are taken from one COO view and filtered with
        the same mask.  Zipping ``matrix.nonzero()`` with ``matrix.data`` would
        pair coordinates with the wrong ratings whenever the matrix stores
        explicit zeros, because ``nonzero()`` drops them and ``data`` does not.
        """
        coo = matrix.tocoo()
        keep = coo.data != 0
        return list(zip(coo.row[keep], coo.col[keep], coo.data[keep]))

    def train(self, test_ui=None):
        """Run ``iterations`` epochs of SGD and record the RMSE after each one.

        Args:
            test_ui: optional sparse matrix of held-out ratings.  When given,
                its RMSE is tracked next to the training RMSE.

        After the call ``self.learning_process`` holds one entry per epoch:
        ``[epoch, train_rmse]`` or ``[epoch, train_rmse, test_rmse]``.
        """
        # Identity check: `!= None` on a sparse matrix goes through SciPy's
        # comparison operators instead of testing for "no argument given".
        track_test = test_ui is not None
        if track_test:
            self.test_uir = self._triplets(test_ui)

        self.learning_process = []
        pbar = tqdm(range(self.iterations))
        for i in pbar:
            previous_rmse = self.learning_process[-1][1] if i > 0 else 0
            pbar.set_description(f'Epoch {i} RMSE: {previous_rmse}. Training epoch {i+1}...')
            np.random.shuffle(self.uir)
            self.sgd(self.uir)

            epoch_stats = [i + 1, self.RMSE_total(self.uir)]
            if track_test:
                epoch_stats.append(self.RMSE_total(self.test_uir))
            self.learning_process.append(epoch_stats)

    def sgd(self, uir):
        """Perform one SGD pass over the ``(user, item, rating)`` triplets."""
        for u, i, score in uir:
            # Compute prediction and error
            e = score - self.get_rating(u, i)

            # All steps are computed from the *current* parameters before any
            # of them is modified.
            Pu_update = self.learning_rate * (e * self.Qi[i] - self.regularization * self.Pu[u])
            Qi_update = self.learning_rate * (e * self.Pu[u] - self.regularization * self.Qi[i])
            Bu_update = self.learning_rate * (e - self.regularization * self.Bu[u])
            Bi_update = self.learning_rate * (e - self.regularization * self.Bi[i])

            self.Bu[u] += Bu_update
            self.Bi[i] += Bi_update
            self.Pu[u] += Pu_update
            self.Qi[i] += Qi_update

    def get_rating(self, u, i):
        """Return the predicted (scalar) rating of user code ``u`` for item code ``i``."""
        return self.global_mean + self.Bu[u] + self.Bi[i] + self.Pu[u].dot(self.Qi[i])

    def RMSE_total(self, uir):
        """Return the root-mean-square error over ``uir`` (NaN if it is empty)."""
        if len(uir) == 0:
            # Nothing to average over; avoid a ZeroDivisionError.
            return np.nan
        squared_error = 0
        for u, i, score in uir:
            squared_error += (score - self.get_rating(u, i))**2
        return np.sqrt(squared_error / len(uir))

    def estimations(self):
        """Compute the dense user x item matrix of predicted ratings.

        The matrix contains the full prediction -- global mean, both biases
        and the factor interaction -- so every cell equals ``get_rating``.

        NOTE: the result is stored in ``self.estimations``, i.e. the instance
        attribute replaces this method.  ``recommend`` and ``estimate`` read
        that attribute, so call this exactly once, after training.
        """
        self.estimations = (self.global_mean
                            + self.Bu[:, None]
                            + self.Bi[None, :]
                            + np.dot(self.Pu, self.Qi.T))

    def recommend(self, user_code_id, item_code_id, topK=10):
        """Return the ``topK`` best unseen items for every user.

        Requires ``estimations()`` to have been called.

        Args:
            user_code_id: mapping from user code (row index) to user id.
            item_code_id: mapping from item code (column index) to item id.
            topK: maximum number of items per user.

        Returns:
            A list of rows ``[user_id, item1, score1, item2, score2, ...]``
            sorted by descending score.  Users without any candidate item are
            omitted.
        """
        indptr = self.train_ui.indptr
        indices = self.train_ui.indices

        top_k = defaultdict(list)
        for nb_user, user_scores in enumerate(self.estimations):
            # Build the "already rated" lookup once per user: set membership is
            # O(1), whereas `item in ndarray` rescans the array for every item.
            rated = set(indices[indptr[nb_user]:indptr[nb_user + 1]].tolist())
            for item, score in enumerate(user_scores):
                if item in rated or np.isnan(score):
                    continue
                top_k[user_code_id[nb_user]].append((item_code_id[item], score))

        result = []
        # Let's choose k best items in the format: (user, item1, score1, item2, score2, ...)
        for uid, item_scores in top_k.items():
            item_scores.sort(key=lambda pair: pair[1], reverse=True)
            result.append([uid] + list(chain.from_iterable(item_scores[:topK])))
        return result

    def estimate(self, user_code_id, item_code_id, test_ui):
        """Return ``[user_id, item_id, predicted_rating]`` for every entry of ``test_ui``.

        Requires ``estimations()`` to have been called.  A NaN prediction is
        replaced by 1.
        """
        result = []
        for user, item in zip(*test_ui.nonzero()):
            predicted = self.estimations[user, item]
            result.append([user_code_id[user], item_code_id[item],
                           predicted if not np.isnan(predicted) else 1])
        return result
# Fit the biased variant with the same hyper-parameters as the plain SVD above;
# passing test_ui makes train() record the test RMSE per epoch as well.
model=SVDboosted(train_ui, learning_rate=0.005, regularization=0.02, nb_factors=100, iterations=40)
model.train(test_ui)
Epoch 1 RMSE: [1.56545038 1.56550953 1.56561831 1.56506255 1.56544717 1.56542472        | 1/40 [00:02<01:50,  2.83s/it]
Epoch 1 RMSE: [1.56545038 1.56550953 1.56561831 1.56506255 1.56544717 1.56542472
Epoch 2 RMSE: [1.24195612 1.24198904 1.24211143 1.24181535 1.24194284 1.24200128
Epoch 2 RMSE: [1.24195612 1.24198904 1.24211143 1.24181535 1.24194284 1.24200128
Epoch 3 RMSE: [1.13247887 1.13249914 1.13258642 1.13239893 1.13245611 1.13250907
Epoch 3 RMSE: [1.13247887 1.13249914 1.13258642 1.13239893 1.13245611 1.13250907
Epoch 4 RMSE: [1.07474573 1.07476035 1.07482625 1.07468999 1.07472024 1.07476528
Epoch 4 RMSE: [1.07474573 1.07476035 1.07482625 1.07468999 1.07472024 1.07476528
Epoch 5 RMSE: [1.03819311 1.03820533 1.03825798 1.03815174 1.03816959 1.03820895
Epoch 5 RMSE: [1.03819311 1.03820533 1.03825798 1.03815174 1.03816959 1.03820895
Epoch 6 RMSE: [1.01305431 1.01306406 1.0131073  1.0130215  1.01303201 1.01306765
Epoch 6 RMSE: [1.01305431 1.01306406 1.0131073  1.0130215  1.01303201 1.01306765
Epoch 7 RMSE: [0.99453145 0.99453934 0.99457617 0.99450541 0.99451055 0.99454367
Epoch 7 RMSE: [0.99453145 0.99453934 0.99457617 0.99450541 0.99451055 0.99454367
Epoch 8 RMSE: [0.98059211 0.98059798 0.9806297  0.98057082 0.980572   0.98060328
Epoch 8 RMSE: [0.98059211 0.98059798 0.9806297  0.98057082 0.980572   0.98060328
Epoch 9 RMSE: [0.96964915 0.96965335 0.96968135 0.96963161 0.96962996 0.96965908
Epoch 9 RMSE: [0.96964915 0.96965335 0.96968135 0.96963161 0.96962996 0.96965908
Epoch 10 RMSE: [0.96082121 0.96082423 0.96084907 0.96080661 0.96080248 0.9608298
Epoch 10 RMSE: [0.96082121 0.96082423 0.96084907 0.96080661 0.96080248 0.9608298
Epoch 11 RMSE: [0.95365923 0.95366146 0.95368372 0.95364696 0.95364089 0.953667
Epoch 11 RMSE: [0.95365923 0.95366146 0.95368372 0.95364696 0.95364089 0.953667
Epoch 12 RMSE: [0.94766016 0.94766156 0.94768159 0.94764986 0.947642   0.94766674
Epoch 12 RMSE: [0.94766016 0.94766156 0.94768159 0.94764986 0.947642   0.94766674
Epoch 13 RMSE: [0.94261767 0.94261834 0.94263624 0.94260867 0.94259966 0.94262314
Epoch 13 RMSE: [0.94261767 0.94261834 0.94263624 0.94260867 0.94259966 0.94262314
Epoch 14 RMSE: [0.938169   0.9381694  0.93818542 0.93816133 0.93815137 0.93817355
Epoch 14 RMSE: [0.938169   0.9381694  0.93818542 0.93816133 0.93815137 0.93817355
Epoch 15 RMSE: [0.93426778 0.9342676  0.9342824  0.93426131 0.93425033 0.93427147
Epoch 15 RMSE: [0.93426778 0.9342676  0.9342824  0.93426131 0.93425033 0.93427147
Epoch 16 RMSE: [0.93070807 0.93070774 0.93072136 0.93070245 0.93069117 0.93071116
Epoch 16 RMSE: [0.93070807 0.93070774 0.93072136 0.93070245 0.93069117 0.93071116
Epoch 17 RMSE: [0.92736841 0.92736783 0.9273801  0.92736348 0.92735194 0.92737089
Epoch 17 RMSE: [0.92736841 0.92736783 0.9273801  0.92736348 0.92735194 0.92737089
Epoch 18 RMSE: [0.92401862 0.92401778 0.92402901 0.92401417 0.92400251 0.92402036
Epoch 18 RMSE: [0.92401862 0.92401778 0.92402901 0.92401417 0.92400251 0.92402036
Epoch 19 RMSE: [0.92070555 0.92070466 0.92071497 0.92070168 0.92069002 0.92070695
Epoch 19 RMSE: [0.92070555 0.92070466 0.92071497 0.92070168 0.92069002 0.92070695
Epoch 20 RMSE: [0.91707281 0.91707182 0.91708121 0.91706927 0.91705756 0.91707364
Epoch 20 RMSE: [0.91707281 0.91707182 0.91708121 0.91706927 0.91705756 0.91707364
Epoch 21 RMSE: [0.91348792 0.91348679 0.91349536 0.91348474 0.91347301 0.91348798
Epoch 21 RMSE: [0.91348792 0.91348679 0.91349536 0.91348474 0.91347301 0.91348798
Epoch 22 RMSE: [0.90957269 0.90957161 0.90957937 0.90956986 0.9095583  0.90957263
Epoch 22 RMSE: [0.90957269 0.90957161 0.90957937 0.90956986 0.9095583  0.90957263
Epoch 23 RMSE: [0.90514652 0.90514525 0.90515258 0.90514401 0.90513237 0.90514616
Epoch 23 RMSE: [0.90514652 0.90514525 0.90515258 0.90514401 0.90513237 0.90514616
Epoch 24 RMSE: [0.90024301 0.90024177 0.90024834 0.90024062 0.90022924 0.90024226
Epoch 24 RMSE: [0.90024301 0.90024177 0.90024834 0.90024062 0.90022924 0.90024226
Epoch 25 RMSE: [0.89486164 0.89486046 0.89486646 0.89485941 0.89484821 0.89486074
Epoch 25 RMSE: [0.89486164 0.89486046 0.89486646 0.89485941 0.89484821 0.89486074
Epoch 26 RMSE: [0.88903799 0.88903702 0.88904243 0.88903603 0.88902515 0.88903703
Epoch 26 RMSE: [0.88903799 0.88903702 0.88904243 0.88903603 0.88902515 0.88903703
Epoch 27 RMSE: [0.88287967 0.88287863 0.88288359 0.88287782 0.88286725 0.88287848
Epoch 27 RMSE: [0.88287967 0.88287863 0.88288359 0.88287782 0.88286725 0.88287848
Epoch 28 RMSE: [0.87627836 0.87627739 0.87628186 0.8762767  0.87626624 0.8762769
Epoch 28 RMSE: [0.87627836 0.87627739 0.87628186 0.8762767  0.87626624 0.8762769
Epoch 29 RMSE: [0.86929878 0.86929781 0.86930189 0.86929724 0.86928706 0.86929718
Epoch 29 RMSE: [0.86929878 0.86929781 0.86930189 0.86929724 0.86928706 0.86929718
Epoch 30 RMSE: [0.8620862  0.86208531 0.86208899 0.86208476 0.86207471 0.86208446
Epoch 30 RMSE: [0.8620862  0.86208531 0.86208899 0.86208476 0.86207471 0.86208446
Epoch 31 RMSE: [0.85452681 0.85452603 0.85452921 0.85452551 0.85451573 0.85452503
Epoch 31 RMSE: [0.85452681 0.85452603 0.85452921 0.85452551 0.85451573 0.85452503
Epoch 32 RMSE: [0.84666254 0.84666179 0.84666469 0.84666136 0.8466519  0.84666071
Epoch 32 RMSE: [0.84666254 0.84666179 0.84666469 0.84666136 0.8466519  0.84666071
Epoch 33 RMSE: [0.83814164 0.83814095 0.83814355 0.83814049 0.83813121 0.83813985
Epoch 33 RMSE: [0.83814164 0.83814095 0.83814355 0.83814049 0.83813121 0.83813985
Epoch 34 RMSE: [0.8295605  0.82955997 0.82956222 0.82955955 0.8295505  0.82955863
Epoch 34 RMSE: [0.8295605  0.82955997 0.82956222 0.82955955 0.8295505  0.82955863
Epoch 35 RMSE: [0.82044053 0.82044008 0.82044203 0.82043968 0.82043093 0.82043865
Epoch 35 RMSE: [0.82044053 0.82044008 0.82044203 0.82043968 0.82043093 0.82043865
Epoch 36 RMSE: [0.81096402 0.81096361 0.81096533 0.81096327 0.8109547  0.81096221
Epoch 36 RMSE: [0.81096402 0.81096361 0.81096533 0.81096327 0.8109547  0.81096221
Epoch 37 RMSE: [0.80132847 0.80132813 0.80132952 0.80132774 0.80131947 0.80132666
Epoch 37 RMSE: [0.80132847 0.80132813 0.80132952 0.80132774 0.80131947 0.80132666
Epoch 38 RMSE: [0.79114637 0.79114614 0.79114736 0.79114574 0.79113763 0.79114471
Epoch 38 RMSE: [0.79114637 0.79114614 0.79114736 0.79114574 0.79113763 0.79114471
Epoch 39 RMSE: [0.78071211 0.78071175 0.78071286 0.78071155 0.7807037  0.78071049
Epoch 39 RMSE: [0.78071211 0.78071175 0.78071286 0.78071155 0.7807037  0.78071049
Epoch 39 RMSE: [0.78071211 0.78071175 0.78071286 0.78071155 0.7807037  0.78071049
 0.78071326 0.78071879 0.78071336 0.7
# Materialise the dense prediction matrix; recommend() and estimate() read it from the model.
model.estimations()

# One row per user: user id followed by up to 10 (item id, score) pairs.
top_n=pd.DataFrame(model.recommend(user_code_id, item_code_id, topK=10))

top_n.to_csv('Recommendations generated/ml-100k/Self_SVDBaseline_reco.csv', index=False, header=False)

# Predicted rating for every (user, item) pair present in the test matrix.
estimations=pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))
estimations.to_csv('Recommendations generated/ml-100k/Self_SVDBaseline_estimations.csv', index=False, header=False)
import importlib

import evaluation_measures as ev

# Reload so that edits made to evaluation_measures.py after the kernel first
# imported it take effect.  The module is imported *before* it is reloaded, so
# the cell also works in a fresh kernel, and importlib.reload replaces
# imp.reload (the imp module is deprecated and was removed in Python 3.12).
importlib.reload(ev)

dir_path="Recommendations generated/ml-100k/"
super_reactions=[4,5]
test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\t', header=None)

# Judging by the output table, this evaluates every model whose files are
# stored under dir_path -- confirm in evaluation_measures.
ev.evaluate_all(test, dir_path, super_reactions)
943it [00:00, 11925.62it/s]
943it [00:00, 12690.64it/s]
943it [00:00, 11084.16it/s]
943it [00:00, 12561.94it/s]
943it [00:00, 11489.57it/s]
943it [00:00, 11776.67it/s]
943it [00:00, 10955.28it/s]
943it [00:00, 12078.77it/s]
943it [00:00, 11776.77it/s]
943it [00:00, 11631.38it/s]
943it [00:00, 11631.38it/s]
943it [00:00, 12235.67it/s]
943it [00:00, 11350.01it/s]
943it [00:00, 11702.10it/s]
943it [00:00, 12645.60it/s]
943it [00:00, 10586.01it/s]
943it [00:00, 11631.01it/s]
943it [00:00, 11631.32it/s]
Model RMSE MAE precision recall F_1 F_05 precision_super recall_super NDCG mAP MRR LAUC HR F_2 Whole_average Reco in test Test coverage Shannon Gini
0 Self_RP3Beta 3.702928 3.527713 0.322694 0.216069 0.212152 0.247538 0.245279 0.284983 0.388271 0.248239 0.636318 0.605683 0.910923 0.205450 0.376967 0.999788 0.178932 4.549663 0.950182
0 Self_P3 3.702446 3.527273 0.282185 0.192092 0.186749 0.216980 0.204185 0.240096 0.339114 0.204905 0.572157 0.593544 0.875928 0.181702 0.340803 1.000000 0.077201 3.875892 0.974947
0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 0.118732 0.141584 0.130472 0.137473 0.214651 0.111707 0.400939 0.555546 0.765642 0.112750 0.249607 1.000000 0.038961 3.159079 0.987317
0 Self_SVDBaseline 3.645666 3.480246 0.137858 0.082398 0.084151 0.101063 0.107940 0.109393 0.164477 0.082973 0.342374 0.538097 0.638388 0.079860 0.205748 0.999894 0.279221 5.159076 0.907220
0 Ready_SVD 0.950835 0.748676 0.097879 0.048335 0.053780 0.068420 0.086159 0.080289 0.113553 0.054094 0.249037 0.520893 0.498409 0.048439 0.159941 0.997985 0.204906 4.395721 0.954872
0 Self_SVD 0.914890 0.717962 0.102969 0.042325 0.052022 0.069313 0.093562 0.074994 0.105416 0.050278 0.191533 0.517890 0.462354 0.044591 0.150604 0.867656 0.141414 3.929249 0.971112
0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 0.046030 0.061286 0.079614 0.056463 0.095957 0.043178 0.198193 0.515501 0.437964 0.039549 0.141900 1.000000 0.033911 2.836513 0.991139
0 Ready_SVDBiased 0.943277 0.743628 0.080912 0.033048 0.040445 0.053881 0.070815 0.049631 0.090496 0.041928 0.200192 0.513176 0.411453 0.034776 0.135063 0.998727 0.168110 4.165618 0.964211
0 Self_KNNSurprisetask 0.946255 0.745209 0.083457 0.032848 0.041227 0.055493 0.074785 0.048890 0.089577 0.040902 0.189057 0.513076 0.417815 0.034996 0.135177 0.888547 0.130592 3.611806 0.978659
0 Self_TopRated 2.508258 2.217909 0.079321 0.032667 0.039983 0.053170 0.068884 0.048582 0.070766 0.027602 0.114790 0.512943 0.411453 0.034385 0.124546 1.000000 0.024531 2.761238 0.991660
0 Self_GlobalAvg 1.125760 0.943534 0.061188 0.025968 0.031383 0.041343 0.040558 0.032107 0.067695 0.027470 0.171187 0.509546 0.384942 0.027213 0.118383 1.000000 0.025974 2.711772 0.992003
0 Ready_Random 1.514265 1.215956 0.048780 0.021007 0.024667 0.032495 0.031867 0.023414 0.052904 0.020511 0.126790 0.507024 0.322375 0.021635 0.102789 0.988017 0.183983 5.100443 0.906900
0 Ready_I-KNN 1.030386 0.813067 0.026087 0.006908 0.010593 0.016046 0.021137 0.009522 0.024214 0.008958 0.048068 0.499885 0.154825 0.008007 0.069521 0.402333 0.434343 5.133650 0.877999
0 Ready_I-KNNBaseline 0.935327 0.737424 0.002545 0.000755 0.001105 0.001602 0.002253 0.000930 0.003444 0.001362 0.011760 0.496724 0.021209 0.000862 0.045379 0.482821 0.059885 2.232578 0.994487
0 Ready_U-KNN 1.023495 0.807913 0.000742 0.000205 0.000305 0.000449 0.000536 0.000198 0.000845 0.000274 0.002744 0.496441 0.007423 0.000235 0.042533 0.602121 0.010823 2.089186 0.995706
0 Self_BaselineIU 0.958136 0.754051 0.000954 0.000188 0.000298 0.000481 0.000644 0.000223 0.001043 0.000335 0.003348 0.496433 0.009544 0.000220 0.042809 0.699046 0.005051 1.945910 0.995669
0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 0.000278 0.000463 0.000644 0.000189 0.000752 0.000168 0.001677 0.496424 0.009544 0.000201 0.042622 0.600530 0.005051 1.803126 0.996380
0 Self_IKNN 1.018363 0.808793 0.000318 0.000108 0.000140 0.000189 0.000000 0.000000 0.000214 0.000037 0.000368 0.496391 0.003181 0.000118 0.041755 0.392153 0.115440 4.174741 0.965327

Ready-made SVD - Surprise implementation

SVD

import importlib

import helpers
import surprise as sp

# Reload so that edits made to helpers.py after the kernel first imported it
# take effect.  importlib.reload replaces imp.reload (the imp module is
# deprecated and was removed in Python 3.12).
importlib.reload(helpers)

algo = sp.SVD(biased=False) # to use unbiased version

# Judging by the arguments and the printed log, helpers.ready_made runs the
# algorithm and writes top-N recommendations and rating estimations to the two
# given paths -- confirm in helpers.py.
helpers.ready_made(algo, reco_path='Recommendations generated/ml-100k/Ready_SVD_reco.csv',
          estimations_path='Recommendations generated/ml-100k/Ready_SVD_estimations.csv')
Generating predictions...
Generating top N recommendations...
Generating predictions...

Biased SVD - on top of the baseline

import importlib

import helpers
import surprise as sp

# Reload so that edits made to helpers.py after the kernel first imported it
# take effect.  importlib.reload replaces imp.reload (the imp module is
# deprecated and was removed in Python 3.12).
importlib.reload(helpers)

algo = sp.SVD() # default is biased=True

# Judging by the arguments and the printed log, helpers.ready_made runs the
# algorithm and writes top-N recommendations and rating estimations to the two
# given paths -- confirm in helpers.py.
helpers.ready_made(algo, reco_path='Recommendations generated/ml-100k/Ready_SVDBiased_reco.csv',
          estimations_path='Recommendations generated/ml-100k/Ready_SVDBiased_estimations.csv')
Generating predictions...
Generating top N recommendations...
Generating predictions...
import importlib

import evaluation_measures as ev

# Reload so that edits made to evaluation_measures.py after the kernel first
# imported it take effect.  The module is imported *before* it is reloaded, so
# the cell also works in a fresh kernel, and importlib.reload replaces
# imp.reload (the imp module is deprecated and was removed in Python 3.12).
importlib.reload(ev)

dir_path="Recommendations generated/ml-100k/"
super_reactions=[4,5]
test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\t', header=None)

# Judging by the output table, this evaluates every model whose files are
# stored under dir_path -- confirm in evaluation_measures.
ev.evaluate_all(test, dir_path, super_reactions)
943it [00:00, 11925.95it/s]
943it [00:00, 12314.95it/s]
943it [00:00, 10706.18it/s]
943it [00:00, 11925.84it/s]
943it [00:00, 11349.81it/s]
943it [00:00, 11925.98it/s]
943it [00:00, 11018.70it/s]
943it [00:00, 12396.58it/s]
943it [00:00, 12562.02it/s]
943it [00:00, 11489.71it/s]
943it [00:00, 11631.38it/s]
943it [00:00, 12210.06it/s]
943it [00:00, 11629.33it/s]