Self made SVD

import helpers
import pandas as pd
import numpy as np
import scipy.sparse as sparse
from collections import defaultdict
from itertools import chain
import random

# Read the tab-separated, header-less train/test splits and hand them to the
# project helper, which returns the two user x item matrices plus the lookup
# tables between raw ids and matrix codes (presumably CSR matrices, judging by
# the helper's name -- confirm in helpers.data_to_csr).
train_read = pd.read_csv('./Datasets/ml-100k/train.csv', header=None, sep='\t')
test_read = pd.read_csv('./Datasets/ml-100k/test.csv', header=None, sep='\t')
(
    train_ui,
    test_ui,
    user_code_id,
    user_id_code,
    item_code_id,
    item_id_code,
) = helpers.data_to_csr(train_read, test_read)

# Done similarly to https://github.com/albertauyeung/matrix-factorization-in-python
from tqdm import tqdm

class SVD():
    """Unbiased matrix-factorization recommender trained with SGD.

    The rating of user ``u`` for item ``i`` is predicted as the dot product
    of the latent vectors ``Pu[u]`` and ``Qi[i]``.  Both factor matrices are
    drawn from a normal distribution and fitted by stochastic gradient
    descent on the observed ratings with L2 regularization.

    Parameters
    ----------
    train_ui : scipy.sparse.csr_matrix
        User x item rating matrix.  ``recommend`` reads its ``indptr`` and
        ``indices`` arrays, so it has to be in CSR format.
    learning_rate : float
        Step size of every SGD update.
    regularization : float
        L2 penalty applied to the user and item factors.
    nb_factors : int
        Number of latent factors per user/item.
    iterations : int
        Number of passes (epochs) over the training ratings in ``train``.
    """

    def __init__(self, train_ui, learning_rate, regularization, nb_factors, iterations):
        self.train_ui = train_ui
        self.uir = self._to_uir(train_ui)

        self.learning_rate = learning_rate
        self.regularization = regularization
        self.iterations = iterations
        self.nb_users, self.nb_items = train_ui.shape
        self.nb_ratings = train_ui.nnz
        self.nb_factors = nb_factors

        self.Pu = np.random.normal(loc=0, scale=1./self.nb_factors, size=(self.nb_users, self.nb_factors))
        self.Qi = np.random.normal(loc=0, scale=1./self.nb_factors, size=(self.nb_items, self.nb_factors))

        # Dense user x item prediction matrix filled by estimations().
        # Kept under its own name so that it no longer overwrites the
        # estimations() method; None means "not computed / out of date".
        self.estimated_ratings = None

    @staticmethod
    def _to_uir(ui):
        """Return the stored entries of a sparse matrix as (user, item, rating) tuples."""
        # A single COO conversion keeps row, column and value aligned even
        # when the matrix stores explicit zeros.
        coo = ui.tocoo()
        return list(zip(coo.row, coo.col, coo.data))

    def train(self, test_ui=None):
        """Run ``iterations`` epochs of SGD and log the RMSE after each one.

        ``learning_process`` receives one row per epoch:
        ``[epoch, train_RMSE]`` or, when ``test_ui`` is given,
        ``[epoch, train_RMSE, test_RMSE]``.
        """
        # Identity check: comparing a sparse matrix with None via != is
        # element-wise/ambiguous and not a reliable "was it passed" test.
        has_test = test_ui is not None
        if has_test:
            self.test_uir = self._to_uir(test_ui)

        self.learning_process = []
        pbar = tqdm(range(self.iterations))
        for i in pbar:
            pbar.set_description(f'Epoch {i} RMSE: {self.learning_process[-1][1] if i>0 else 0}. Training epoch {i+1}...')
            np.random.shuffle(self.uir)
            self.sgd(self.uir)
            epoch_log = [i+1, self.RMSE_total(self.uir)]
            if has_test:
                epoch_log.append(self.RMSE_total(self.test_uir))
            self.learning_process.append(epoch_log)

    def sgd(self, uir):
        """Perform one SGD pass over the given (user, item, rating) triples."""
        for u, i, score in uir:
            # Compute prediction and error
            e = score - self.get_rating(u, i)

            # Both updates are computed from the current vectors before
            # either of them is modified.
            Pu_update = self.learning_rate * (e * self.Qi[i] - self.regularization * self.Pu[u])
            Qi_update = self.learning_rate * (e * self.Pu[u] - self.regularization * self.Qi[i])

            self.Pu[u] += Pu_update
            self.Qi[i] += Qi_update

        # The factors changed, so a previously computed matrix is stale.
        self.estimated_ratings = None

    def get_rating(self, u, i):
        """Return the predicted rating of user ``u`` for item ``i``."""
        return self.Pu[u].dot(self.Qi[i])

    def RMSE_total(self, uir):
        """Return the RMSE of the predictions over the given triples.

        Returns NaN for an empty collection instead of dividing by zero.
        """
        if len(uir) == 0:
            return float('nan')
        users, items, scores = (np.asarray(column) for column in zip(*uir))
        # Row-wise dot products Pu[u] . Qi[i] for all pairs at once.
        predictions = np.einsum('ij,ij->i', self.Pu[users], self.Qi[items])
        return np.sqrt(np.mean((scores - predictions) ** 2))

    def estimations(self):
        """Compute, store and return the dense user x item prediction matrix."""
        self.estimated_ratings = np.dot(self.Pu, self.Qi.T)
        return self.estimated_ratings

    def _current_estimations(self):
        """Return the prediction matrix, computing it first if necessary."""
        if self.estimated_ratings is None:
            self.estimations()
        return self.estimated_ratings

    def recommend(self, user_code_id, item_code_id, topK=10):
        """Return the ``topK`` best unseen items for every user.

        Each row has the format ``[user, item1, score1, item2, score2, ...]``
        with users/items translated through ``user_code_id``/``item_code_id``.
        Items already rated in ``train_ui`` and NaN scores are skipped; users
        without any remaining candidate are omitted.
        """
        estimated = self._current_estimations()
        indptr, indices = self.train_ui.indptr, self.train_ui.indices

        result = []
        for nb_user, scores in enumerate(estimated):
            # Boolean mask instead of a per-item "in ndarray" linear scan.
            candidates = ~np.isnan(scores)
            candidates[indices[indptr[nb_user]:indptr[nb_user+1]]] = False
            items = np.flatnonzero(candidates)
            if items.size == 0:
                continue
            # Stable sort keeps ties in ascending item order, like the
            # stable list sort with reverse=True used previously.
            best = items[np.argsort(-scores[items], kind='stable')[:topK]]
            row = [user_code_id[nb_user]]
            for item in best:
                row.extend((item_code_id[int(item)], scores[item]))
            result.append(row)
        return result

    def estimate(self, user_code_id, item_code_id, test_ui):
        """Return ``[user, item, predicted_rating]`` for every stored test pair.

        A NaN prediction is replaced by 1.
        """
        estimated = self._current_estimations()
        result = []
        for user, item in zip(*test_ui.nonzero()):
            score = estimated[user, item]
            result.append([user_code_id[user], item_code_id[item],
                           score if not np.isnan(score) else 1])
        return result

# Fit the unbiased model; because the test matrix is passed to train(), the
# learning log also contains the test RMSE of every epoch.
model = SVD(
    train_ui,
    learning_rate=0.005,
    regularization=0.02,
    nb_factors=100,
    iterations=40,
)
model.train(test_ui)

Epoch 39 RMSE: 0.7469054750619549. Training epoch 40...: 100%|█████████████████████████| 40/40 [01:10<00:00,  1.76s/it]

import matplotlib.pyplot as plt

# Training curve over all epochs: only the first two columns of the learning
# log (epoch number and train RMSE) are kept.
df = pd.DataFrame(model.learning_process).iloc[:, :2]
df.columns = ['epoch', 'train_RMSE']
plt.plot('epoch', 'train_RMSE', color='blue', data=df)
plt.legend()

<matplotlib.legend.Legend at 0x19ac6c66f08>

import matplotlib.pyplot as plt

# Train vs. test RMSE; the first 10 epochs are dropped from the plot.
df = pd.DataFrame(
    model.learning_process[10:],
    columns=['epoch', 'train_RMSE', 'test_RMSE'],
)
plt.plot('epoch', 'train_RMSE', color='blue', data=df)
plt.plot('epoch', 'test_RMSE', color='yellow', linestyle='dashed', data=df)
plt.legend()

<matplotlib.legend.Legend at 0x19ac1ce8308>

Saving and evaluating recommendations

# Compute the dense matrix of predicted ratings used by recommend()/estimate().
model.estimations()

# Top-10 unseen items per user, one row per user.
top_n = pd.DataFrame(model.recommend(user_code_id, item_code_id, topK=10))
top_n.to_csv('Recommendations generated/ml-100k/Self_SVD_reco.csv', header=False, index=False)

# Predicted ratings for the user-item pairs stored in the test matrix.
estimations = pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))
estimations.to_csv('Recommendations generated/ml-100k/Self_SVD_estimations.csv', header=False, index=False)

import evaluation_measures as ev

# Evaluate from the files that were just written.
estimations_df = pd.read_csv('Recommendations generated/ml-100k/Self_SVD_estimations.csv', header=None)
reco = np.loadtxt('Recommendations generated/ml-100k/Self_SVD_reco.csv', delimiter=',')

ev.evaluate(
    test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\t', header=None),
    estimations_df=estimations_df,
    reco=reco,
    super_reactions=[4, 5],
)

943it [00:00, 11351.12it/s]

	RMSE	MAE	precision	recall	F_1	F_05	precision_super	recall_super	NDCG	mAP	MRR	LAUC	HR	F_2	Whole_average	Reco in test	Test coverage	Shannon	Gini
0	0.91489	0.717962	0.102969	0.042325	0.052022	0.069313	0.093562	0.074994	0.105416	0.050278	0.191533	0.51789	0.462354	0.044591	0.150604	0.867656	0.141414	3.929249	0.971112

import importlib

import evaluation_measures as ev

# Reload so that edits made to evaluation_measures.py since its first import
# are picked up.  importlib.reload replaces imp.reload: the imp module is
# deprecated and was removed in Python 3.12.  Importing before reloading also
# makes the cell work on a fresh kernel.
importlib.reload(ev)

dir_path = "Recommendations generated/ml-100k/"
super_reactions = [4, 5]
test = pd.read_csv('./Datasets/ml-100k/test.csv', sep='\t', header=None)

ev.evaluate_all(test, dir_path, super_reactions)

943it [00:00, 11925.95it/s]
943it [00:00, 12314.60it/s]
943it [00:00, 10828.94it/s]
943it [00:00, 11925.91it/s]
943it [00:00, 11489.44it/s]
943it [00:00, 11489.57it/s]
943it [00:00, 10353.22it/s]
943it [00:00, 11925.88it/s]
943it [00:00, 11631.38it/s]
943it [00:00, 11631.42it/s]
943it [00:00, 10585.78it/s]
943it [00:00, 11215.99it/s]
943it [00:00, 11215.93it/s]
943it [00:00, 11776.84it/s]
943it [00:00, 12396.27it/s]
943it [00:00, 10468.25it/s]
943it [00:00, 11972.70it/s]
943it [00:00, 11925.84it/s]

Model	RMSE	MAE	precision	recall	F_1	F_05	precision_super	recall_super	NDCG	mAP	MRR	LAUC	HR	F_2	Whole_average	Reco in test	Test coverage	Shannon	Gini
Self_RP3Beta	3.702928	3.527713	0.322694	0.216069	0.212152	0.247538	0.245279	0.284983	0.388271	0.248239	0.636318	0.605683	0.910923	0.205450	0.376967	0.999788	0.178932	4.549663	0.950182
Self_P3	3.702446	3.527273	0.282185	0.192092	0.186749	0.216980	0.204185	0.240096	0.339114	0.204905	0.572157	0.593544	0.875928	0.181702	0.340803	1.000000	0.077201	3.875892	0.974947
Self_TopPop	2.508258	2.217909	0.188865	0.116919	0.118732	0.141584	0.130472	0.137473	0.214651	0.111707	0.400939	0.555546	0.765642	0.112750	0.249607	1.000000	0.038961	3.159079	0.987317
Self_SVDBaseline	3.645871	3.480308	0.135949	0.078868	0.082011	0.099188	0.106974	0.103767	0.159486	0.079783	0.328576	0.536311	0.632025	0.077145	0.201674	0.999894	0.281385	5.140721	0.909056
Ready_SVD	0.950835	0.748676	0.097879	0.048335	0.053780	0.068420	0.086159	0.080289	0.113553	0.054094	0.249037	0.520893	0.498409	0.048439	0.159941	0.997985	0.204906	4.395721	0.954872
Self_SVD	0.914890	0.717962	0.102969	0.042325	0.052022	0.069313	0.093562	0.074994	0.105416	0.050278	0.191533	0.517890	0.462354	0.044591	0.150604	0.867656	0.141414	3.929249	0.971112
Ready_Baseline	0.949459	0.752487	0.091410	0.037652	0.046030	0.061286	0.079614	0.056463	0.095957	0.043178	0.198193	0.515501	0.437964	0.039549	0.141900	1.000000	0.033911	2.836513	0.991139
Ready_SVDBiased	0.943277	0.743628	0.080912	0.033048	0.040445	0.053881	0.070815	0.049631	0.090496	0.041928	0.200192	0.513176	0.411453	0.034776	0.135063	0.998727	0.168110	4.165618	0.964211
Self_KNNSurprisetask	0.946255	0.745209	0.083457	0.032848	0.041227	0.055493	0.074785	0.048890	0.089577	0.040902	0.189057	0.513076	0.417815	0.034996	0.135177	0.888547	0.130592	3.611806	0.978659
Self_TopRated	2.508258	2.217909	0.079321	0.032667	0.039983	0.053170	0.068884	0.048582	0.070766	0.027602	0.114790	0.512943	0.411453	0.034385	0.124546	1.000000	0.024531	2.761238	0.991660
Self_GlobalAvg	1.125760	0.943534	0.061188	0.025968	0.031383	0.041343	0.040558	0.032107	0.067695	0.027470	0.171187	0.509546	0.384942	0.027213	0.118383	1.000000	0.025974	2.711772	0.992003
Ready_Random	1.514265	1.215956	0.048780	0.021007	0.024667	0.032495	0.031867	0.023414	0.052904	0.020511	0.126790	0.507024	0.322375	0.021635	0.102789	0.988017	0.183983	5.100443	0.906900
Ready_I-KNN	1.030386	0.813067	0.026087	0.006908	0.010593	0.016046	0.021137	0.009522	0.024214	0.008958	0.048068	0.499885	0.154825	0.008007	0.069521	0.402333	0.434343	5.133650	0.877999
Ready_I-KNNBaseline	0.935327	0.737424	0.002545	0.000755	0.001105	0.001602	0.002253	0.000930	0.003444	0.001362	0.011760	0.496724	0.021209	0.000862	0.045379	0.482821	0.059885	2.232578	0.994487
Ready_U-KNN	1.023495	0.807913	0.000742	0.000205	0.000305	0.000449	0.000536	0.000198	0.000845	0.000274	0.002744	0.496441	0.007423	0.000235	0.042533	0.602121	0.010823	2.089186	0.995706
Self_BaselineIU	0.958136	0.754051	0.000954	0.000188	0.000298	0.000481	0.000644	0.000223	0.001043	0.000335	0.003348	0.496433	0.009544	0.000220	0.042809	0.699046	0.005051	1.945910	0.995669
Self_BaselineUI	0.967585	0.762740	0.000954	0.000170	0.000278	0.000463	0.000644	0.000189	0.000752	0.000168	0.001677	0.496424	0.009544	0.000201	0.042622	0.600530	0.005051	1.803126	0.996380
Self_IKNN	1.018363	0.808793	0.000318	0.000108	0.000140	0.000189	0.000000	0.000000	0.000214	0.000037	0.000368	0.496391	0.003181	0.000118	0.041755	0.392153	0.115440	4.174741	0.965327

Embeddings

# Toy example of row-wise L2 normalization: every row is divided by its norm.
x = np.array([[1, 2], [3, 4]])
display(x)
x / np.linalg.norm(x, axis=1, keepdims=True)

array([[1, 2],
       [3, 4]])

array([[0.4472136 , 0.89442719],
       [0.6       , 0.8       ]])

# Pick a random item among the column indices stored in the training matrix.
item = random.choice(list(set(train_ui.indices)))

# Scale every item embedding to unit L2 norm (no mean-centering), so the dot
# product below is a cosine similarity.  Omitting the normalization also makes
# sense, but then items with a greater magnitude are recommended more often.
embeddings_norm = model.Qi / np.linalg.norm(model.Qi, axis=1, keepdims=True)

# Similarity of every item to the chosen one; keep the 10 highest scores
# (the chosen item itself comes first with a score of 1).
similarity_scores = np.dot(embeddings_norm, embeddings_norm[item])
top_similar_items = (
    pd.DataFrame(enumerate(similarity_scores), columns=['code', 'score'])
    .sort_values(by=['score'], ascending=[False])[:10]
)

# Translate matrix codes back to the original item ids.
top_similar_items['item_id'] = top_similar_items['code'].apply(lambda code: item_code_id[code])

items = pd.read_csv('./Datasets/ml-100k/movies.csv')

result = pd.merge(top_similar_items, items, left_on='item_id', right_on='id')

result

	code	score	item_id	id	title	genres
0	423	1.000000	424	424	Children of the Corn: The Gathering (1996)	Horror
1	984	0.980977	985	985	Blood & Wine (1997)	Drama
2	1458	0.980754	1459	1459	Madame Butterfly (1995)	Musical
3	1278	0.980699	1279	1279	Wild America (1997)	Adventure, Children's
4	1380	0.980041	1381	1381	Losing Chase (1996)	Drama
5	744	0.979974	745	745	Ruling Class, The (1972)	Comedy
6	705	0.979904	706	706	Bad Moon (1996)	Horror
7	1085	0.979812	1086	1086	It's My Party (1995)	Drama
8	1237	0.979443	1238	1238	Full Speed (1996)	Drama
9	1190	0.979429	1191	1191	Letter From Death Row, A (1998)	Crime, Drama

Project task 5: implement SVD on top of the baseline (as in the Surprise library)

# making changes to our implementation by considering additional parameters in the gradient descent procedure 
# seems to be the fastest option
# please save the output in 'Recommendations generated/ml-100k/Self_SVDBaseline_reco.csv' and
# 'Recommendations generated/ml-100k/Self_SVDBaseline_estimations.csv'
# Done similarly to https://github.com/albertauyeung/matrix-factorization-in-python
from tqdm import tqdm

class SVDboosted():
    """Biased matrix factorization (SVD on top of a baseline) trained with SGD.

    The rating of user ``u`` for item ``i`` is predicted as

        global_mean + Bu[u] + Bi[i] + Pu[u] . Qi[i]

    i.e. the global mean of the training ratings, one scalar bias per user and
    per item, and the dot product of the latent vectors.  Biases start at
    zero; the factor matrices are drawn from a normal distribution.  All
    parameters are fitted by stochastic gradient descent with L2
    regularization.

    Parameters
    ----------
    train_ui : scipy.sparse.csr_matrix
        User x item rating matrix.  ``recommend`` reads its ``indptr`` and
        ``indices`` arrays, so it has to be in CSR format.
    learning_rate : float
        Step size of every SGD update.
    regularization : float
        L2 penalty applied to biases and factors.
    nb_factors : int
        Number of latent factors per user/item.
    iterations : int
        Number of passes (epochs) over the training ratings in ``train``.
    """

    def __init__(self, train_ui, learning_rate, regularization, nb_factors, iterations):
        self.train_ui = train_ui
        self.uir = self._to_uir(train_ui)

        self.learning_rate = learning_rate
        self.regularization = regularization
        self.iterations = iterations
        self.nb_users, self.nb_items = train_ui.shape
        self.nb_ratings = train_ui.nnz
        self.nb_factors = nb_factors

        # Baseline part of the model: the mean of all training ratings plus
        # ONE scalar bias per user and per item.  (Allocating the biases as
        # (n, nb_factors) matrices turned every prediction and the RMSE into
        # vectors.)
        self.global_mean = float(train_ui.data.mean()) if train_ui.nnz else 0.0
        self.Bu = np.zeros(self.nb_users)
        self.Bi = np.zeros(self.nb_items)

        self.Pu = np.random.normal(loc=0, scale=1./self.nb_factors, size=(self.nb_users, self.nb_factors))
        self.Qi = np.random.normal(loc=0, scale=1./self.nb_factors, size=(self.nb_items, self.nb_factors))

        # Dense user x item prediction matrix filled by estimations().
        # Kept under its own name so that it no longer overwrites the
        # estimations() method; None means "not computed / out of date".
        self.estimated_ratings = None

    @staticmethod
    def _to_uir(ui):
        """Return the stored entries of a sparse matrix as (user, item, rating) tuples."""
        # A single COO conversion keeps row, column and value aligned even
        # when the matrix stores explicit zeros.
        coo = ui.tocoo()
        return list(zip(coo.row, coo.col, coo.data))

    def train(self, test_ui=None):
        """Run ``iterations`` epochs of SGD and log the RMSE after each one.

        ``learning_process`` receives one row per epoch:
        ``[epoch, train_RMSE]`` or, when ``test_ui`` is given,
        ``[epoch, train_RMSE, test_RMSE]``.
        """
        # Identity check: comparing a sparse matrix with None via != is
        # element-wise/ambiguous and not a reliable "was it passed" test.
        has_test = test_ui is not None
        if has_test:
            self.test_uir = self._to_uir(test_ui)

        self.learning_process = []
        pbar = tqdm(range(self.iterations))
        for i in pbar:
            pbar.set_description(f'Epoch {i} RMSE: {self.learning_process[-1][1] if i>0 else 0}. Training epoch {i+1}...')
            np.random.shuffle(self.uir)
            self.sgd(self.uir)
            epoch_log = [i+1, self.RMSE_total(self.uir)]
            if has_test:
                epoch_log.append(self.RMSE_total(self.test_uir))
            self.learning_process.append(epoch_log)

    def sgd(self, uir):
        """Perform one SGD pass over the given (user, item, rating) triples."""
        for u, i, score in uir:
            # Compute prediction and error
            e = score - self.get_rating(u, i)

            # All updates are computed from the current parameters before
            # any of them is modified.
            Bu_update = self.learning_rate * (e - self.regularization * self.Bu[u])
            Bi_update = self.learning_rate * (e - self.regularization * self.Bi[i])
            Pu_update = self.learning_rate * (e * self.Qi[i] - self.regularization * self.Pu[u])
            Qi_update = self.learning_rate * (e * self.Pu[u] - self.regularization * self.Qi[i])

            self.Bu[u] += Bu_update
            self.Bi[i] += Bi_update
            self.Pu[u] += Pu_update
            self.Qi[i] += Qi_update

        # The parameters changed, so a previously computed matrix is stale.
        self.estimated_ratings = None

    def get_rating(self, u, i):
        """Return the (scalar) predicted rating of user ``u`` for item ``i``."""
        return self.global_mean + self.Bu[u] + self.Bi[i] + self.Pu[u].dot(self.Qi[i])

    def RMSE_total(self, uir):
        """Return the RMSE of the predictions over the given triples.

        Returns NaN for an empty collection instead of dividing by zero.
        """
        if len(uir) == 0:
            return float('nan')
        users, items, scores = (np.asarray(column) for column in zip(*uir))
        predictions = (self.global_mean + self.Bu[users] + self.Bi[items]
                       + np.einsum('ij,ij->i', self.Pu[users], self.Qi[items]))
        return np.sqrt(np.mean((scores - predictions) ** 2))

    def estimations(self):
        """Compute, store and return the dense user x item prediction matrix.

        The matrix contains the full model output (mean + biases + factors),
        so it agrees with ``get_rating`` for every user-item pair.
        """
        self.estimated_ratings = (self.global_mean
                                  + self.Bu[:, None]
                                  + self.Bi[None, :]
                                  + np.dot(self.Pu, self.Qi.T))
        return self.estimated_ratings

    def _current_estimations(self):
        """Return the prediction matrix, computing it first if necessary."""
        if self.estimated_ratings is None:
            self.estimations()
        return self.estimated_ratings

    def recommend(self, user_code_id, item_code_id, topK=10):
        """Return the ``topK`` best unseen items for every user.

        Each row has the format ``[user, item1, score1, item2, score2, ...]``
        with users/items translated through ``user_code_id``/``item_code_id``.
        Items already rated in ``train_ui`` and NaN scores are skipped; users
        without any remaining candidate are omitted.
        """
        estimated = self._current_estimations()
        indptr, indices = self.train_ui.indptr, self.train_ui.indices

        result = []
        for nb_user, scores in enumerate(estimated):
            # Boolean mask instead of a per-item "in ndarray" linear scan.
            candidates = ~np.isnan(scores)
            candidates[indices[indptr[nb_user]:indptr[nb_user+1]]] = False
            items = np.flatnonzero(candidates)
            if items.size == 0:
                continue
            # Stable sort keeps ties in ascending item order, like the
            # stable list sort with reverse=True used previously.
            best = items[np.argsort(-scores[items], kind='stable')[:topK]]
            row = [user_code_id[nb_user]]
            for item in best:
                row.extend((item_code_id[int(item)], scores[item]))
            result.append(row)
        return result

    def estimate(self, user_code_id, item_code_id, test_ui):
        """Return ``[user, item, predicted_rating]`` for every stored test pair.

        A NaN prediction is replaced by 1.
        """
        estimated = self._current_estimations()
        result = []
        for user, item in zip(*test_ui.nonzero()):
            score = estimated[user, item]
            result.append([user_code_id[user], item_code_id[item],
                           score if not np.isnan(score) else 1])
        return result

# Fit the biased model with the same hyper-parameters as the plain SVD above;
# passing the test matrix makes train() log the test RMSE of every epoch too.
model = SVDboosted(
    train_ui,
    learning_rate=0.005,
    regularization=0.02,
    nb_factors=100,
    iterations=40,
)
model.train(test_ui)

Epoch 1 RMSE: [1.56545038 1.56550953 1.56561831 1.56506255 1.56544717 1.56542472        | 1/40 [00:02<01:50,  2.83s/it]
Epoch 1 RMSE: [1.56545038 1.56550953 1.56561831 1.56506255 1.56544717 1.56542472
Epoch 2 RMSE: [1.24195612 1.24198904 1.24211143 1.24181535 1.24194284 1.24200128
Epoch 2 RMSE: [1.24195612 1.24198904 1.24211143 1.24181535 1.24194284 1.24200128
Epoch 3 RMSE: [1.13247887 1.13249914 1.13258642 1.13239893 1.13245611 1.13250907
Epoch 3 RMSE: [1.13247887 1.13249914 1.13258642 1.13239893 1.13245611 1.13250907
Epoch 4 RMSE: [1.07474573 1.07476035 1.07482625 1.07468999 1.07472024 1.07476528
Epoch 4 RMSE: [1.07474573 1.07476035 1.07482625 1.07468999 1.07472024 1.07476528
Epoch 5 RMSE: [1.03819311 1.03820533 1.03825798 1.03815174 1.03816959 1.03820895
Epoch 5 RMSE: [1.03819311 1.03820533 1.03825798 1.03815174 1.03816959 1.03820895
Epoch 6 RMSE: [1.01305431 1.01306406 1.0131073  1.0130215  1.01303201 1.01306765
Epoch 6 RMSE: [1.01305431 1.01306406 1.0131073  1.0130215  1.01303201 1.01306765
Epoch 7 RMSE: [0.99453145 0.99453934 0.99457617 0.99450541 0.99451055 0.99454367
Epoch 7 RMSE: [0.99453145 0.99453934 0.99457617 0.99450541 0.99451055 0.99454367
Epoch 8 RMSE: [0.98059211 0.98059798 0.9806297  0.98057082 0.980572   0.98060328
Epoch 8 RMSE: [0.98059211 0.98059798 0.9806297  0.98057082 0.980572   0.98060328
Epoch 9 RMSE: [0.96964915 0.96965335 0.96968135 0.96963161 0.96962996 0.96965908
Epoch 9 RMSE: [0.96964915 0.96965335 0.96968135 0.96963161 0.96962996 0.96965908
Epoch 10 RMSE: [0.96082121 0.96082423 0.96084907 0.96080661 0.96080248 0.9608298
Epoch 10 RMSE: [0.96082121 0.96082423 0.96084907 0.96080661 0.96080248 0.9608298
Epoch 11 RMSE: [0.95365923 0.95366146 0.95368372 0.95364696 0.95364089 0.953667
Epoch 11 RMSE: [0.95365923 0.95366146 0.95368372 0.95364696 0.95364089 0.953667
Epoch 12 RMSE: [0.94766016 0.94766156 0.94768159 0.94764986 0.947642   0.94766674
Epoch 12 RMSE: [0.94766016 0.94766156 0.94768159 0.94764986 0.947642   0.94766674
Epoch 13 RMSE: [0.94261767 0.94261834 0.94263624 0.94260867 0.94259966 0.94262314
Epoch 13 RMSE: [0.94261767 0.94261834 0.94263624 0.94260867 0.94259966 0.94262314
Epoch 14 RMSE: [0.938169   0.9381694  0.93818542 0.93816133 0.93815137 0.93817355
Epoch 14 RMSE: [0.938169   0.9381694  0.93818542 0.93816133 0.93815137 0.93817355
Epoch 15 RMSE: [0.93426778 0.9342676  0.9342824  0.93426131 0.93425033 0.93427147
Epoch 15 RMSE: [0.93426778 0.9342676  0.9342824  0.93426131 0.93425033 0.93427147
Epoch 16 RMSE: [0.93070807 0.93070774 0.93072136 0.93070245 0.93069117 0.93071116
Epoch 16 RMSE: [0.93070807 0.93070774 0.93072136 0.93070245 0.93069117 0.93071116
Epoch 17 RMSE: [0.92736841 0.92736783 0.9273801  0.92736348 0.92735194 0.92737089
Epoch 17 RMSE: [0.92736841 0.92736783 0.9273801  0.92736348 0.92735194 0.92737089
Epoch 18 RMSE: [0.92401862 0.92401778 0.92402901 0.92401417 0.92400251 0.92402036
Epoch 18 RMSE: [0.92401862 0.92401778 0.92402901 0.92401417 0.92400251 0.92402036
Epoch 19 RMSE: [0.92070555 0.92070466 0.92071497 0.92070168 0.92069002 0.92070695
Epoch 19 RMSE: [0.92070555 0.92070466 0.92071497 0.92070168 0.92069002 0.92070695
Epoch 20 RMSE: [0.91707281 0.91707182 0.91708121 0.91706927 0.91705756 0.91707364
Epoch 20 RMSE: [0.91707281 0.91707182 0.91708121 0.91706927 0.91705756 0.91707364
Epoch 21 RMSE: [0.91348792 0.91348679 0.91349536 0.91348474 0.91347301 0.91348798
Epoch 21 RMSE: [0.91348792 0.91348679 0.91349536 0.91348474 0.91347301 0.91348798
Epoch 22 RMSE: [0.90957269 0.90957161 0.90957937 0.90956986 0.9095583  0.90957263
Epoch 22 RMSE: [0.90957269 0.90957161 0.90957937 0.90956986 0.9095583  0.90957263
Epoch 23 RMSE: [0.90514652 0.90514525 0.90515258 0.90514401 0.90513237 0.90514616
Epoch 23 RMSE: [0.90514652 0.90514525 0.90515258 0.90514401 0.90513237 0.90514616
Epoch 24 RMSE: [0.90024301 0.90024177 0.90024834 0.90024062 0.90022924 0.90024226
Epoch 24 RMSE: [0.90024301 0.90024177 0.90024834 0.90024062 0.90022924 0.90024226
Epoch 25 RMSE: [0.89486164 0.89486046 0.89486646 0.89485941 0.89484821 0.89486074
Epoch 25 RMSE: [0.89486164 0.89486046 0.89486646 0.89485941 0.89484821 0.89486074
Epoch 26 RMSE: [0.88903799 0.88903702 0.88904243 0.88903603 0.88902515 0.88903703
Epoch 26 RMSE: [0.88903799 0.88903702 0.88904243 0.88903603 0.88902515 0.88903703
Epoch 27 RMSE: [0.88287967 0.88287863 0.88288359 0.88287782 0.88286725 0.88287848
Epoch 27 RMSE: [0.88287967 0.88287863 0.88288359 0.88287782 0.88286725 0.88287848
Epoch 28 RMSE: [0.87627836 0.87627739 0.87628186 0.8762767  0.87626624 0.8762769
Epoch 28 RMSE: [0.87627836 0.87627739 0.87628186 0.8762767  0.87626624 0.8762769
Epoch 29 RMSE: [0.86929878 0.86929781 0.86930189 0.86929724 0.86928706 0.86929718
Epoch 29 RMSE: [0.86929878 0.86929781 0.86930189 0.86929724 0.86928706 0.86929718
Epoch 30 RMSE: [0.8620862  0.86208531 0.86208899 0.86208476 0.86207471 0.86208446
Epoch 30 RMSE: [0.8620862  0.86208531 0.86208899 0.86208476 0.86207471 0.86208446
Epoch 31 RMSE: [0.85452681 0.85452603 0.85452921 0.85452551 0.85451573 0.85452503
Epoch 31 RMSE: [0.85452681 0.85452603 0.85452921 0.85452551 0.85451573 0.85452503
Epoch 32 RMSE: [0.84666254 0.84666179 0.84666469 0.84666136 0.8466519  0.84666071
Epoch 32 RMSE: [0.84666254 0.84666179 0.84666469 0.84666136 0.8466519  0.84666071
Epoch 33 RMSE: [0.83814164 0.83814095 0.83814355 0.83814049 0.83813121 0.83813985
Epoch 33 RMSE: [0.83814164 0.83814095 0.83814355 0.83814049 0.83813121 0.83813985
Epoch 34 RMSE: [0.8295605  0.82955997 0.82956222 0.82955955 0.8295505  0.82955863
Epoch 34 RMSE: [0.8295605  0.82955997 0.82956222 0.82955955 0.8295505  0.82955863
Epoch 35 RMSE: [0.82044053 0.82044008 0.82044203 0.82043968 0.82043093 0.82043865
Epoch 35 RMSE: [0.82044053 0.82044008 0.82044203 0.82043968 0.82043093 0.82043865
Epoch 36 RMSE: [0.81096402 0.81096361 0.81096533 0.81096327 0.8109547  0.81096221
Epoch 36 RMSE: [0.81096402 0.81096361 0.81096533 0.81096327 0.8109547  0.81096221
Epoch 37 RMSE: [0.80132847 0.80132813 0.80132952 0.80132774 0.80131947 0.80132666
Epoch 37 RMSE: [0.80132847 0.80132813 0.80132952 0.80132774 0.80131947 0.80132666
Epoch 38 RMSE: [0.79114637 0.79114614 0.79114736 0.79114574 0.79113763 0.79114471
Epoch 38 RMSE: [0.79114637 0.79114614 0.79114736 0.79114574 0.79113763 0.79114471
Epoch 39 RMSE: [0.78071211 0.78071175 0.78071286 0.78071155 0.7807037  0.78071049
Epoch 39 RMSE: [0.78071211 0.78071175 0.78071286 0.78071155 0.7807037  0.78071049
Epoch 39 RMSE: [0.78071211 0.78071175 0.78071286 0.78071155 0.7807037  0.78071049
 0.78071326 0.78071879 0.78071336 0.7

# Compute the dense matrix of predicted ratings used by recommend()/estimate().
model.estimations()

# Top-10 unseen items per user, one row per user.
top_n = pd.DataFrame(model.recommend(user_code_id, item_code_id, topK=10))
top_n.to_csv('Recommendations generated/ml-100k/Self_SVDBaseline_reco.csv', header=False, index=False)

# Predicted ratings for the user-item pairs stored in the test matrix.
estimations = pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))
estimations.to_csv('Recommendations generated/ml-100k/Self_SVDBaseline_estimations.csv', header=False, index=False)

import importlib

import evaluation_measures as ev

# Reload so that edits made to evaluation_measures.py since its first import
# are picked up.  importlib.reload replaces imp.reload: the imp module is
# deprecated and was removed in Python 3.12.  Importing before reloading also
# makes the cell work on a fresh kernel.
importlib.reload(ev)

dir_path = "Recommendations generated/ml-100k/"
super_reactions = [4, 5]
test = pd.read_csv('./Datasets/ml-100k/test.csv', sep='\t', header=None)

ev.evaluate_all(test, dir_path, super_reactions)

943it [00:00, 11925.62it/s]
943it [00:00, 12690.64it/s]
943it [00:00, 11084.16it/s]
943it [00:00, 12561.94it/s]
943it [00:00, 11489.57it/s]
943it [00:00, 11776.67it/s]
943it [00:00, 10955.28it/s]
943it [00:00, 12078.77it/s]
943it [00:00, 11776.77it/s]
943it [00:00, 11631.38it/s]
943it [00:00, 11631.38it/s]
943it [00:00, 12235.67it/s]
943it [00:00, 11350.01it/s]
943it [00:00, 11702.10it/s]
943it [00:00, 12645.60it/s]
943it [00:00, 10586.01it/s]
943it [00:00, 11631.01it/s]
943it [00:00, 11631.32it/s]

Model	RMSE	MAE	precision	recall	F_1	F_05	precision_super	recall_super	NDCG	mAP	MRR	LAUC	HR	F_2	Whole_average	Reco in test	Test coverage	Shannon	Gini
Self_RP3Beta	3.702928	3.527713	0.322694	0.216069	0.212152	0.247538	0.245279	0.284983	0.388271	0.248239	0.636318	0.605683	0.910923	0.205450	0.376967	0.999788	0.178932	4.549663	0.950182
Self_P3	3.702446	3.527273	0.282185	0.192092	0.186749	0.216980	0.204185	0.240096	0.339114	0.204905	0.572157	0.593544	0.875928	0.181702	0.340803	1.000000	0.077201	3.875892	0.974947
Self_TopPop	2.508258	2.217909	0.188865	0.116919	0.118732	0.141584	0.130472	0.137473	0.214651	0.111707	0.400939	0.555546	0.765642	0.112750	0.249607	1.000000	0.038961	3.159079	0.987317
Self_SVDBaseline	3.645666	3.480246	0.137858	0.082398	0.084151	0.101063	0.107940	0.109393	0.164477	0.082973	0.342374	0.538097	0.638388	0.079860	0.205748	0.999894	0.279221	5.159076	0.907220
Ready_SVD	0.950835	0.748676	0.097879	0.048335	0.053780	0.068420	0.086159	0.080289	0.113553	0.054094	0.249037	0.520893	0.498409	0.048439	0.159941	0.997985	0.204906	4.395721	0.954872
Self_SVD	0.914890	0.717962	0.102969	0.042325	0.052022	0.069313	0.093562	0.074994	0.105416	0.050278	0.191533	0.517890	0.462354	0.044591	0.150604	0.867656	0.141414	3.929249	0.971112
Ready_Baseline	0.949459	0.752487	0.091410	0.037652	0.046030	0.061286	0.079614	0.056463	0.095957	0.043178	0.198193	0.515501	0.437964	0.039549	0.141900	1.000000	0.033911	2.836513	0.991139
Ready_SVDBiased	0.943277	0.743628	0.080912	0.033048	0.040445	0.053881	0.070815	0.049631	0.090496	0.041928	0.200192	0.513176	0.411453	0.034776	0.135063	0.998727	0.168110	4.165618	0.964211
Self_KNNSurprisetask	0.946255	0.745209	0.083457	0.032848	0.041227	0.055493	0.074785	0.048890	0.089577	0.040902	0.189057	0.513076	0.417815	0.034996	0.135177	0.888547	0.130592	3.611806	0.978659
Self_TopRated	2.508258	2.217909	0.079321	0.032667	0.039983	0.053170	0.068884	0.048582	0.070766	0.027602	0.114790	0.512943	0.411453	0.034385	0.124546	1.000000	0.024531	2.761238	0.991660
Self_GlobalAvg	1.125760	0.943534	0.061188	0.025968	0.031383	0.041343	0.040558	0.032107	0.067695	0.027470	0.171187	0.509546	0.384942	0.027213	0.118383	1.000000	0.025974	2.711772	0.992003
Ready_Random	1.514265	1.215956	0.048780	0.021007	0.024667	0.032495	0.031867	0.023414	0.052904	0.020511	0.126790	0.507024	0.322375	0.021635	0.102789	0.988017	0.183983	5.100443	0.906900
Ready_I-KNN	1.030386	0.813067	0.026087	0.006908	0.010593	0.016046	0.021137	0.009522	0.024214	0.008958	0.048068	0.499885	0.154825	0.008007	0.069521	0.402333	0.434343	5.133650	0.877999
Ready_I-KNNBaseline	0.935327	0.737424	0.002545	0.000755	0.001105	0.001602	0.002253	0.000930	0.003444	0.001362	0.011760	0.496724	0.021209	0.000862	0.045379	0.482821	0.059885	2.232578	0.994487
Ready_U-KNN	1.023495	0.807913	0.000742	0.000205	0.000305	0.000449	0.000536	0.000198	0.000845	0.000274	0.002744	0.496441	0.007423	0.000235	0.042533	0.602121	0.010823	2.089186	0.995706
Self_BaselineIU	0.958136	0.754051	0.000954	0.000188	0.000298	0.000481	0.000644	0.000223	0.001043	0.000335	0.003348	0.496433	0.009544	0.000220	0.042809	0.699046	0.005051	1.945910	0.995669
Self_BaselineUI	0.967585	0.762740	0.000954	0.000170	0.000278	0.000463	0.000644	0.000189	0.000752	0.000168	0.001677	0.496424	0.009544	0.000201	0.042622	0.600530	0.005051	1.803126	0.996380
Self_IKNN	1.018363	0.808793	0.000318	0.000108	0.000140	0.000189	0.000000	0.000000	0.000214	0.000037	0.000368	0.496391	0.003181	0.000118	0.041755	0.392153	0.115440	4.174741	0.965327

Ready-made SVD - Surprise implementation

SVD

import importlib

import helpers
import surprise as sp

# Pick up edits made to helpers.py since its first import.  importlib.reload
# replaces imp.reload: the imp module is deprecated and was removed in
# Python 3.12.
importlib.reload(helpers)

algo = sp.SVD(biased=False)  # to use unbiased version

helpers.ready_made(
    algo,
    reco_path='Recommendations generated/ml-100k/Ready_SVD_reco.csv',
    estimations_path='Recommendations generated/ml-100k/Ready_SVD_estimations.csv',
)

Generating predictions...
Generating top N recommendations...
Generating predictions...

Biased SVD - on top of the baseline

import importlib

import helpers
import surprise as sp

# Pick up edits made to helpers.py since its first import.  importlib.reload
# replaces imp.reload: the imp module is deprecated and was removed in
# Python 3.12.
importlib.reload(helpers)

algo = sp.SVD()  # default is biased=True

helpers.ready_made(
    algo,
    reco_path='Recommendations generated/ml-100k/Ready_SVDBiased_reco.csv',
    estimations_path='Recommendations generated/ml-100k/Ready_SVDBiased_estimations.csv',
)

Generating predictions...
Generating top N recommendations...
Generating predictions...

import importlib

import evaluation_measures as ev

# Reload so that edits made to evaluation_measures.py since its first import
# are picked up.  importlib.reload replaces imp.reload: the imp module is
# deprecated and was removed in Python 3.12.  Importing before reloading also
# makes the cell work on a fresh kernel.
importlib.reload(ev)

dir_path = "Recommendations generated/ml-100k/"
super_reactions = [4, 5]
test = pd.read_csv('./Datasets/ml-100k/test.csv', sep='\t', header=None)

ev.evaluate_all(test, dir_path, super_reactions)

943it [00:00, 11925.95it/s]
943it [00:00, 12314.95it/s]
943it [00:00, 10706.18it/s]
943it [00:00, 11925.84it/s]
943it [00:00, 11349.81it/s]
943it [00:00, 11925.98it/s]
943it [00:00, 11018.70it/s]
943it [00:00, 12396.58it/s]
943it [00:00, 12562.02it/s]
943it [00:00, 11489.71it/s]
943it [00:00, 11631.38it/s]
943it [00:00, 12210.06it/s]
943it [00:00, 11629.33it/s]

106 KiB Raw Permalink Blame History

Self made SVD

Saving and evaluating recommendations

Embeddings

Project task 5: implement SVD on top of the baseline (as in the Surprise library)

Ready-made SVD - Surprise implementation

SVD

Biased SVD - on top of the baseline

106 KiB

Raw Permalink Blame History