106 KiB
106 KiB
Self-made SVD
import helpers
import pandas as pd
import numpy as np
import scipy.sparse as sparse
from collections import defaultdict
from itertools import chain
import random
# Read the tab-separated MovieLens-100k train/test splits (the files have no
# header row).
train_read = pd.read_csv('./Datasets/ml-100k/train.csv', sep='\t', header=None)
test_read = pd.read_csv('./Datasets/ml-100k/test.csv', sep='\t', header=None)

# helpers.data_to_csr returns the two rating matrices plus four lookup tables.
# NOTE(review): presumably "code" is the matrix row/column index and "id" the
# original dataset identifier -- confirm against helpers.data_to_csr.
(
    train_ui,
    test_ui,
    user_code_id,
    user_id_code,
    item_code_id,
    item_id_code,
) = helpers.data_to_csr(train_read, test_read)
# Done similarly to https://github.com/albertauyeung/matrix-factorization-in-python
from tqdm import tqdm
class SVD():
    """Matrix-factorisation recommender (no bias terms) trained with plain SGD.

    Each stored rating ``r_ui`` is approximated by the dot product of a user
    factor vector ``Pu[u]`` and an item factor vector ``Qi[i]``.

    Parameters
    ----------
    train_ui : scipy sparse matrix of shape (nb_users, nb_items)
        Training ratings. ``recommend`` reads ``indices``/``indptr``, so a CSR
        matrix is expected.
    learning_rate : float
        SGD step size.
    regularization : float
        Weight of the shrinkage term applied to the factors in every update.
    nb_factors : int
        Number of latent factors per user/item.
    iterations : int
        Number of passes (epochs) over the training ratings in ``train``.
    """

    def __init__(self, train_ui, learning_rate, regularization, nb_factors, iterations):
        self.train_ui = train_ui
        self.uir = self._to_uir(train_ui)

        self.learning_rate = learning_rate
        self.regularization = regularization
        self.iterations = iterations
        self.nb_users, self.nb_items = train_ui.shape
        self.nb_ratings = train_ui.nnz
        self.nb_factors = nb_factors

        # Small random initial factors; the spread shrinks as nb_factors grows.
        self.Pu = np.random.normal(loc=0, scale=1./self.nb_factors,
                                   size=(self.nb_users, self.nb_factors))
        self.Qi = np.random.normal(loc=0, scale=1./self.nb_factors,
                                   size=(self.nb_items, self.nb_factors))

    @staticmethod
    def _to_uir(matrix):
        """Return the non-zero entries of ``matrix`` as (user, item, rating) tuples.

        Rows, columns and values are taken from one COO view so they stay
        aligned even if the matrix stores explicit zeros.
        """
        coo = matrix.tocoo()
        keep = coo.data != 0
        return list(zip(coo.row[keep], coo.col[keep], coo.data[keep]))

    def train(self, test_ui=None):
        """Run ``iterations`` epochs of SGD and record the RMSE after each one.

        ``self.learning_process`` receives one row per epoch:
        ``[epoch, train_RMSE]``, or ``[epoch, train_RMSE, test_RMSE]`` when
        ``test_ui`` is given.
        """
        # Identity check: comparing a sparse matrix with ``!=`` / ``==`` goes
        # through SciPy's element-wise comparison machinery.
        has_test = test_ui is not None
        if has_test:
            self.test_uir = self._to_uir(test_ui)

        self.learning_process = []
        pbar = tqdm(range(self.iterations))
        for i in pbar:
            pbar.set_description(f'Epoch {i} RMSE: {self.learning_process[-1][1] if i>0 else 0}. Training epoch {i+1}...')
            np.random.shuffle(self.uir)
            self.sgd(self.uir)

            record = [i+1, self.RMSE_total(self.uir)]
            if has_test:
                record.append(self.RMSE_total(self.test_uir))
            self.learning_process.append(record)

    def sgd(self, uir):
        """Perform one SGD pass over the given (user, item, rating) triplets."""
        for u, i, score in uir:
            # Compute prediction and error
            prediction = self.get_rating(u, i)
            e = (score - prediction)

            # Both steps are computed from the pre-update factors, then applied.
            Pu_update = self.learning_rate * (e * self.Qi[i] - self.regularization * self.Pu[u])
            Qi_update = self.learning_rate * (e * self.Pu[u] - self.regularization * self.Qi[i])

            self.Pu[u] += Pu_update
            self.Qi[i] += Qi_update

    def get_rating(self, u, i):
        """Return the predicted rating of user code ``u`` for item code ``i``."""
        return self.Pu[u].dot(self.Qi[i].T)

    def RMSE_total(self, uir):
        """Return the root-mean-square error over ``uir`` (NaN when it is empty)."""
        if len(uir) == 0:
            return float('nan')
        squared_error = 0
        for u, i, score in uir:
            squared_error += (score - self.get_rating(u, i))**2
        return np.sqrt(squared_error/len(uir))

    def estimations(self):
        """Compute the dense (nb_users, nb_items) matrix of predicted ratings.

        NOTE: the result is stored in ``self.estimations``, which replaces this
        method on the instance; calling ``estimations()`` a second time on the
        same object therefore fails. Kept as is for existing callers.
        """
        self.estimations = \
            np.dot(self.Pu, self.Qi.T)

    def _estimation_matrix(self):
        """Return the prediction matrix, computing it first if necessary."""
        if callable(self.estimations):  # still the method -> not computed yet
            self.estimations()
        return self.estimations

    def recommend(self, user_code_id, item_code_id, topK=10):
        """Return the ``topK`` best-scored items each user has not rated in training.

        Each row of the result is ``[user_id, item1, score1, item2, score2, ...]``.
        """
        estimations = self._estimation_matrix()
        indptr, indices = self.train_ui.indptr, self.train_ui.indices

        top_k = defaultdict(list)
        for nb_user, user in enumerate(estimations):
            # A set gives O(1) membership tests instead of scanning the array
            # once per candidate item.
            user_rated = set(indices[indptr[nb_user]:indptr[nb_user+1]])
            for item, score in enumerate(user):
                if item not in user_rated and not np.isnan(score):
                    top_k[user_code_id[nb_user]].append((item_code_id[item], score))

        result = []
        # Let's choose k best items in the format: (user, item1, score1, item2, score2, ...)
        for uid, item_scores in top_k.items():
            item_scores.sort(key=lambda x: x[1], reverse=True)
            result.append([uid]+list(chain(*item_scores[:topK])))
        return result

    def estimate(self, user_code_id, item_code_id, test_ui):
        """Return ``[user_id, item_id, predicted_rating]`` for every entry of ``test_ui``.

        A NaN prediction is replaced by 1.
        """
        estimations = self._estimation_matrix()
        result = []
        for user, item in zip(*test_ui.nonzero()):
            score = estimations[user, item]
            result.append([user_code_id[user], item_code_id[item],
                           score if not np.isnan(score) else 1])
        return result
# Fit the plain (unbiased) SVD model; passing test_ui makes train() record the
# test RMSE next to the train RMSE after every epoch.
model = SVD(
    train_ui,
    learning_rate=0.005,
    regularization=0.02,
    nb_factors=100,
    iterations=40,
)
model.train(test_ui)
Epoch 39 RMSE: 0.7469054750619549. Training epoch 40...: 100%|█████████████████████████| 40/40 [01:10<00:00, 1.76s/it]
import matplotlib.pyplot as plt

# Keep only the first two columns of the training log: epoch and train RMSE.
df = pd.DataFrame(model.learning_process).iloc[:, :2]
df.columns = ['epoch', 'train_RMSE']

plt.plot('epoch', 'train_RMSE', data=df, color='blue')
plt.legend()
<matplotlib.legend.Legend at 0x19ac6c66f08>
import matplotlib.pyplot as plt

# Skip the first 10 logged epochs so the plot starts at epoch 11.
df = pd.DataFrame(
    model.learning_process[10:],
    columns=['epoch', 'train_RMSE', 'test_RMSE'],
)

plt.plot('epoch', 'train_RMSE', data=df, color='blue')
plt.plot('epoch', 'test_RMSE', data=df, color='yellow', linestyle='dashed')
plt.legend()
<matplotlib.legend.Legend at 0x19ac1ce8308>
Saving and evaluating recommendations
# Build the dense prediction matrix that recommend() and estimate() read.
model.estimations()

# Top-10 recommendations per user, written without index or header.
top_n = pd.DataFrame(model.recommend(user_code_id, item_code_id, topK=10))
top_n.to_csv(
    'Recommendations generated/ml-100k/Self_SVD_reco.csv',
    index=False,
    header=False,
)

# Predicted ratings for the test-set (user, item) pairs.
estimations = pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))
estimations.to_csv(
    'Recommendations generated/ml-100k/Self_SVD_estimations.csv',
    index=False,
    header=False,
)

import evaluation_measures as ev

# Read both files back from disk and evaluate them against the test split.
estimations_df = pd.read_csv(
    'Recommendations generated/ml-100k/Self_SVD_estimations.csv',
    header=None,
)
reco = np.loadtxt(
    'Recommendations generated/ml-100k/Self_SVD_reco.csv',
    delimiter=',',
)

ev.evaluate(
    test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\t', header=None),
    estimations_df=estimations_df,
    reco=reco,
    super_reactions=[4, 5],
)
943it [00:00, 11351.12it/s]
RMSE | MAE | precision | recall | F_1 | F_05 | precision_super | recall_super | NDCG | mAP | MRR | LAUC | HR | F_2 | Whole_average | Reco in test | Test coverage | Shannon | Gini | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.91489 | 0.717962 | 0.102969 | 0.042325 | 0.052022 | 0.069313 | 0.093562 | 0.074994 | 0.105416 | 0.050278 | 0.191533 | 0.51789 | 0.462354 | 0.044591 | 0.150604 | 0.867656 | 0.141414 | 3.929249 | 0.971112 |
# `imp` is deprecated and was removed in Python 3.12; importlib.reload is the
# supported replacement.
import importlib

import evaluation_measures as ev

# Import first, then reload, so the cell also works on a fresh kernel and still
# picks up edits made to evaluation_measures.py since it was first imported.
ev = importlib.reload(ev)

dir_path = "Recommendations generated/ml-100k/"
super_reactions = [4, 5]
test = pd.read_csv('./Datasets/ml-100k/test.csv', sep='\t', header=None)

ev.evaluate_all(test, dir_path, super_reactions)
943it [00:00, 11925.95it/s] 943it [00:00, 12314.60it/s] 943it [00:00, 10828.94it/s] 943it [00:00, 11925.91it/s] 943it [00:00, 11489.44it/s] 943it [00:00, 11489.57it/s] 943it [00:00, 10353.22it/s] 943it [00:00, 11925.88it/s] 943it [00:00, 11631.38it/s] 943it [00:00, 11631.42it/s] 943it [00:00, 10585.78it/s] 943it [00:00, 11215.99it/s] 943it [00:00, 11215.93it/s] 943it [00:00, 11776.84it/s] 943it [00:00, 12396.27it/s] 943it [00:00, 10468.25it/s] 943it [00:00, 11972.70it/s] 943it [00:00, 11925.84it/s]
Model | RMSE | MAE | precision | recall | F_1 | F_05 | precision_super | recall_super | NDCG | mAP | MRR | LAUC | HR | F_2 | Whole_average | Reco in test | Test coverage | Shannon | Gini | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Self_RP3Beta | 3.702928 | 3.527713 | 0.322694 | 0.216069 | 0.212152 | 0.247538 | 0.245279 | 0.284983 | 0.388271 | 0.248239 | 0.636318 | 0.605683 | 0.910923 | 0.205450 | 0.376967 | 0.999788 | 0.178932 | 4.549663 | 0.950182 |
0 | Self_P3 | 3.702446 | 3.527273 | 0.282185 | 0.192092 | 0.186749 | 0.216980 | 0.204185 | 0.240096 | 0.339114 | 0.204905 | 0.572157 | 0.593544 | 0.875928 | 0.181702 | 0.340803 | 1.000000 | 0.077201 | 3.875892 | 0.974947 |
0 | Self_TopPop | 2.508258 | 2.217909 | 0.188865 | 0.116919 | 0.118732 | 0.141584 | 0.130472 | 0.137473 | 0.214651 | 0.111707 | 0.400939 | 0.555546 | 0.765642 | 0.112750 | 0.249607 | 1.000000 | 0.038961 | 3.159079 | 0.987317 |
0 | Self_SVDBaseline | 3.645871 | 3.480308 | 0.135949 | 0.078868 | 0.082011 | 0.099188 | 0.106974 | 0.103767 | 0.159486 | 0.079783 | 0.328576 | 0.536311 | 0.632025 | 0.077145 | 0.201674 | 0.999894 | 0.281385 | 5.140721 | 0.909056 |
0 | Ready_SVD | 0.950835 | 0.748676 | 0.097879 | 0.048335 | 0.053780 | 0.068420 | 0.086159 | 0.080289 | 0.113553 | 0.054094 | 0.249037 | 0.520893 | 0.498409 | 0.048439 | 0.159941 | 0.997985 | 0.204906 | 4.395721 | 0.954872 |
0 | Self_SVD | 0.914890 | 0.717962 | 0.102969 | 0.042325 | 0.052022 | 0.069313 | 0.093562 | 0.074994 | 0.105416 | 0.050278 | 0.191533 | 0.517890 | 0.462354 | 0.044591 | 0.150604 | 0.867656 | 0.141414 | 3.929249 | 0.971112 |
0 | Ready_Baseline | 0.949459 | 0.752487 | 0.091410 | 0.037652 | 0.046030 | 0.061286 | 0.079614 | 0.056463 | 0.095957 | 0.043178 | 0.198193 | 0.515501 | 0.437964 | 0.039549 | 0.141900 | 1.000000 | 0.033911 | 2.836513 | 0.991139 |
0 | Ready_SVDBiased | 0.943277 | 0.743628 | 0.080912 | 0.033048 | 0.040445 | 0.053881 | 0.070815 | 0.049631 | 0.090496 | 0.041928 | 0.200192 | 0.513176 | 0.411453 | 0.034776 | 0.135063 | 0.998727 | 0.168110 | 4.165618 | 0.964211 |
0 | Self_KNNSurprisetask | 0.946255 | 0.745209 | 0.083457 | 0.032848 | 0.041227 | 0.055493 | 0.074785 | 0.048890 | 0.089577 | 0.040902 | 0.189057 | 0.513076 | 0.417815 | 0.034996 | 0.135177 | 0.888547 | 0.130592 | 3.611806 | 0.978659 |
0 | Self_TopRated | 2.508258 | 2.217909 | 0.079321 | 0.032667 | 0.039983 | 0.053170 | 0.068884 | 0.048582 | 0.070766 | 0.027602 | 0.114790 | 0.512943 | 0.411453 | 0.034385 | 0.124546 | 1.000000 | 0.024531 | 2.761238 | 0.991660 |
0 | Self_GlobalAvg | 1.125760 | 0.943534 | 0.061188 | 0.025968 | 0.031383 | 0.041343 | 0.040558 | 0.032107 | 0.067695 | 0.027470 | 0.171187 | 0.509546 | 0.384942 | 0.027213 | 0.118383 | 1.000000 | 0.025974 | 2.711772 | 0.992003 |
0 | Ready_Random | 1.514265 | 1.215956 | 0.048780 | 0.021007 | 0.024667 | 0.032495 | 0.031867 | 0.023414 | 0.052904 | 0.020511 | 0.126790 | 0.507024 | 0.322375 | 0.021635 | 0.102789 | 0.988017 | 0.183983 | 5.100443 | 0.906900 |
0 | Ready_I-KNN | 1.030386 | 0.813067 | 0.026087 | 0.006908 | 0.010593 | 0.016046 | 0.021137 | 0.009522 | 0.024214 | 0.008958 | 0.048068 | 0.499885 | 0.154825 | 0.008007 | 0.069521 | 0.402333 | 0.434343 | 5.133650 | 0.877999 |
0 | Ready_I-KNNBaseline | 0.935327 | 0.737424 | 0.002545 | 0.000755 | 0.001105 | 0.001602 | 0.002253 | 0.000930 | 0.003444 | 0.001362 | 0.011760 | 0.496724 | 0.021209 | 0.000862 | 0.045379 | 0.482821 | 0.059885 | 2.232578 | 0.994487 |
0 | Ready_U-KNN | 1.023495 | 0.807913 | 0.000742 | 0.000205 | 0.000305 | 0.000449 | 0.000536 | 0.000198 | 0.000845 | 0.000274 | 0.002744 | 0.496441 | 0.007423 | 0.000235 | 0.042533 | 0.602121 | 0.010823 | 2.089186 | 0.995706 |
0 | Self_BaselineIU | 0.958136 | 0.754051 | 0.000954 | 0.000188 | 0.000298 | 0.000481 | 0.000644 | 0.000223 | 0.001043 | 0.000335 | 0.003348 | 0.496433 | 0.009544 | 0.000220 | 0.042809 | 0.699046 | 0.005051 | 1.945910 | 0.995669 |
0 | Self_BaselineUI | 0.967585 | 0.762740 | 0.000954 | 0.000170 | 0.000278 | 0.000463 | 0.000644 | 0.000189 | 0.000752 | 0.000168 | 0.001677 | 0.496424 | 0.009544 | 0.000201 | 0.042622 | 0.600530 | 0.005051 | 1.803126 | 0.996380 |
0 | Self_IKNN | 1.018363 | 0.808793 | 0.000318 | 0.000108 | 0.000140 | 0.000189 | 0.000000 | 0.000000 | 0.000214 | 0.000037 | 0.000368 | 0.496391 | 0.003181 | 0.000118 | 0.041755 | 0.392153 | 0.115440 | 4.174741 | 0.965327 |
Embeddings
# Toy example: divide every row by its Euclidean norm so each row has unit length.
x = np.array([[1, 2], [3, 4]])
display(x)
x / np.linalg.norm(x, axis=1, keepdims=True)
array([[1, 2], [3, 4]])
array([[0.4472136 , 0.89442719], [0.6 , 0.8 ]])
# Pick a random item code among those that occur in the training matrix.
item = random.choice(list(set(train_ui.indices)))

# Scale every item embedding to unit length (no mean-centering), so the dot
# product below is a cosine similarity.
# Skipping the normalisation would also be reasonable, but items whose
# embeddings have a larger magnitude would then be recommended more often.
row_norms = np.linalg.norm(model.Qi, axis=1, keepdims=True)
embeddings_norm = model.Qi / row_norms
similarity_scores = np.dot(embeddings_norm, embeddings_norm[item])

# Ten highest-scoring item codes.
scored_codes = pd.DataFrame(enumerate(similarity_scores), columns=['code', 'score'])
top_similar_items = scored_codes.sort_values(by='score', ascending=False).head(10)
top_similar_items['item_id'] = top_similar_items['code'].apply(lambda code: item_code_id[code])

# Attach the movie metadata by joining on the original item id.
items = pd.read_csv('./Datasets/ml-100k/movies.csv')
result = pd.merge(top_similar_items, items, left_on='item_id', right_on='id')
result
code | score | item_id | id | title | genres | |
---|---|---|---|---|---|---|
0 | 423 | 1.000000 | 424 | 424 | Children of the Corn: The Gathering (1996) | Horror |
1 | 984 | 0.980977 | 985 | 985 | Blood & Wine (1997) | Drama |
2 | 1458 | 0.980754 | 1459 | 1459 | Madame Butterfly (1995) | Musical |
3 | 1278 | 0.980699 | 1279 | 1279 | Wild America (1997) | Adventure, Children's |
4 | 1380 | 0.980041 | 1381 | 1381 | Losing Chase (1996) | Drama |
5 | 744 | 0.979974 | 745 | 745 | Ruling Class, The (1972) | Comedy |
6 | 705 | 0.979904 | 706 | 706 | Bad Moon (1996) | Horror |
7 | 1085 | 0.979812 | 1086 | 1086 | It's My Party (1995) | Drama |
8 | 1237 | 0.979443 | 1238 | 1238 | Full Speed (1996) | Drama |
9 | 1190 | 0.979429 | 1191 | 1191 | Letter From Death Row, A (1998) | Crime, Drama |
Project task 5: implement SVD on top of the baseline (as in the Surprise library)
# making changes to our implementation by considering additional parameters in the gradient descent procedure
# seems to be the fastest option
# please save the output in 'Recommendations generated/ml-100k/Self_SVDBaseline_reco.csv' and
# 'Recommendations generated/ml-100k/Self_SVDBaseline_estimations.csv'
# Done similarly to https://github.com/albertauyeung/matrix-factorization-in-python
from tqdm import tqdm
class SVDboosted():
    """Biased matrix factorisation ("SVD on top of a baseline") trained with SGD.

    A rating is predicted as::

        r_hat(u, i) = global_mean + Bu[u] + Bi[i] + Pu[u] . Qi[i]

    where ``Bu``/``Bi`` hold ONE scalar bias per user/item and ``Pu``/``Qi``
    are the latent factor matrices.

    Parameters
    ----------
    train_ui : scipy sparse matrix of shape (nb_users, nb_items)
        Training ratings. ``recommend`` reads ``indices``/``indptr``, so a CSR
        matrix is expected.
    learning_rate : float
        SGD step size (shared by biases and factors).
    regularization : float
        Weight of the shrinkage term applied to biases and factors.
    nb_factors : int
        Number of latent factors per user/item.
    iterations : int
        Number of passes (epochs) over the training ratings in ``train``.
    """

    def __init__(self, train_ui, learning_rate, regularization, nb_factors, iterations):
        self.train_ui = train_ui
        self.uir = self._to_uir(train_ui)

        self.learning_rate = learning_rate
        self.regularization = regularization
        self.iterations = iterations
        self.nb_users, self.nb_items = train_ui.shape
        self.nb_ratings = train_ui.nnz
        self.nb_factors = nb_factors

        # Baseline part of the model: the mean training rating plus one scalar
        # bias per user and per item, starting at zero. (They must be vectors,
        # not (n, nb_factors) matrices -- otherwise every prediction becomes a
        # vector instead of a number.)
        self.global_mean = (
            float(np.mean([score for _, _, score in self.uir])) if self.uir else 0.0
        )
        self.Bu = np.zeros(self.nb_users)
        self.Bi = np.zeros(self.nb_items)

        # Small random initial factors; the spread shrinks as nb_factors grows.
        self.Pu = np.random.normal(loc=0, scale=1./self.nb_factors,
                                   size=(self.nb_users, self.nb_factors))
        self.Qi = np.random.normal(loc=0, scale=1./self.nb_factors,
                                   size=(self.nb_items, self.nb_factors))

    @staticmethod
    def _to_uir(matrix):
        """Return the non-zero entries of ``matrix`` as (user, item, rating) tuples.

        Rows, columns and values are taken from one COO view so they stay
        aligned even if the matrix stores explicit zeros.
        """
        coo = matrix.tocoo()
        keep = coo.data != 0
        return list(zip(coo.row[keep], coo.col[keep], coo.data[keep]))

    def train(self, test_ui=None):
        """Run ``iterations`` epochs of SGD and record the RMSE after each one.

        ``self.learning_process`` receives one row per epoch:
        ``[epoch, train_RMSE]``, or ``[epoch, train_RMSE, test_RMSE]`` when
        ``test_ui`` is given.
        """
        # Identity check: comparing a sparse matrix with ``!=`` / ``==`` goes
        # through SciPy's element-wise comparison machinery.
        has_test = test_ui is not None
        if has_test:
            self.test_uir = self._to_uir(test_ui)

        self.learning_process = []
        pbar = tqdm(range(self.iterations))
        for i in pbar:
            pbar.set_description(f'Epoch {i} RMSE: {self.learning_process[-1][1] if i>0 else 0}. Training epoch {i+1}...')
            np.random.shuffle(self.uir)
            self.sgd(self.uir)

            record = [i+1, self.RMSE_total(self.uir)]
            if has_test:
                record.append(self.RMSE_total(self.test_uir))
            self.learning_process.append(record)

    def sgd(self, uir):
        """Perform one SGD pass over the given (user, item, rating) triplets."""
        for u, i, score in uir:
            # Compute prediction and error
            prediction = self.get_rating(u, i)
            e = (score - prediction)

            # All steps are computed from the pre-update parameters, then applied.
            Bu_update = self.learning_rate * (e - self.regularization * self.Bu[u])
            Bi_update = self.learning_rate * (e - self.regularization * self.Bi[i])
            Pu_update = self.learning_rate * (e * self.Qi[i] - self.regularization * self.Pu[u])
            Qi_update = self.learning_rate * (e * self.Pu[u] - self.regularization * self.Qi[i])

            self.Bu[u] += Bu_update
            self.Bi[i] += Bi_update
            self.Pu[u] += Pu_update
            self.Qi[i] += Qi_update

    def get_rating(self, u, i):
        """Return the predicted (scalar) rating of user code ``u`` for item code ``i``."""
        return self.global_mean + self.Bu[u] + self.Bi[i] + self.Pu[u].dot(self.Qi[i])

    def RMSE_total(self, uir):
        """Return the root-mean-square error over ``uir`` (NaN when it is empty).

        This is the plain prediction error; the regularisation penalty is part
        of the training objective only and is deliberately not included.
        """
        if len(uir) == 0:
            return float('nan')
        squared_error = 0
        for u, i, score in uir:
            squared_error += (score - self.get_rating(u, i))**2
        return np.sqrt(squared_error/len(uir))

    def estimations(self):
        """Compute the dense (nb_users, nb_items) matrix of predicted ratings.

        The matrix uses the same formula as ``get_rating`` (global mean and both
        biases included), so saved estimations match what was trained.

        NOTE: the result is stored in ``self.estimations``, which replaces this
        method on the instance; calling ``estimations()`` a second time on the
        same object therefore fails. Kept as is for existing callers.
        """
        self.estimations = \
            self.global_mean + self.Bu[:, None] + self.Bi[None, :] + np.dot(self.Pu, self.Qi.T)

    def _estimation_matrix(self):
        """Return the prediction matrix, computing it first if necessary."""
        if callable(self.estimations):  # still the method -> not computed yet
            self.estimations()
        return self.estimations

    def recommend(self, user_code_id, item_code_id, topK=10):
        """Return the ``topK`` best-scored items each user has not rated in training.

        Each row of the result is ``[user_id, item1, score1, item2, score2, ...]``.
        """
        estimations = self._estimation_matrix()
        indptr, indices = self.train_ui.indptr, self.train_ui.indices

        top_k = defaultdict(list)
        for nb_user, user in enumerate(estimations):
            # A set gives O(1) membership tests instead of scanning the array
            # once per candidate item.
            user_rated = set(indices[indptr[nb_user]:indptr[nb_user+1]])
            for item, score in enumerate(user):
                if item not in user_rated and not np.isnan(score):
                    top_k[user_code_id[nb_user]].append((item_code_id[item], score))

        result = []
        # Let's choose k best items in the format: (user, item1, score1, item2, score2, ...)
        for uid, item_scores in top_k.items():
            item_scores.sort(key=lambda x: x[1], reverse=True)
            result.append([uid]+list(chain(*item_scores[:topK])))
        return result

    def estimate(self, user_code_id, item_code_id, test_ui):
        """Return ``[user_id, item_id, predicted_rating]`` for every entry of ``test_ui``.

        A NaN prediction is replaced by 1.
        """
        estimations = self._estimation_matrix()
        result = []
        for user, item in zip(*test_ui.nonzero()):
            score = estimations[user, item]
            result.append([user_code_id[user], item_code_id[item],
                           score if not np.isnan(score) else 1])
        return result
# Fit the biased SVD variant with the same hyper-parameters as the plain SVD
# run; passing test_ui makes train() record the test RMSE after every epoch.
model = SVDboosted(
    train_ui,
    learning_rate=0.005,
    regularization=0.02,
    nb_factors=100,
    iterations=40,
)
model.train(test_ui)
Epoch 1 RMSE: [1.56545038 1.56550953 1.56561831 1.56506255 1.56544717 1.56542472 | 1/40 [00:02<01:50, 2.83s/it] Epoch 1 RMSE: [1.56545038 1.56550953 1.56561831 1.56506255 1.56544717 1.56542472 Epoch 2 RMSE: [1.24195612 1.24198904 1.24211143 1.24181535 1.24194284 1.24200128 Epoch 2 RMSE: [1.24195612 1.24198904 1.24211143 1.24181535 1.24194284 1.24200128 Epoch 3 RMSE: [1.13247887 1.13249914 1.13258642 1.13239893 1.13245611 1.13250907 Epoch 3 RMSE: [1.13247887 1.13249914 1.13258642 1.13239893 1.13245611 1.13250907 Epoch 4 RMSE: [1.07474573 1.07476035 1.07482625 1.07468999 1.07472024 1.07476528 Epoch 4 RMSE: [1.07474573 1.07476035 1.07482625 1.07468999 1.07472024 1.07476528 Epoch 5 RMSE: [1.03819311 1.03820533 1.03825798 1.03815174 1.03816959 1.03820895 Epoch 5 RMSE: [1.03819311 1.03820533 1.03825798 1.03815174 1.03816959 1.03820895 Epoch 6 RMSE: [1.01305431 1.01306406 1.0131073 1.0130215 1.01303201 1.01306765 Epoch 6 RMSE: [1.01305431 1.01306406 1.0131073 1.0130215 1.01303201 1.01306765 Epoch 7 RMSE: [0.99453145 0.99453934 0.99457617 0.99450541 0.99451055 0.99454367 Epoch 7 RMSE: [0.99453145 0.99453934 0.99457617 0.99450541 0.99451055 0.99454367 Epoch 8 RMSE: [0.98059211 0.98059798 0.9806297 0.98057082 0.980572 0.98060328 Epoch 8 RMSE: [0.98059211 0.98059798 0.9806297 0.98057082 0.980572 0.98060328 Epoch 9 RMSE: [0.96964915 0.96965335 0.96968135 0.96963161 0.96962996 0.96965908 Epoch 9 RMSE: [0.96964915 0.96965335 0.96968135 0.96963161 0.96962996 0.96965908 Epoch 10 RMSE: [0.96082121 0.96082423 0.96084907 0.96080661 0.96080248 0.9608298 Epoch 10 RMSE: [0.96082121 0.96082423 0.96084907 0.96080661 0.96080248 0.9608298 Epoch 11 RMSE: [0.95365923 0.95366146 0.95368372 0.95364696 0.95364089 0.953667 Epoch 11 RMSE: [0.95365923 0.95366146 0.95368372 0.95364696 0.95364089 0.953667 Epoch 12 RMSE: [0.94766016 0.94766156 0.94768159 0.94764986 0.947642 0.94766674 Epoch 12 RMSE: [0.94766016 0.94766156 0.94768159 0.94764986 0.947642 0.94766674 Epoch 13 RMSE: [0.94261767 0.94261834 
0.94263624 0.94260867 0.94259966 0.94262314 Epoch 13 RMSE: [0.94261767 0.94261834 0.94263624 0.94260867 0.94259966 0.94262314 Epoch 14 RMSE: [0.938169 0.9381694 0.93818542 0.93816133 0.93815137 0.93817355 Epoch 14 RMSE: [0.938169 0.9381694 0.93818542 0.93816133 0.93815137 0.93817355 Epoch 15 RMSE: [0.93426778 0.9342676 0.9342824 0.93426131 0.93425033 0.93427147 Epoch 15 RMSE: [0.93426778 0.9342676 0.9342824 0.93426131 0.93425033 0.93427147 Epoch 16 RMSE: [0.93070807 0.93070774 0.93072136 0.93070245 0.93069117 0.93071116 Epoch 16 RMSE: [0.93070807 0.93070774 0.93072136 0.93070245 0.93069117 0.93071116 Epoch 17 RMSE: [0.92736841 0.92736783 0.9273801 0.92736348 0.92735194 0.92737089 Epoch 17 RMSE: [0.92736841 0.92736783 0.9273801 0.92736348 0.92735194 0.92737089 Epoch 18 RMSE: [0.92401862 0.92401778 0.92402901 0.92401417 0.92400251 0.92402036 Epoch 18 RMSE: [0.92401862 0.92401778 0.92402901 0.92401417 0.92400251 0.92402036 Epoch 19 RMSE: [0.92070555 0.92070466 0.92071497 0.92070168 0.92069002 0.92070695 Epoch 19 RMSE: [0.92070555 0.92070466 0.92071497 0.92070168 0.92069002 0.92070695 Epoch 20 RMSE: [0.91707281 0.91707182 0.91708121 0.91706927 0.91705756 0.91707364 Epoch 20 RMSE: [0.91707281 0.91707182 0.91708121 0.91706927 0.91705756 0.91707364 Epoch 21 RMSE: [0.91348792 0.91348679 0.91349536 0.91348474 0.91347301 0.91348798 Epoch 21 RMSE: [0.91348792 0.91348679 0.91349536 0.91348474 0.91347301 0.91348798 Epoch 22 RMSE: [0.90957269 0.90957161 0.90957937 0.90956986 0.9095583 0.90957263 Epoch 22 RMSE: [0.90957269 0.90957161 0.90957937 0.90956986 0.9095583 0.90957263 Epoch 23 RMSE: [0.90514652 0.90514525 0.90515258 0.90514401 0.90513237 0.90514616 Epoch 23 RMSE: [0.90514652 0.90514525 0.90515258 0.90514401 0.90513237 0.90514616 Epoch 24 RMSE: [0.90024301 0.90024177 0.90024834 0.90024062 0.90022924 0.90024226 Epoch 24 RMSE: [0.90024301 0.90024177 0.90024834 0.90024062 0.90022924 0.90024226 Epoch 25 RMSE: [0.89486164 0.89486046 0.89486646 0.89485941 0.89484821 0.89486074 
Epoch 25 RMSE: [0.89486164 0.89486046 0.89486646 0.89485941 0.89484821 0.89486074 Epoch 26 RMSE: [0.88903799 0.88903702 0.88904243 0.88903603 0.88902515 0.88903703 Epoch 26 RMSE: [0.88903799 0.88903702 0.88904243 0.88903603 0.88902515 0.88903703 Epoch 27 RMSE: [0.88287967 0.88287863 0.88288359 0.88287782 0.88286725 0.88287848 Epoch 27 RMSE: [0.88287967 0.88287863 0.88288359 0.88287782 0.88286725 0.88287848 Epoch 28 RMSE: [0.87627836 0.87627739 0.87628186 0.8762767 0.87626624 0.8762769 Epoch 28 RMSE: [0.87627836 0.87627739 0.87628186 0.8762767 0.87626624 0.8762769 Epoch 29 RMSE: [0.86929878 0.86929781 0.86930189 0.86929724 0.86928706 0.86929718 Epoch 29 RMSE: [0.86929878 0.86929781 0.86930189 0.86929724 0.86928706 0.86929718 Epoch 30 RMSE: [0.8620862 0.86208531 0.86208899 0.86208476 0.86207471 0.86208446 Epoch 30 RMSE: [0.8620862 0.86208531 0.86208899 0.86208476 0.86207471 0.86208446 Epoch 31 RMSE: [0.85452681 0.85452603 0.85452921 0.85452551 0.85451573 0.85452503 Epoch 31 RMSE: [0.85452681 0.85452603 0.85452921 0.85452551 0.85451573 0.85452503 Epoch 32 RMSE: [0.84666254 0.84666179 0.84666469 0.84666136 0.8466519 0.84666071 Epoch 32 RMSE: [0.84666254 0.84666179 0.84666469 0.84666136 0.8466519 0.84666071 Epoch 33 RMSE: [0.83814164 0.83814095 0.83814355 0.83814049 0.83813121 0.83813985 Epoch 33 RMSE: [0.83814164 0.83814095 0.83814355 0.83814049 0.83813121 0.83813985 Epoch 34 RMSE: [0.8295605 0.82955997 0.82956222 0.82955955 0.8295505 0.82955863 Epoch 34 RMSE: [0.8295605 0.82955997 0.82956222 0.82955955 0.8295505 0.82955863 Epoch 35 RMSE: [0.82044053 0.82044008 0.82044203 0.82043968 0.82043093 0.82043865 Epoch 35 RMSE: [0.82044053 0.82044008 0.82044203 0.82043968 0.82043093 0.82043865 Epoch 36 RMSE: [0.81096402 0.81096361 0.81096533 0.81096327 0.8109547 0.81096221 Epoch 36 RMSE: [0.81096402 0.81096361 0.81096533 0.81096327 0.8109547 0.81096221 Epoch 37 RMSE: [0.80132847 0.80132813 0.80132952 0.80132774 0.80131947 0.80132666 Epoch 37 RMSE: [0.80132847 0.80132813 
0.80132952 0.80132774 0.80131947 0.80132666 Epoch 38 RMSE: [0.79114637 0.79114614 0.79114736 0.79114574 0.79113763 0.79114471 Epoch 38 RMSE: [0.79114637 0.79114614 0.79114736 0.79114574 0.79113763 0.79114471 Epoch 39 RMSE: [0.78071211 0.78071175 0.78071286 0.78071155 0.7807037 0.78071049 Epoch 39 RMSE: [0.78071211 0.78071175 0.78071286 0.78071155 0.7807037 0.78071049 Epoch 39 RMSE: [0.78071211 0.78071175 0.78071286 0.78071155 0.7807037 0.78071049 0.78071326 0.78071879 0.78071336 0.7
# Build the dense prediction matrix that recommend() and estimate() read.
model.estimations()

# Top-10 recommendations per user, written without index or header.
top_n = pd.DataFrame(model.recommend(user_code_id, item_code_id, topK=10))
top_n.to_csv(
    'Recommendations generated/ml-100k/Self_SVDBaseline_reco.csv',
    index=False,
    header=False,
)

# Predicted ratings for the test-set (user, item) pairs.
estimations = pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))
estimations.to_csv(
    'Recommendations generated/ml-100k/Self_SVDBaseline_estimations.csv',
    index=False,
    header=False,
)
# `imp` is deprecated and was removed in Python 3.12; importlib.reload is the
# supported replacement.
import importlib

import evaluation_measures as ev

# Import first, then reload, so the cell also works on a fresh kernel and still
# picks up edits made to evaluation_measures.py since it was first imported.
ev = importlib.reload(ev)

dir_path = "Recommendations generated/ml-100k/"
super_reactions = [4, 5]
test = pd.read_csv('./Datasets/ml-100k/test.csv', sep='\t', header=None)

ev.evaluate_all(test, dir_path, super_reactions)
943it [00:00, 11925.62it/s] 943it [00:00, 12690.64it/s] 943it [00:00, 11084.16it/s] 943it [00:00, 12561.94it/s] 943it [00:00, 11489.57it/s] 943it [00:00, 11776.67it/s] 943it [00:00, 10955.28it/s] 943it [00:00, 12078.77it/s] 943it [00:00, 11776.77it/s] 943it [00:00, 11631.38it/s] 943it [00:00, 11631.38it/s] 943it [00:00, 12235.67it/s] 943it [00:00, 11350.01it/s] 943it [00:00, 11702.10it/s] 943it [00:00, 12645.60it/s] 943it [00:00, 10586.01it/s] 943it [00:00, 11631.01it/s] 943it [00:00, 11631.32it/s]
Model | RMSE | MAE | precision | recall | F_1 | F_05 | precision_super | recall_super | NDCG | mAP | MRR | LAUC | HR | F_2 | Whole_average | Reco in test | Test coverage | Shannon | Gini | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Self_RP3Beta | 3.702928 | 3.527713 | 0.322694 | 0.216069 | 0.212152 | 0.247538 | 0.245279 | 0.284983 | 0.388271 | 0.248239 | 0.636318 | 0.605683 | 0.910923 | 0.205450 | 0.376967 | 0.999788 | 0.178932 | 4.549663 | 0.950182 |
0 | Self_P3 | 3.702446 | 3.527273 | 0.282185 | 0.192092 | 0.186749 | 0.216980 | 0.204185 | 0.240096 | 0.339114 | 0.204905 | 0.572157 | 0.593544 | 0.875928 | 0.181702 | 0.340803 | 1.000000 | 0.077201 | 3.875892 | 0.974947 |
0 | Self_TopPop | 2.508258 | 2.217909 | 0.188865 | 0.116919 | 0.118732 | 0.141584 | 0.130472 | 0.137473 | 0.214651 | 0.111707 | 0.400939 | 0.555546 | 0.765642 | 0.112750 | 0.249607 | 1.000000 | 0.038961 | 3.159079 | 0.987317 |
0 | Self_SVDBaseline | 3.645666 | 3.480246 | 0.137858 | 0.082398 | 0.084151 | 0.101063 | 0.107940 | 0.109393 | 0.164477 | 0.082973 | 0.342374 | 0.538097 | 0.638388 | 0.079860 | 0.205748 | 0.999894 | 0.279221 | 5.159076 | 0.907220 |
0 | Ready_SVD | 0.950835 | 0.748676 | 0.097879 | 0.048335 | 0.053780 | 0.068420 | 0.086159 | 0.080289 | 0.113553 | 0.054094 | 0.249037 | 0.520893 | 0.498409 | 0.048439 | 0.159941 | 0.997985 | 0.204906 | 4.395721 | 0.954872 |
0 | Self_SVD | 0.914890 | 0.717962 | 0.102969 | 0.042325 | 0.052022 | 0.069313 | 0.093562 | 0.074994 | 0.105416 | 0.050278 | 0.191533 | 0.517890 | 0.462354 | 0.044591 | 0.150604 | 0.867656 | 0.141414 | 3.929249 | 0.971112 |
0 | Ready_Baseline | 0.949459 | 0.752487 | 0.091410 | 0.037652 | 0.046030 | 0.061286 | 0.079614 | 0.056463 | 0.095957 | 0.043178 | 0.198193 | 0.515501 | 0.437964 | 0.039549 | 0.141900 | 1.000000 | 0.033911 | 2.836513 | 0.991139 |
0 | Ready_SVDBiased | 0.943277 | 0.743628 | 0.080912 | 0.033048 | 0.040445 | 0.053881 | 0.070815 | 0.049631 | 0.090496 | 0.041928 | 0.200192 | 0.513176 | 0.411453 | 0.034776 | 0.135063 | 0.998727 | 0.168110 | 4.165618 | 0.964211 |
0 | Self_KNNSurprisetask | 0.946255 | 0.745209 | 0.083457 | 0.032848 | 0.041227 | 0.055493 | 0.074785 | 0.048890 | 0.089577 | 0.040902 | 0.189057 | 0.513076 | 0.417815 | 0.034996 | 0.135177 | 0.888547 | 0.130592 | 3.611806 | 0.978659 |
0 | Self_TopRated | 2.508258 | 2.217909 | 0.079321 | 0.032667 | 0.039983 | 0.053170 | 0.068884 | 0.048582 | 0.070766 | 0.027602 | 0.114790 | 0.512943 | 0.411453 | 0.034385 | 0.124546 | 1.000000 | 0.024531 | 2.761238 | 0.991660 |
0 | Self_GlobalAvg | 1.125760 | 0.943534 | 0.061188 | 0.025968 | 0.031383 | 0.041343 | 0.040558 | 0.032107 | 0.067695 | 0.027470 | 0.171187 | 0.509546 | 0.384942 | 0.027213 | 0.118383 | 1.000000 | 0.025974 | 2.711772 | 0.992003 |
0 | Ready_Random | 1.514265 | 1.215956 | 0.048780 | 0.021007 | 0.024667 | 0.032495 | 0.031867 | 0.023414 | 0.052904 | 0.020511 | 0.126790 | 0.507024 | 0.322375 | 0.021635 | 0.102789 | 0.988017 | 0.183983 | 5.100443 | 0.906900 |
0 | Ready_I-KNN | 1.030386 | 0.813067 | 0.026087 | 0.006908 | 0.010593 | 0.016046 | 0.021137 | 0.009522 | 0.024214 | 0.008958 | 0.048068 | 0.499885 | 0.154825 | 0.008007 | 0.069521 | 0.402333 | 0.434343 | 5.133650 | 0.877999 |
0 | Ready_I-KNNBaseline | 0.935327 | 0.737424 | 0.002545 | 0.000755 | 0.001105 | 0.001602 | 0.002253 | 0.000930 | 0.003444 | 0.001362 | 0.011760 | 0.496724 | 0.021209 | 0.000862 | 0.045379 | 0.482821 | 0.059885 | 2.232578 | 0.994487 |
0 | Ready_U-KNN | 1.023495 | 0.807913 | 0.000742 | 0.000205 | 0.000305 | 0.000449 | 0.000536 | 0.000198 | 0.000845 | 0.000274 | 0.002744 | 0.496441 | 0.007423 | 0.000235 | 0.042533 | 0.602121 | 0.010823 | 2.089186 | 0.995706 |
0 | Self_BaselineIU | 0.958136 | 0.754051 | 0.000954 | 0.000188 | 0.000298 | 0.000481 | 0.000644 | 0.000223 | 0.001043 | 0.000335 | 0.003348 | 0.496433 | 0.009544 | 0.000220 | 0.042809 | 0.699046 | 0.005051 | 1.945910 | 0.995669 |
0 | Self_BaselineUI | 0.967585 | 0.762740 | 0.000954 | 0.000170 | 0.000278 | 0.000463 | 0.000644 | 0.000189 | 0.000752 | 0.000168 | 0.001677 | 0.496424 | 0.009544 | 0.000201 | 0.042622 | 0.600530 | 0.005051 | 1.803126 | 0.996380 |
0 | Self_IKNN | 1.018363 | 0.808793 | 0.000318 | 0.000108 | 0.000140 | 0.000189 | 0.000000 | 0.000000 | 0.000214 | 0.000037 | 0.000368 | 0.496391 | 0.003181 | 0.000118 | 0.041755 | 0.392153 | 0.115440 | 4.174741 | 0.965327 |
Ready-made SVD - Surprise implementation
SVD
# `imp` is deprecated and was removed in Python 3.12; importlib.reload is the
# supported replacement.
import importlib

import helpers
import surprise as sp

# Reload so edits made to helpers.py since its first import are picked up.
helpers = importlib.reload(helpers)

algo = sp.SVD(biased=False)  # to use unbiased version
helpers.ready_made(
    algo,
    reco_path='Recommendations generated/ml-100k/Ready_SVD_reco.csv',
    estimations_path='Recommendations generated/ml-100k/Ready_SVD_estimations.csv',
)
Generating predictions... Generating top N recommendations... Generating predictions...
SVD biased - on top baseline
# `imp` is deprecated and was removed in Python 3.12; importlib.reload is the
# supported replacement.
import importlib

import helpers
import surprise as sp

# Reload so edits made to helpers.py since its first import are picked up.
helpers = importlib.reload(helpers)

algo = sp.SVD()  # default is biased=True
helpers.ready_made(
    algo,
    reco_path='Recommendations generated/ml-100k/Ready_SVDBiased_reco.csv',
    estimations_path='Recommendations generated/ml-100k/Ready_SVDBiased_estimations.csv',
)
Generating predictions... Generating top N recommendations... Generating predictions...
# `imp` is deprecated and was removed in Python 3.12; importlib.reload is the
# supported replacement.
import importlib

import evaluation_measures as ev

# Import first, then reload, so the cell also works on a fresh kernel and still
# picks up edits made to evaluation_measures.py since it was first imported.
ev = importlib.reload(ev)

dir_path = "Recommendations generated/ml-100k/"
super_reactions = [4, 5]
test = pd.read_csv('./Datasets/ml-100k/test.csv', sep='\t', header=None)

ev.evaluate_all(test, dir_path, super_reactions)
943it [00:00, 11925.95it/s] 943it [00:00, 12314.95it/s] 943it [00:00, 10706.18it/s] 943it [00:00, 11925.84it/s] 943it [00:00, 11349.81it/s] 943it [00:00, 11925.98it/s] 943it [00:00, 11018.70it/s] 943it [00:00, 12396.58it/s] 943it [00:00, 12562.02it/s] 943it [00:00, 11489.71it/s] 943it [00:00, 11631.38it/s] 943it [00:00, 12210.06it/s] 943it [00:00, 11629.33it/s]