168 KiB
168 KiB
Preparing dataset
import pandas as pd
import numpy as np
import scipy.sparse as sparse
from collections import defaultdict
from itertools import chain
import random
# Both splits are tab-separated and are read with header=None, so the column names are supplied here.
train_read = pd.read_csv(
    '.Datasets/ml-100k/train.csv', sep='\t', header=None,
    names=['user', 'item', 'rating', 'timestamp'],
)
test_read = pd.read_csv(
    '.Datasets/ml-100k/test.csv', sep='\t', header=None,
    names=['user', 'item', 'rating', 'timestamp'],
)

# Stack the two splits so that users and items receive one shared set of integer codes.
train_and_test = pd.concat([train_read, test_read], axis=0, ignore_index=True)
train_and_test = train_and_test.assign(
    user_code=train_and_test['user'].astype("category").cat.codes,
    item_code=train_and_test['item'].astype("category").cat.codes,
)

# Lookup tables in both directions: code -> original id and original id -> code.
user_code_id = dict(enumerate(train_and_test['user'].astype("category").cat.categories))
user_id_code = {user_id: code for code, user_id in user_code_id.items()}
item_code_id = dict(enumerate(train_and_test['item'].astype("category").cat.categories))
item_id_code = {item_id: code for code, item_id in item_code_id.items()}

train_and_test.head(5)
user | item | rating | timestamp | user_code | item_code | |
---|---|---|---|---|---|---|
0 | 664 | 525 | 4 | 876526580 | 663 | 524 |
1 | 49 | 1 | 2 | 888068651 | 48 | 0 |
2 | 352 | 273 | 2 | 884290328 | 351 | 272 |
3 | 618 | 96 | 3 | 891307749 | 617 | 95 |
4 | 560 | 24 | 2 | 879976772 | 559 | 23 |
# Attach the integer codes to each split by joining on the four raw columns.
train_df = train_read.merge(train_and_test, on=list(train_read.columns))
test_df = test_read.merge(train_and_test, on=list(train_read.columns))

# Matrix dimensions: the largest code plus one, for users (U) and items (I).
U = train_and_test['user_code'].max() + 1
I = train_and_test['item_code'].max() + 1

# User x item rating matrices in CSR format; both splits share the same shape.
train_ui = sparse.csr_matrix(
    (train_df['rating'], (train_df['user_code'], train_df['item_code'])),
    shape=(U, I),
)
test_ui = sparse.csr_matrix(
    (test_df['rating'], (test_df['user_code'], test_df['item_code'])),
    shape=(U, I),
)
pip install surprise
Collecting surprise Downloading https://files.pythonhosted.org/packages/61/de/e5cba8682201fcf9c3719a6fdda95693468ed061945493dea2dd37c5618b/surprise-0.1-py2.py3-none-any.whl Collecting scikit-surprise [?25l Downloading https://files.pythonhosted.org/packages/f5/da/b5700d96495fb4f092be497f02492768a3d96a3f4fa2ae7dea46d4081cfa/scikit-surprise-1.1.0.tar.gz (6.4MB) [K |████████████████████████████████| 6.5MB 7.5MB/s [?25hRequirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.6/dist-packages (from scikit-surprise->surprise) (0.15.1) Requirement already satisfied: numpy>=1.11.2 in /usr/local/lib/python3.6/dist-packages (from scikit-surprise->surprise) (1.18.5) Requirement already satisfied: scipy>=1.0.0 in /usr/local/lib/python3.6/dist-packages (from scikit-surprise->surprise) (1.4.1) Requirement already satisfied: six>=1.10.0 in /usr/local/lib/python3.6/dist-packages (from scikit-surprise->surprise) (1.12.0) Building wheels for collected packages: scikit-surprise Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone Created wheel for scikit-surprise: filename=scikit_surprise-1.1.0-cp36-cp36m-linux_x86_64.whl size=1675379 sha256=642a6aaed1cac33b0dfb9b2d752b4a8db25dc9d12c56506722d7c68d7aefba86 Stored in directory: /root/.cache/pip/wheels/cc/fa/8c/16c93fccce688ae1bde7d979ff102f7bee980d9cfeb8641bcf Successfully built scikit-surprise Installing collected packages: scikit-surprise, surprise Successfully installed scikit-surprise-1.1.0 surprise-0.1
# The preparation steps above repeat for many algorithms, so they live in a separate module.
import helpers

train_read = pd.read_csv('.Datasets/ml-100k/train.csv', header=None, sep='\t')
test_read = pd.read_csv('.Datasets/ml-100k/test.csv', header=None, sep='\t')

# data_to_csr hands back the two rating matrices followed by the four code/id lookup tables.
(train_ui, test_ui,
 user_code_id, user_id_code,
 item_code_id, item_id_code) = helpers.data_to_csr(train_read, test_read)
CSR matrices - what is it?
# Toy ratings in coordinate form: the k-th rating data[k] sits at position (row[k], col[k]).
row = np.array([0, 0, 0, 1, 1, 2, 2, 2])
col = np.array([0, 1, 2, 1, 3, 2, 0, 3])
data = np.array([4, 1, 3, 2, 1, 5, 2, 4])

# Build the coordinate matrix first, then convert it to Compressed Sparse Row format.
sample_csr = sparse.coo_matrix((data, (row, col))).tocsr()
sample_csr
<3x4 sparse matrix of type '<class 'numpy.longlong'>' with 8 stored elements in Compressed Sparse Row format>
# Dense view of the toy matrix, followed by its basic counts.
print('Ratings matrix with missing entries replaced by zeros:')
display(sample_csr.todense())

# shape unpacks to (number of rows, number of columns), i.e. users then items.
print(
    '\nNumber of ratings: {} \nNumber of users: {} \nNumber of items: {} \n'.format(
        sample_csr.nnz, *sample_csr.shape
    )
)
Ratings matrix with missing entries replaced by zeros:
matrix([[4, 1, 3, 0], [0, 2, 0, 1], [2, 0, 5, 4]], dtype=int64)
Number of ratings: 8 Number of users: 3 Number of items: 4
# The three arrays behind a CSR matrix: values, their column indices, and the row boundaries.
print('Ratings data:', sample_csr.data)
print('Regarding items:', sample_csr.indices)

# Consecutive indptr entries delimit the slice of data/indices that belongs to one row.
for i, (start, stop) in enumerate(zip(sample_csr.indptr[:-1], sample_csr.indptr[1:])):
    print('Where ratings from {} to {} belongs to user {}.'.format(start, stop - 1, i))
Ratings data: [4 1 3 2 1 2 5 4] Regarding items: [0 1 2 1 3 0 2 3] Where ratings from 0 to 2 belongs to user 0. Where ratings from 3 to 4 belongs to user 1. Where ratings from 5 to 7 belongs to user 2.
# Compare two ways of listing the item codes rated by one user (row `user` of train_ui).
user=123
print('Efficient way to access items rated by user:')
# Slice the column-index array directly, using the row boundaries stored in indptr.
display(train_ui.indices[train_ui.indptr[user]:train_ui.indptr[user+1]])
%timeit train_ui.indices[train_ui.indptr[user]:train_ui.indptr[user+1]]
print('Inefficient way to access items rated by user:')
# Same result, but obtained by indexing the sparse matrix with train_ui[user] first;
# the timings below show this route is markedly slower.
display(train_ui[user].indices)
%timeit train_ui[user].indices
Efficient way to access items rated by user:
array([ 0, 6, 10, 27, 49, 78, 95, 97, 116, 143, 153, 156, 167, 171, 172, 173, 194, 208, 225, 473, 495, 549, 615], dtype=int32)
The slowest run took 15.23 times longer than the fastest. This could mean that an intermediate result is being cached. 1000000 loops, best of 3: 703 ns per loop Inefficient way to access items rated by user:
array([ 0, 6, 10, 27, 49, 78, 95, 97, 116, 143, 153, 156, 167, 171, 172, 173, 194, 208, 225, 473, 495, 549, 615], dtype=int32)
10000 loops, best of 3: 77.4 µs per loop
Example: subtracting row means
# Show the toy matrix, then the sum of every row.
print('Our matrix:')
display(sample_csr.todense())
print('List of row sums:')
# sum(axis=1) adds up each row; ravel() flattens the result so the sums are displayed in one row.
sample_csr.sum(axis=1).ravel()
Our matrix:
matrix([[4, 1, 3, 0], [0, 2, 0, 1], [2, 0, 5, 4]], dtype=int64)
List of row sums:
matrix([[ 8, 3, 11]])
# Subtract each row's mean from the stored (rated) entries of the toy matrix, step by step.
print('Array with row means:')
# Row sum divided by the number of stored entries in that row (differences of consecutive indptr values).
row_means=np.asarray(sample_csr.sum(axis=1).ravel())[0]/np.diff(sample_csr.indptr)
display(row_means)
# The label used to read "inverse of row sums", but the diagonal holds the row means computed above.
print('Diagonal matrix with row means on diagonal:')
display(sparse.diags(row_means).todense())
print("""Let's apply them in nonzero entries:""")
# power(0) turns every stored rating into 1, so the product places the row mean
# exactly at the rated positions and leaves the rest empty.
to_subtract=sparse.diags(row_means)*sample_csr.power(0)
display(to_subtract.todense())
print("Finally after subtraction:")
sample_csr-to_subtract.todense()
Array with row means:
array([2.66666667, 1.5 , 3.66666667])
Diagonal csr matrix with inverse of row sums on diagonal:
matrix([[2.66666667, 0. , 0. ], [0. , 1.5 , 0. ], [0. , 0. , 3.66666667]])
Let's apply them in nonzero entries:
matrix([[2.66666667, 2.66666667, 2.66666667, 0. ], [0. , 1.5 , 0. , 1.5 ], [3.66666667, 0. , 3.66666667, 3.66666667]])
Finally after subtraction:
matrix([[ 1.33333333, -1.66666667, 0.33333333, 0. ], [ 0. , 0.5 , 0. , -0.5 ], [-1.66666667, 0. , 1.33333333, 0.33333333]])
Transposing
import numpy as np
from scipy import sparse

# The same toy ratings as before, given as (row, col, value) coordinates.
row = np.array([0, 0, 0, 1, 1, 2, 2, 2])
col = np.array([0, 1, 2, 1, 3, 2, 0, 3])
data = np.array([4, 1, 3, 2, 1, 5, 2, 4])
sample = sparse.csr_matrix((data, (row, col)))

print('Sample matrix: \n', sample.toarray())
print('\nIndices: \n', sample.indices)

# Transposing does not change the indices array: the result is reported in a different
# sparse format (see the printed type), so an explicit tocsr() is needed for CSR indices.
transposed = sample.T
print('\nTransposed matrix: \n', transposed.toarray())
print('\nIndices of transposed matrix: \n', transposed.indices)
print('\nReason: ', type(transposed))
print('\nAfter converting to csr: \n', transposed.tocsr().indices)
Sample matrix: [[4 1 3 0] [0 2 0 1] [2 0 5 4]] Indices: [0 1 2 1 3 0 2 3] Transposed matrix: [[4 0 2] [1 2 0] [3 0 5] [0 1 4]] Indices of transposed matrix: [0 1 2 1 3 0 2 3] Reason: <class 'scipy.sparse.csc.csc_matrix'> After converting to csr: [0 2 0 1 0 2 1 2]
Self made top popular
import os

# Create the output folders for generated recommendations.
# makedirs(exist_ok=True) builds any missing level and is a no-op for folders that already
# exist, so a partially created tree (parent present, sub-folder missing) is repaired too.
os.makedirs('./Recommendations generated/ml-100k/', exist_ok=True)
os.makedirs('./Recommendations generated/toy-example/', exist_ok=True)
# Item x user view of the training ratings: row i holds all ratings given to item i.
train_iu = train_ui.transpose().tocsr()

# Rescaling so that the most-rated item receives a score equal to the highest rating value.
scaling_factor = train_ui.max()/max(np.diff(train_iu.indptr))

# TopPop is a list of pairs (item, rescaled_popularity) sorted descending from the most popular;
# popularity is the number of stored ratings in the item's row.
TopPop = sorted(
    ((item, n_ratings*scaling_factor) for item, n_ratings in enumerate(np.diff(train_iu.indptr))),
    key=lambda pair: pair[1],
    reverse=True,
)
# Recommend to every user the k most popular items the user has not rated in the training data.
# Output rows have the form (user, item1, score1, item2, score2, ...).
k=10
result=[]
for u in range(train_ui.shape[0]):
    # A set gives constant-time membership tests instead of scanning the index array each time.
    user_rated=set(train_ui.indices[train_ui.indptr[u]:train_ui.indptr[u+1]].tolist())
    rec_user=[]
    item_pos=0
    # Walk down the popularity ranking; the second condition stops cleanly when the ranking
    # is exhausted (a user with fewer than k unrated items simply gets a shorter row).
    while len(rec_user)<k and item_pos<len(TopPop):
        item, score=TopPop[item_pos]
        if item not in user_rated:
            rec_user.append((item_code_id[item], score))
        item_pos+=1
    result.append([user_code_id[u]]+list(chain(*rec_user)))
(pd.DataFrame(result)).to_csv('Recommendations generated/ml-100k/Self_TopPop_reco.csv', index=False, header=False)
# Estimations for every (user, item) pair present in the test matrix.
# The score is the item's rescaled training popularity - somewhat artificial, because this
# method is meant for ranking rather than for predicting ratings.
estimations = [
    [
        user_code_id[user],
        item_code_id[item],
        (train_iu.indptr[item+1]-train_iu.indptr[item])*scaling_factor,
    ]
    for user, item in zip(*test_ui.nonzero())
]
pd.DataFrame(estimations).to_csv(
    'Recommendations generated/ml-100k/Self_TopPop_estimations.csv', index=False, header=False
)
Self made global average
# Mean of all stored training ratings.
avg = train_ui.sum()/train_ui.nnz

# Every item gets the same score, so the list stays in item-code order.
GlobalAvg = [(item, avg) for item in range(train_iu.shape[0])]
# Recommend to every user the first k items (in GlobalAvg order) the user has not rated
# in the training data. Output rows have the form (user, item1, score1, item2, score2, ...).
k=10
result=[]
for u in range(train_ui.shape[0]):
    # A set gives constant-time membership tests instead of scanning the index array each time.
    user_rated=set(train_ui.indices[train_ui.indptr[u]:train_ui.indptr[u+1]].tolist())
    rec_user=[]
    item_pos=0
    # The second condition stops cleanly when the list is exhausted
    # (a user with fewer than k unrated items simply gets a shorter row).
    while len(rec_user)<k and item_pos<len(GlobalAvg):
        item, score=GlobalAvg[item_pos]
        if item not in user_rated:
            rec_user.append((item_code_id[item], score))
        item_pos+=1
    result.append([user_code_id[u]]+list(chain(*rec_user)))
(pd.DataFrame(result)).to_csv('Recommendations generated/ml-100k/Self_GlobalAvg_reco.csv', index=False, header=False)
# Estimations: every (user, item) pair present in the test matrix gets the same value,
# the global average of the training ratings.
estimations = [
    [user_code_id[user], item_code_id[item], avg]
    for user, item in zip(*test_ui.nonzero())
]
pd.DataFrame(estimations).to_csv(
    'Recommendations generated/ml-100k/Self_GlobalAvg_estimations.csv', index=False, header=False
)

# Preview of the first two recommendation rows.
pd.DataFrame(result).head(2)
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 5 | 3.529975 | 10 | 3.529975 | 25 | 3.529975 | 32 | 3.529975 | 33 | 3.529975 | 44 | 3.529975 | 46 | 3.529975 | 50 | 3.529975 | 52 | 3.529975 | 55 | 3.529975 |
1 | 2 | 1 | 3.529975 | 2 | 3.529975 | 3 | 3.529975 | 4 | 3.529975 | 5 | 3.529975 | 6 | 3.529975 | 7 | 3.529975 | 8 | 3.529975 | 9 | 3.529975 | 11 | 3.529975 |
Project task 1 - self made top rated
# project task 1: implement TopRated
# Implement recommender system which will recommend movies (which user hasn't seen) with the highest average rating
# The output should be saved in 'Recommendations generated/ml-100k/Self_TopRated_reco.csv'
# and 'Recommendations generated/ml-100k/Self_TopRated_estimations.csv'

# Item x user view of the training ratings: row i holds all ratings given to item i.
train_iu=train_ui.transpose().tocsr()
item_counts=np.diff(train_iu.indptr)
item_sums=np.asarray(train_iu.sum(axis=1)).ravel()
global_mean=train_ui.sum()/train_ui.nnz

# Average training rating of every item. Items without any training rating get 0 instead of
# the NaN produced by 0/0, so they sort to the end of the ranking and the sort stays well defined.
item_means=np.divide(item_sums, item_counts, out=np.zeros(train_iu.shape[0]), where=item_counts!=0)

# TopRat is a list of pairs (item, average_rating) sorted descending from the best rated.
TopRat=sorted(enumerate(item_means), key=lambda x: x[1], reverse=True)

# Recommend to every user the k best-rated items the user has not rated in the training data.
# Output rows have the form (user, item1, score1, item2, score2, ...).
k=10
result=[]
for u in range(train_ui.shape[0]):
    # A set gives constant-time membership tests instead of scanning the index array each time.
    user_rated=set(train_ui.indices[train_ui.indptr[u]:train_ui.indptr[u+1]].tolist())
    rec_user=[]
    item_pos=0
    # The second condition stops cleanly when the ranking is exhausted.
    while len(rec_user)<k and item_pos<len(TopRat):
        item, score=TopRat[item_pos]
        if item not in user_rated:
            rec_user.append((item_code_id[item], score))
        item_pos+=1
    result.append([user_code_id[u]]+list(chain(*rec_user)))
(pd.DataFrame(result)).to_csv('Recommendations generated/ml-100k/Self_TopRated_reco.csv', index=False, header=False)

# Estimations: the predicted rating of a test pair is the item's average training rating
# (previously the TopPop popularity score was written here by mistake).
# Items never rated in training fall back to the global training average.
estimations=[]
for user, item in zip(*test_ui.nonzero()):
    estimate=item_means[item] if item_counts[item]>0 else global_mean
    estimations.append([user_code_id[user], item_code_id[item], estimate])
(pd.DataFrame(estimations)).to_csv('Recommendations generated/ml-100k/Self_TopRated_estimations.csv', index=False, header=False)
(0, 0) 5 (0, 1) 3 (0, 2) 4 (0, 3) 3 (0, 5) 5 (0, 6) 4 (0, 7) 1 (0, 8) 5 (0, 10) 2 (0, 11) 5 (0, 12) 5 (0, 13) 5 (0, 14) 5 (0, 15) 5 (0, 16) 3 (0, 17) 4 (0, 18) 5 (0, 19) 4 (0, 20) 1 (0, 21) 4 (0, 22) 4 (0, 23) 3 (0, 25) 3 (0, 26) 2 (0, 27) 4 : : (942, 671) 5 (942, 684) 4 (942, 716) 4 (942, 719) 1 (942, 721) 3 (942, 723) 1 (942, 731) 4 (942, 738) 4 (942, 755) 2 (942, 764) 3 (942, 784) 2 (942, 795) 3 (942, 807) 4 (942, 815) 4 (942, 823) 4 (942, 824) 3 (942, 830) 2 (942, 839) 4 (942, 940) 1 (942, 942) 5 (942, 1010) 2 (942, 1027) 2 (942, 1043) 3 (942, 1046) 2 (942, 1329) 3
/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:12: RuntimeWarning: invalid value encountered in longlong_scalars if sys.path[0] == '':
# Preview of the first two recommendation rows.
pd.DataFrame(result).head(2)
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 814 | 5.0 | 1122 | 5.0 | 1189 | 5.0 | 1201 | 5.0 | 408 | 4.5 | 318 | 4.485356 | 483 | 4.466667 | 513 | 4.400000 | 603 | 4.392638 | 50 | 4.385106 |
1 | 2 | 119 | 5.0 | 814 | 5.0 | 1122 | 5.0 | 1189 | 5.0 | 1201 | 5.0 | 114 | 4.509091 | 408 | 4.500000 | 169 | 4.494505 | 318 | 4.485356 | 483 | 4.466667 |
Self-made baseline
class selfBaselineUI():
    """Baseline model scoring a (user, item) pair as user mean + item mean of the residuals.

    fit() first computes each user's mean rating, subtracts it from that user's ratings,
    and then computes each item's mean of what is left. The estimated score of user u for
    item i is row_means[u] + col_means[i].
    """

    def fit(self, train_ui):
        """Compute user and item means from a user x item CSR rating matrix.

        Parameters:
            train_ui: sparse CSR matrix with users in rows and items in columns;
                only stored entries count as ratings.

        Returns:
            A CSR matrix with the same stored positions as train_ui, holding the ratings
            after subtracting the user mean and then the item mean.

        Users or items without any stored rating get a mean of 0 (instead of NaN from 0/0).
        """
        self.train_ui=train_ui.copy()
        self.train_iu=train_ui.transpose().tocsr()

        result=self.train_ui.copy()
        user_counts=np.diff(result.indptr)          # stored ratings per user
        item_counts=np.diff(self.train_iu.indptr)   # stored ratings per item

        row_sums=np.asarray(result.sum(axis=1)).ravel()
        self.row_means=np.divide(row_sums, user_counts,
                                 out=np.zeros(result.shape[0]), where=user_counts!=0)

        # Work directly on .data so that only stored entries are touched and entries that
        # become 0 are not dropped. In CSR, data[indptr[u]:indptr[u+1]] belongs to user u,
        # hence repeating every user's mean by that user's number of ratings lines up with .data.
        result.data=result.data-np.repeat(self.row_means, user_counts)

        col_sums=np.asarray(result.sum(axis=0)).ravel()
        self.col_means=np.divide(col_sums, item_counts,
                                 out=np.zeros(self.train_iu.shape[0]), where=item_counts!=0)

        # indices holds the item (column) of every stored entry, so this picks the matching item mean.
        result.data=result.data-self.col_means[result.indices]
        return result

    def recommend(self, user_code_id, item_code_id, topK=10):
        """Return the topK best-scored items each user has not rated in the training data.

        Parameters:
            user_code_id: mapping from user code (matrix row) to the original user id.
            item_code_id: mapping from item code (matrix column) to the original item id.
            topK: maximal number of items recommended per user.

        Returns:
            A list with one row per user in the format [user, item1, score1, item2, score2, ...],
            items ordered by descending score (ties keep ascending item-code order).
            Users who rated every item are omitted.
        """
        # Dense users x items matrix of scores, built by broadcasting the two mean vectors.
        estimations=self.row_means[:, None]+self.col_means[None, :]

        result=[]
        for nb_user, user_scores in enumerate(estimations):
            # A set gives constant-time membership tests inside the per-item loop below.
            user_rated=set(self.train_ui.indices[self.train_ui.indptr[nb_user]:self.train_ui.indptr[nb_user+1]].tolist())
            item_scores=[(item_code_id[item], score)
                         for item, score in enumerate(user_scores)
                         if item not in user_rated]
            if not item_scores:
                continue
            item_scores.sort(key=lambda x: x[1], reverse=True)
            result.append([user_code_id[nb_user]]+list(chain(*item_scores[:topK])))
        return result

    def estimate(self, user_code_id, item_code_id, test_ui):
        """Estimate the score of every (user, item) pair stored in test_ui.

        Returns:
            A list of rows [user, item, row_means[user] + col_means[item]], with user and item
            translated to original ids through user_code_id and item_code_id.
        """
        result=[]
        for user, item in zip(*test_ui.nonzero()):
            result.append([user_code_id[user], item_code_id[item], self.row_means[user]+self.col_means[item]])
        return result
# NOTE(review): the variable names and the output folder say "toy example", yet both paths
# below point at the ml-100k files - confirm which dataset is intended here.
toy_train_read=pd.read_csv('./Datasets/ml-100k/train.csv', sep='\t', header=None, names=['user', 'item', 'rating', 'timestamp'])
toy_test_read=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\t', header=None, names=['user', 'item', 'rating', 'timestamp'])

(toy_train_ui, toy_test_ui, toy_user_code_id, toy_user_id_code,
 toy_item_code_id, toy_item_id_code) = helpers.data_to_csr(toy_train_read, toy_test_read)

print('Training data:')
display(toy_train_ui.todense())

model=selfBaselineUI()

print('After subtracting rows and columns:')
display(model.fit(toy_train_ui).todense())

# recommend() and estimate() loop over the whole data in Python, so each is run only once
# and its result reused for both the preview and the CSV export.
toy_reco_rows=model.recommend(toy_user_code_id, toy_item_code_id, topK=3)
toy_estimation_rows=model.estimate(toy_user_code_id, toy_item_code_id, toy_test_ui)

print('Recommend best unseen item:')
# Rows are [user, item1, score1, item2, ...], so the first three fields are the top-1 recommendation.
display([reco_row[:3] for reco_row in toy_reco_rows])

print('Print estimations on unseen items:')
estimations=pd.DataFrame(toy_estimation_rows)
estimations.columns=['user', 'item', 'est_score']
display(estimations)

top_n=pd.DataFrame(toy_reco_rows)
top_n.to_csv('Recommendations generated/toy-example/Self_BaselineUI_reco.csv', index=False, header=False)

estimations=pd.DataFrame(toy_estimation_rows)
estimations.to_csv('Recommendations generated/toy-example/Self_BaselineUI_estimations.csv', index=False, header=False)
Training data:
matrix([[5, 3, 4, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], ..., [5, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0]], dtype=int64)
After subtracting rows and columns:
matrix([[ 1.18107144, -0.28752307, 0.84558432, ..., 0. , 0. , 0. ], [ 0. , 0. , 0. , ..., 0. , 0. , 0. ], [ 0. , 0. , 0. , ..., 0. , 0. , 0. ], ..., [ 0.73611731, 0. , 0. , ..., 0. , 0. , 0. ], [ 0. , 0. , 0. , ..., 0. , 0. , 0. ], [ 0. , 0. , 0. , ..., 0. , 0. , 0. ]])
Recommend best unseen item:
[[1, 814, 5.449584477962646], [2, 814, 5.580813116206935], [3, 814, 4.615468838961153], [4, 814, 6.244538606403013], [5, 814, 4.738510237608686], [6, 814, 5.543023454887861], [7, 814, 5.860377115719784], [8, 814, 5.758174970039377], [9, 814, 6.283427495291902], [10, 814, 6.125307837172244], [11, 814, 5.279673741538148], [12, 814, 6.326356788221195], [13, 1491, 4.782976186496395], [14, 814, 6.015020534113856], [15, 814, 4.86157157343598], [16, 814, 6.196262744334048], [17, 814, 4.894538606403013], [18, 814, 5.789515775352784], [19, 814, 5.365126841697131], [20, 814, 5.144538606403013], [21, 814, 4.545545317812409], [22, 814, 5.142063358878261], [23, 814, 5.585595516972118], [24, 814, 6.31120527306968], [25, 814, 5.981495128142144], [26, 814, 4.832038606403013], [27, 814, 4.99980176429775], [28, 814, 5.663769375633782], [29, 814, 5.465967177831585], [30, 814, 5.55168146354587], [31, 814, 5.794538606403012], [32, 814, 5.144538606403013], [33, 814, 5.644538606403013], [34, 814, 5.961205273069679], [35, 814, 4.9533621358147775], [36, 814, 5.56120527306968], [37, 814, 5.6506361673786225], [38, 814, 5.511559882998758], [39, 814, 5.594538606403013], [40, 814, 4.787395749260156], [41, 814, 5.656443368307775], [42, 814, 5.605948002376167], [43, 814, 5.622799475968231], [44, 814, 5.500837819001438], [45, 814, 5.419538606403012], [46, 814, 6.076356788221195], [47, 814, 5.420854395876697], [48, 814, 5.7419962335216574], [49, 814, 4.589940905253588], [50, 814, 5.4945386064030135], [51, 814, 5.4945386064030135], [52, 814, 6.138441045427403], [53, 814, 5.712720424584831], [54, 814, 5.654538606403013], [55, 814, 5.294538606403012], [56, 814, 5.581613436334986], [57, 814, 5.444538606403013], [58, 814, 5.671324320688727], [59, 814, 5.835896631094371], [60, 814, 5.947793044272835], [61, 814, 5.027871939736347], [62, 814, 5.165815202147694], [63, 814, 4.976730387224931], [64, 814, 5.490692452556859], [65, 814, 5.879832724050072], [66, 814, 5.427871939736346], [67, 814, 
5.814538606403013], [68, 814, 4.524168236032643], [69, 814, 5.62370527306968], [70, 814, 5.375307837172244], [71, 814, 5.582038606403013], [72, 814, 5.785294908924021], [73, 814, 5.585447697312103], [74, 814, 5.50168146354587], [75, 814, 5.1099232217876285], [76, 814, 5.468309098206292], [77, 814, 5.511205273069679], [78, 814, 5.227871939736346], [79, 814, 5.894538606403013], [80, 814, 5.574538606403014], [81, 814, 5.1672658791302855], [82, 814, 4.978454690319097], [83, 814, 5.256077067941474], [84, 814, 5.759923221787629], [85, 814, 5.43133514319955], [86, 814, 5.279153991018397], [87, 814, 5.667544741372338], [88, 814, 5.789275448508276], [89, 814, 5.894538606403013], [90, 814, 6.155222367086774], [91, 814, 5.769538606403013], [92, 814, 5.153324548895026], [93, 814, 5.439993151857559], [94, 814, 5.54897285716754], [95, 814, 5.309285150181815], [96, 814, 6.1218113336757405], [97, 814, 6.12370527306968], [98, 814, 5.544538606403012], [99, 814, 5.399300511164918], [100, 814, 5.057803912525462], [101, 814, 4.8368462987107055], [102, 814, 4.496877787689563], [103, 814, 5.524168236032643], [104, 814, 4.564993151857559], [105, 814, 5.457038606403013], [106, 814, 5.630387663006786], [107, 814, 4.659244488755954], [108, 814, 5.394538606403013], [109, 814, 5.308517101026669], [110, 814, 4.885279347143754], [111, 814, 5.370729082593489], [112, 814, 5.544538606403012], [113, 814, 5.577465435671305], [114, 814, 5.544538606403012], [115, 814, 5.7878719397363465], [116, 814, 4.855703654946702], [117, 814, 5.848384760249167], [118, 814, 6.5386064030131825], [119, 814, 5.817073817670618], [120, 814, 5.303629515493922], [121, 814, 5.481840193704601], [122, 814, 5.790371939736346], [123, 814, 5.735447697312104], [124, 814, 5.329321215098665], [125, 814, 5.387959659034592], [126, 814, 5.580252892117299], [127, 814, 5.894538606403013], [128, 814, 5.525744280161878], [129, 814, 4.372799475968231], [130, 814, 5.812688072595184], [131, 814, 6.009923221787628], [132, 814, 
5.582038606403013], [133, 814, 5.06120527306968], [134, 814, 5.6672658791302855], [135, 814, 5.266631629658827], [136, 814, 6.16120527306968], [137, 814, 6.194538606403013], [138, 814, 6.150352559891385], [139, 814, 5.851060345533448], [140, 814, 5.207038606403013], [141, 814, 5.329832724050071], [142, 814, 5.5868462987107055], [143, 814, 5.8357150769912485], [144, 814, 5.569237401583736], [145, 814, 5.231888003993374], [146, 814, 5.633669041185621], [147, 814, 6.005649717514124], [148, 814, 5.894538606403013], [149, 814, 4.738288606403013], [150, 814, 5.817615529479935], [151, 814, 5.910865137015258], [152, 814, 6.186673437863687], [153, 814, 4.96120527306968], [154, 814, 5.72380689908594], [155, 814, 4.394538606403013], [156, 814, 5.5868462987107055], [157, 814, 5.657696501139855], [158, 814, 5.754678466542873], [159, 814, 5.366448718762563], [160, 814, 5.791445822897858], [161, 814, 4.445559014566278], [162, 814, 5.523110034974442], [163, 814, 5.06120527306968], [164, 814, 5.97146168332609], [165, 814, 5.939993151857559], [166, 814, 5.56120527306968], [167, 814, 5.280503518683714], [168, 814, 5.256607571920254], [169, 814, 5.994538606403013], [170, 814, 5.6840122906135395], [171, 814, 5.356077067941475], [172, 814, 5.1218113336757405], [173, 814, 6.0837277955922024], [174, 814, 5.507214662741041], [175, 814, 5.707038606403013], [176, 814, 5.644538606403013], [177, 814, 5.448886432489969], [178, 814, 5.56418146354587], [179, 814, 4.832038606403013], [180, 814, 5.914146449540268], [181, 814, 3.372192237687929], [182, 814, 5.727871939736346], [183, 814, 5.1628312893298425], [184, 814, 5.583657777387469], [185, 814, 6.016489825915208], [186, 814, 5.314828461475477], [187, 814, 6.0467125194464915], [188, 814, 5.588656253461837], [189, 814, 5.92156563343004], [190, 814, 5.259923221787629], [191, 814, 5.576356788221195], [192, 814, 5.394538606403013], [193, 814, 5.0514013515010525], [194, 814, 4.88209047362293], [195, 814, 5.227871939736346], [196, 814, 
5.500599212463619], [197, 814, 5.281635380596562], [198, 814, 5.169706391637913], [199, 814, 4.926796670919142], [200, 814, 5.888478000342407], [201, 814, 4.903884400795537], [202, 814, 4.832038606403013], [203, 814, 5.175788606403013], [204, 814, 5.526117553771434], [205, 814, 4.8357150769912485], [206, 814, 4.188656253461836], [207, 814, 5.139103823794317], [208, 814, 5.6672658791302855], [209, 814, 5.125307837172244], [210, 814, 5.965967177831584], [211, 814, 4.9551446670090735], [212, 814, 5.794538606403012], [213, 814, 6.123110034974442], [214, 814, 5.7605179878463115], [215, 814, 5.675360524211232], [216, 814, 5.733824320688727], [217, 814, 4.86120527306968], [218, 814, 5.508174970039377], [219, 814, 5.799300511164917], [220, 814, 5.631380711666171], [221, 814, 5.4945386064030135], [222, 814, 4.9621061739705805], [223, 814, 4.894538606403013], [224, 814, 4.886274143593095], [225, 814, 6.370729082593489], [226, 814, 5.710328080087224], [227, 814, 5.227871939736346], [228, 814, 5.427871939736346], [229, 814, 4.614538606403013], [230, 814, 5.736122764818854], [231, 814, 5.582038606403013], [232, 814, 5.841205273069679], [233, 814, 6.269538606403013], [234, 814, 5.047353619807838], [235, 814, 5.857501569365976], [236, 814, 5.377297227092669], [237, 814, 6.013586225450632], [238, 814, 5.094538606403013], [239, 814, 6.101150176650947], [240, 814, 5.776891547579483], [241, 814, 5.262959659034592], [242, 814, 6.294538606403013], [243, 814, 5.577078288942696], [244, 814, 5.518249946609199], [245, 814, 5.094538606403013], [246, 814, 4.7679563279220005], [247, 814, 5.939993151857559], [248, 814, 5.553629515493922], [249, 814, 6.026891547579484], [250, 814, 5.722495595650325], [251, 814, 5.658174970039377], [252, 814, 6.207038606403013], [253, 814, 5.868897580761987], [254, 814, 5.002230914095321], [255, 814, 4.526891547579483], [256, 814, 5.913057124921531], [257, 814, 5.94215765402206], [258, 814, 5.894538606403013], [259, 814, 5.79197450383891], [260, 814, 
6.116760828625235], [261, 814, 5.794538606403012], [262, 814, 5.019538606403013], [263, 814, 5.963845537096082], [264, 814, 6.137257053004955], [265, 814, 5.516160228024635], [266, 814, 4.947170185350382], [267, 814, 5.880745502954737], [268, 814, 4.823950371108895], [269, 814, 4.95382714395242], [270, 814, 6.193604026963761], [271, 814, 5.473485974824065], [272, 814, 6.3469195587839655], [273, 814, 5.5135862254506325], [274, 814, 6.052433343245118], [275, 814, 4.997102708967116], [276, 814, 5.346390458254865], [277, 814, 5.429422327333246], [278, 814, 6.247479782873601], [279, 814, 5.217023813503605], [280, 814, 5.499300511164918], [281, 814, 5.227871939736346], [282, 814, 5.283427495291901], [283, 814, 5.985447697312104], [284, 814, 5.4945386064030135], [285, 814, 5.981495128142144], [286, 814, 5.62370527306968], [287, 814, 6.009923221787628], [288, 814, 5.75168146354587], [289, 814, 4.894538606403013], [290, 814, 5.233669041185621], [291, 814, 5.501233167072469], [292, 814, 6.09245527306968], [293, 814, 4.939993151857559], [294, 814, 5.346712519446491], [295, 814, 6.161619351744628], [296, 814, 6.024973389011708], [297, 814, 5.310123021987429], [298, 814, 5.913057124921531], [299, 814, 5.327096745937897], [300, 814, 5.718068018167719], [301, 814, 5.449084060948468], [302, 814, 4.394538606403013], [303, 814, 5.29980176429775], [304, 814, 5.157696501139855], [305, 814, 5.320768114599734], [306, 814, 5.66873215479011], [307, 814, 5.701356788221195], [308, 814, 5.6351636064030135], [309, 814, 5.427871939736346], [310, 814, 5.672316384180791], [311, 814, 5.6554081716204045], [312, 814, 6.275754076016272], [313, 814, 5.500307837172244], [314, 814, 5.587221533232281], [315, 814, 5.894538606403013], [316, 814, 5.352165725047081], [317, 814, 5.394538606403013], [318, 814, 5.6013055236962455], [319, 814, 5.368222816929329], [320, 814, 5.598538606403013], [321, 814, 5.631912343776751], [322, 814, 6.1576965011398554], [323, 814, 5.489475315263773], [324, 814, 
6.363926361505054], [325, 814, 5.296324320688727], [326, 814, 5.299944011808418], [327, 814, 5.1954645323289395], [328, 814, 5.378701502330615], [329, 814, 5.328500870553956], [330, 814, 6.394538606403013], [331, 814, 5.599456639189898], [332, 814, 6.040371939736346], [333, 814, 5.758174970039377], [334, 814, 5.394538606403013], [335, 814, 5.418348130212537], [336, 814, 4.847368795082258], [337, 814, 5.794538606403012], [338, 814, 5.961205273069679], [339, 814, 5.930996939736346], [340, 814, 6.029673741538148], [341, 814, 5.616760828625235], [342, 814, 5.3347901787300565], [343, 814, 5.899973389011708], [344, 814, 5.532836478743438], [345, 814, 5.523980230768496], [346, 814, 5.199174367992416], [347, 814, 5.530902242766649], [348, 814, 5.731747908728595], [349, 814, 4.9945386064030135], [350, 814, 6.150352559891385], [351, 814, 6.005649717514124], [352, 814, 5.6218113336757405], [353, 814, 5.315591237981961], [354, 814, 5.679484842962153], [355, 814, 5.93620527306968], [356, 814, 5.338983050847458], [357, 814, 5.979284369114877], [358, 814, 5.519538606403013], [359, 814, 5.894538606403013], [360, 814, 5.894538606403013], [361, 814, 5.6711343510838645], [362, 814, 5.212720424584831], [363, 814, 4.96478654028731], [364, 814, 5.365126841697131], [365, 814, 5.256240734062588], [366, 814, 6.163769375633782], [367, 814, 6.054538606403013], [368, 814, 5.2373957492601555], [369, 814, 5.894538606403013], [370, 814, 5.517489426075144], [371, 814, 6.065270313720086], [372, 814, 6.241477381913217], [373, 814, 5.5400412519056585], [374, 814, 5.186574004633101], [375, 814, 5.6218113336757405], [376, 814, 5.6218113336757405], [377, 814, 5.764103823794317], [378, 814, 5.2754909873553935], [379, 814, 5.875430326148235], [380, 814, 5.107137031599864], [381, 814, 5.750208709495796], [382, 814, 5.382343484451793], [383, 814, 6.267087626010856], [384, 814, 6.012185665226542], [385, 814, 5.26091791674784], [386, 814, 5.644538606403013], [387, 814, 5.159001416320368], [388, 814, 
6.085014796879204], [389, 814, 5.557904943036677], [390, 814, 5.740692452556859], [391, 814, 5.666277736837795], [392, 814, 5.883669041185621], [393, 814, 5.264166113566336], [394, 814, 5.77279947596823], [395, 814, 5.834538606403013], [396, 814, 5.284782508842037], [397, 814, 5.7635862254506325], [398, 814, 5.581105770582118], [399, 814, 4.851569856403013], [400, 814, 5.60042095934419], [401, 814, 5.026770011361691], [402, 814, 5.698460175030464], [403, 814, 5.578749132718802], [404, 814, 5.033427495291901], [405, 814, 3.7205521199165266], [406, 814, 5.416760828625235], [407, 814, 5.326010687621287], [408, 814, 5.712720424584831], [409, 814, 5.482038606403012], [410, 814, 4.989776701641109], [411, 814, 5.644538606403013], [412, 814, 5.7754909873553935], [413, 814, 5.576356788221195], [414, 814, 5.631380711666171], [415, 814, 5.99980176429775], [416, 814, 5.728291755017622], [417, 814, 5.137496352881886], [418, 814, 4.6840122906135395], [419, 814, 6.0868462987107055], [420, 814, 6.19083490269931], [421, 814, 5.754538606403013], [422, 814, 5.435715076991248], [423, 814, 5.46057634225207], [424, 814, 5.184861187048174], [425, 814, 4.7526031225320455], [426, 814, 5.721699100230174], [427, 814, 6.4145386064030125], [428, 814, 5.740692452556859], [429, 814, 5.317962029826436], [430, 814, 5.426453500020035], [431, 814, 5.306303312285365], [432, 814, 5.660496053211524], [433, 814, 5.470296182160588], [434, 814, 5.56120527306968], [435, 814, 5.222353175939437], [436, 814, 5.627297227092669], [437, 814, 5.459959167150677], [438, 814, 5.977871939736346], [439, 814, 5.677147302055187], [440, 814, 6.088983050847458], [441, 814, 5.423950371108895], [442, 814, 5.003782303882005], [443, 814, 5.420854395876697], [444, 814, 5.694538606403013], [445, 814, 3.791734868085256], [446, 814, 4.524168236032643], [447, 814, 5.477147302055187], [448, 814, 5.269538606403013], [449, 814, 5.613288606403013], [450, 814, 5.7631354883629236], [451, 814, 4.582038606403013], [452, 814, 
5.335014796879204], [453, 814, 5.170129157584116], [454, 814, 4.803140756940648], [455, 814, 5.3844714923090535], [456, 814, 5.36120527306968], [457, 814, 5.984628696493103], [458, 814, 5.556607571920255], [459, 814, 5.358456132176209], [460, 814, 5.377297227092669], [461, 814, 4.838983050847458], [462, 814, 6.009923221787628], [463, 814, 4.724727285648296], [464, 814, 5.844538606403013], [465, 814, 5.287981229353832], [466, 814, 5.400562702788555], [467, 814, 5.543187255051661], [468, 814, 5.860640301318267], [469, 814, 6.394538606403013], [470, 814, 5.429422327333246], [471, 814, 5.240692452556859], [472, 814, 6.107808748583108], [473, 814, 5.86120527306968], [474, 814, 5.98014171924348], [475, 814, 5.427871939736346], [476, 814, 5.271587786730882], [477, 814, 6.342814468471978], [478, 814, 5.294538606403012], [479, 814, 5.362487324351731], [480, 814, 5.7022309140953205], [481, 814, 5.955763096198932], [482, 814, 5.2858429542291], [483, 814, 4.937091797892375], [484, 814, 5.83268293629992], [485, 814, 4.769538606403013], [486, 814, 5.132633844498251], [487, 814, 5.573886432489969], [488, 814, 5.25168146354587], [489, 814, 5.632633844498251], [490, 814, 4.644538606403013], [491, 814, 5.644538606403013], [492, 814, 5.4172658791302855], [493, 814, 5.574538606403014], [494, 814, 5.783427495291901], [495, 814, 5.8886214466397], [496, 814, 4.950094161958568], [497, 814, 5.125307837172244], [498, 814, 5.254898966763373], [499, 814, 5.804795016659423], [500, 814, 5.242364693359535], [501, 814, 5.62668146354587], [502, 814, 4.825573089161633], [503, 814, 5.979153991018397], [504, 814, 5.526614078101126], [505, 814, 5.331320215598415], [506, 814, 5.5661803974477895], [507, 814, 6.545701397100688], [508, 814, 5.784949565307123], [509, 814, 4.308331709851289], [510, 814, 4.712720424584831], [511, 814, 5.704062415926822], [512, 814, 6.262959659034592], [513, 814, 6.210328080087224], [514, 814, 5.765506348338497], [515, 814, 4.927871939736346], [516, 814, 6.082038606403013], 
[517, 814, 5.281635380596562], [518, 814, 5.555828928983658], [519, 814, 5.948592660457067], [520, 814, 5.338983050847458], [521, 814, 5.092814468471978], [522, 814, 6.303629515493922], [523, 814, 6.250860445483473], [524, 814, 5.414379876244283], [525, 814, 5.180252892117299], [526, 814, 5.277517329807268], [527, 814, 5.726397013482659], [528, 814, 5.526117553771434], [529, 814, 5.427871939736346], [530, 814, 5.783427495291901], [531, 814, 5.174538606403013], [532, 814, 6.004630349522279], [533, 814, 5.31120527306968], [534, 814, 6.017345623946873], [535, 814, 5.841907027455644], [536, 814, 5.721876735899416], [537, 814, 4.737726267071393], [538, 814, 5.618676537437496], [539, 814, 5.851985414913651], [540, 814, 5.6672658791302855], [541, 814, 5.592651813950183], [542, 814, 5.385766676578452], [543, 814, 5.462280541886884], [544, 814, 4.644538606403013], [545, 814, 5.383260410914291], [546, 814, 5.737675861304973], [547, 814, 5.578749132718802], [548, 814, 5.527871939736347], [549, 814, 5.6218113336757405], [550, 814, 5.618676537437496], [551, 814, 5.714461978050522], [552, 814, 4.991760828625235], [553, 814, 6.119538606403013], [554, 814, 5.438983050847457], [555, 814, 5.975619687484094], [556, 814, 6.110754822619229], [557, 814, 5.608824320688727], [558, 814, 6.094538606403013], [559, 814, 5.394538606403013], [560, 814, 5.262354698357036], [561, 814, 4.897951575686289], [562, 814, 5.463504123644393], [563, 814, 5.851060345533448], [564, 814, 5.334538606403013], [565, 814, 6.413057124921531], [566, 814, 5.357953240549355], [567, 814, 5.863769375633782], [568, 814, 5.252747561626894], [569, 814, 5.369114877589453], [570, 814, 4.672316384180791], [571, 814, 5.420854395876697], [572, 814, 5.294538606403012], [573, 814, 5.689410401274808], [574, 814, 5.408052119916526], [575, 814, 5.1218113336757405], [576, 814, 5.509923221787629], [577, 814, 5.711440014853717], [578, 814, 4.727871939736346], [579, 814, 5.378409574144948], [580, 814, 5.644538606403013], [581, 814, 
5.994538606403013], [582, 814, 5.06120527306968], [583, 814, 6.370729082593489], [584, 814, 5.4945386064030135], [585, 814, 5.6330001448645515], [586, 814, 5.277997253019555], [587, 814, 4.868897580761987], [588, 814, 5.565126841697131], [589, 814, 5.408052119916526], [590, 814, 5.3357150769912485], [591, 814, 5.539699896725594], [592, 814, 5.731908848617546], [593, 814, 5.499580623209736], [594, 814, 5.394538606403013], [595, 814, 5.1195386064030135], [596, 814, 5.4945386064030135], [597, 814, 5.582038606403013], [598, 814, 5.4945386064030135], [599, 814, 5.8404845523489595], [600, 814, 5.480252892117299], [601, 814, 5.0083597446143955], [602, 814, 5.602871939736346], [603, 814, 5.783427495291901], [604, 814, 5.505649717514125], [605, 814, 5.542425930346675], [606, 814, 5.883669041185621], [607, 814, 5.733248283822368], [608, 814, 5.544945110468054], [609, 814, 3.985447697312104], [610, 814, 5.652603122532046], [611, 814, 5.707038606403013], [612, 814, 5.227871939736346], [613, 814, 6.227871939736346], [614, 814, 5.108824320688727], [615, 814, 5.736002021037159], [616, 814, 5.439993151857559], [617, 814, 4.576356788221195], [618, 814, 5.144538606403013], [619, 814, 5.507441832209464], [620, 814, 5.6968641877983615], [621, 814, 5.373110034974442], [622, 814, 5.280503518683714], [623, 814, 5.705349417213824], [624, 814, 5.428519188927285], [625, 814, 5.4000331118975184], [626, 814, 4.269538606403013], [627, 814, 5.175360524211232], [628, 814, 6.608824320688727], [629, 814, 5.977107413742463], [630, 814, 5.223950371108895], [631, 814, 4.894538606403013], [632, 814, 5.548384760249167], [633, 814, 5.205649717514124], [634, 814, 5.269538606403013], [635, 814, 5.279153991018397], [636, 814, 6.294538606403013], [637, 814, 4.435715076991248], [638, 814, 5.385447697312104], [639, 814, 4.821957961241722], [640, 814, 6.144538606403013], [641, 814, 5.988288606403013], [642, 814, 5.532333881993564], [643, 814, 5.559172752744477], [644, 814, 5.8204645323289395], [645, 814, 
5.802584583414507], [646, 814, 4.787395749260156], [647, 814, 5.680252892117299], [648, 814, 5.1711343510838645], [649, 814, 5.616760828625235], [650, 814, 5.004774826875454], [651, 814, 5.227871939736346], [652, 814, 4.957038606403013], [653, 814, 4.5892288718897385], [654, 814, 5.642364693359535], [655, 814, 4.803791275442158], [656, 814, 4.420854395876697], [657, 814, 5.144538606403013], [658, 814, 5.648384760249167], [659, 814, 5.655828928983658], [660, 814, 4.424593251211756], [661, 814, 5.708264096599091], [662, 814, 5.694538606403013], [663, 814, 5.437174265317742], [664, 814, 5.630972714930145], [665, 814, 5.390190780316056], [666, 814, 5.577465435671305], [667, 814, 5.813457525321931], [668, 814, 5.366760828625235], [669, 814, 5.304795016659423], [670, 814, 5.715051426915833], [671, 814, 5.574538606403014], [672, 814, 5.1672658791302855], [673, 814, 5.694538606403013], [674, 814, 5.56120527306968], [675, 814, 5.712720424584831], [676, 814, 5.430252892117299], [677, 814, 5.3817180935825], [678, 814, 5.294538606403012], [679, 814, 5.511559882998758], [680, 814, 5.823110034974441], [681, 814, 4.9533621358147775], [682, 814, 5.026614078101126], [683, 814, 5.185447697312104], [684, 814, 5.366760828625235], [685, 814, 3.894538606403013], [686, 814, 6.483824320688727], [687, 814, 5.338983050847458], [688, 814, 6.672316384180791], [689, 814, 5.707038606403013], [690, 814, 5.037395749260156], [691, 814, 6.048384760249167], [692, 814, 5.136962848827255], [693, 814, 5.019538606403013], [694, 814, 6.079723791588198], [695, 814, 5.00168146354587], [696, 814, 6.014538606403013], [697, 814, 5.540880069817647], [698, 814, 4.708971596093734], [699, 814, 5.065270313720086], [700, 814, 5.659244488755954], [701, 814, 6.0868462987107055], [702, 814, 4.254538606403013], [703, 814, 5.3810250928895], [704, 814, 5.625307837172244], [705, 814, 5.594538606403013], [706, 814, 5.19083490269931], [707, 814, 5.3808774042172205], [708, 814, 4.956267001464742], [709, 814, 
5.505649717514125], [710, 814, 5.546053757918164], [711, 814, 5.683110034974442], [712, 814, 5.655732636253759], [713, 814, 4.736643869560908], [714, 814, 5.378409574144948], [715, 814, 5.233000144864551], [716, 814, 5.79540067536853], [717, 814, 5.454538606403013], [718, 814, 5.608824320688727], [719, 814, 4.9714616833260905], [720, 814, 5.938016867272578], [721, 814, 5.541155147756396], [722, 814, 5.569538606403013], [723, 814, 5.144538606403013], [724, 814, 4.17146168332609], [725, 814, 5.594538606403013], [726, 814, 4.849084060948467], [727, 814, 4.952009870770829], [728, 814, 5.394538606403013], [729, 814, 4.56120527306968], [730, 814, 5.207038606403013], [731, 814, 5.430770490460985], [732, 814, 5.894538606403013], [733, 814, 4.959755997707361], [734, 814, 5.4871311989956055], [735, 814, 5.266631629658827], [736, 814, 4.727871939736346], [737, 814, 5.817615529479935], [738, 814, 5.588086993499787], [739, 814, 5.727871939736346], [740, 814, 5.5868462987107055], [741, 814, 5.353362135814778], [742, 814, 5.338983050847458], [743, 814, 5.446262744334048], [744, 814, 5.803629515493922], [745, 814, 5.005649717514125], [746, 814, 5.4910298344731885], [747, 814, 5.943319094207891], [748, 814, 5.549300511164917], [749, 814, 5.466555067308363], [750, 814, 4.9686126804770865], [751, 814, 5.373990661197533], [752, 814, 4.976505819517767], [753, 814, 5.6218113336757405], [754, 814, 5.215967177831585], [755, 814, 5.082038606403013], [756, 814, 5.063369775234182], [757, 814, 5.261445081223156], [758, 814, 5.782219765823303], [759, 814, 5.93157564344005], [760, 814, 5.012185665226543], [761, 814, 4.719100009911784], [762, 814, 5.012185665226543], [763, 814, 5.692518404382811], [764, 814, 5.591167819886159], [765, 814, 5.727871939736346], [766, 814, 5.201107949468707], [767, 814, 6.281635380596561], [768, 814, 5.045482002629428], [769, 814, 4.9686126804770865], [770, 814, 6.210328080087224], [771, 814, 5.60042095934419], [772, 814, 5.894538606403013], [773, 814, 
5.1693477667083565], [774, 814, 3.9429257031772065], [775, 814, 5.694538606403013], [776, 814, 5.376020087884495], [777, 814, 5.894538606403013], [778, 814, 4.783427495291901], [779, 814, 5.963504123644392], [780, 814, 5.812905953341788], [781, 814, 5.707038606403013], [782, 814, 4.656196637491096], [783, 814, 5.779153991018397], [784, 814, 5.769538606403013], [785, 814, 5.663769375633782], [786, 814, 5.714763325504137], [787, 814, 5.116760828625235], [788, 814, 5.20183027306968], [789, 814, 5.644538606403013], [790, 814, 4.894538606403013], [791, 814, 5.844538606403013], [792, 814, 5.208824320688727], [793, 814, 5.433000144864551], [794, 814, 6.180252892117299], [795, 814, 5.003065738185959], [796, 814, 5.493829386544856], [797, 814, 4.694538606403013], [798, 814, 5.1458475069265734], [799, 814, 5.8469195587839655], [800, 814, 5.590190780316057], [801, 814, 5.939993151857559], [802, 814, 5.377871939736346], [803, 814, 4.965967177831585], [804, 814, 5.583254559710406], [805, 814, 5.262185665226543], [806, 814, 5.523848951230599], [807, 814, 5.812777600113705], [808, 814, 6.071009194638307], [809, 814, 5.188656253461836], [810, 814, 6.323110034974442], [811, 814, 6.071009194638307], [812, 814, 5.247479782873601], [813, 814, 4.939993151857559], [814, 814, 4.779153991018397], [815, 814, 5.739609028938224], [816, 814, 5.736643869560908], [817, 814, 5.294538606403012], [818, 814, 5.36120527306968], [819, 814, 6.1218113336757405], [820, 814, 5.012185665226543], [821, 814, 6.283427495291902], [822, 814, 5.144538606403013], [823, 814, 5.951142379987918], [824, 814, 4.394538606403013], [825, 814, 5.509292704763668], [826, 814, 5.712720424584831], [827, 814, 5.105064922192486], [828, 814, 5.103840931984408], [829, 814, 5.362623712785991], [830, 814, 5.69215765402206], [831, 814, 5.412395749260156], [832, 814, 4.894538606403013], [833, 814, 4.889799269910122], [834, 814, 5.849084060948467], [835, 814, 5.965967177831584], [836, 814, 5.780252892117298], [837, 814, 
4.894538606403013], [838, 814, 6.012959659034592], [839, 814, 4.9583683936370555], [840, 814, 6.098620239056074], [841, 814, 5.779153991018397], [842, 814, 5.694538606403013], [843, 814, 4.673019619061241], [844, 814, 5.327374427298535], [845, 814, 4.933000144864551], [846, 814, 5.601855679573745], [847, 814, 4.938016867272578], [848, 814, 6.2142107375505535], [849, 814, 6.736643869560908], [850, 814, 6.369538606403013], [851, 814, 5.3425905544549614], [852, 814, 5.4621061739705805], [853, 814, 4.980252892117299], [854, 814, 5.1402528921172985], [855, 814, 5.283427495291901], [856, 814, 5.56120527306968], [857, 814, 5.207038606403013], [858, 814, 5.227871939736346], [859, 814, 5.629832724050072], [860, 814, 5.16873215479011], [861, 814, 5.843256555120962], [862, 814, 6.260392264939599], [863, 814, 5.1244236638742775], [864, 814, 5.743744955609362], [865, 814, 4.2214616833260905], [866, 814, 4.56120527306968], [867, 814, 6.1628312893298425], [868, 814, 4.846634414786246], [869, 814, 4.715051426915833], [870, 814, 5.296407765281518], [871, 814, 5.362623712785991], [872, 814, 5.340967177831585], [873, 814, 4.644538606403013], [874, 814, 5.663769375633782], [875, 814, 6.039275448508276], [876, 814, 6.172316384180791], [877, 814, 5.771461683326089], [878, 814, 5.128772840637247], [879, 814, 5.677147302055187], [880, 814, 5.304708097928437], [881, 814, 5.191189324106363], [882, 814, 5.973485974824065], [883, 814, 5.8447648507469045], [884, 814, 5.63647409027398], [885, 814, 5.301515350589059], [886, 814, 5.252433343245118], [887, 814, 5.746651282459351], [888, 814, 6.247479782873601], [889, 814, 5.290834902699309], [890, 814, 5.696518804422815], [891, 814, 6.005649717514124], [892, 814, 5.859856525478158], [893, 814, 5.472316384180791], [894, 814, 5.513350487591132], [895, 814, 5.694538606403013], [896, 814, 4.855806212036816], [897, 814, 5.86309206552251], [898, 814, 5.394538606403013], [899, 814, 5.4365946811693675], [900, 814, 4.526117553771434], [901, 814, 
5.72106921864791], [902, 814, 5.344538606403013], [903, 814, 5.765228261575427], [904, 814, 5.705349417213824], [905, 814, 5.194538606403013], [906, 814, 5.659244488755954], [907, 814, 6.501095983452193], [908, 814, 5.4290213650237025], [909, 814, 6.227871939736346], [910, 814, 5.014538606403013], [911, 814, 5.605064922192486], [912, 814, 5.681772648956205], [913, 814, 5.488598012343607], [914, 814, 4.841907027455644], [915, 814, 4.894538606403013], [916, 814, 5.230604180173504], [917, 814, 5.411779985713357], [918, 814, 5.279153991018397], [919, 814, 5.388983050847457], [920, 814, 5.068451649881274], [921, 814, 5.1554081716204045], [922, 814, 5.277716176496471], [923, 814, 6.052433343245118], [924, 814, 5.697568909433317], [925, 814, 4.965967177831585], [926, 814, 5.247479782873601], [927, 814, 5.568957211054176], [928, 814, 6.5868462987107055], [929, 814, 5.616760828625235], [930, 814, 4.874930763265758], [931, 814, 5.68620527306968], [932, 814, 5.868222816929329], [933, 814, 4.5742771684945165], [934, 814, 5.530902242766649], [935, 814, 5.773326485190892], [936, 814, 5.678322390186796], [937, 814, 5.197568909433317], [938, 814, 5.246186958051364], [939, 814, 6.236643869560908], [940, 814, 5.462439840970914], [941, 814, 5.894538606403013], [942, 814, 6.164379876244283], [943, 814, 5.284782508842037]]
Print estimations on unseen items:
user | item | est_score | |
---|---|---|---|
0 | 1 | 5 | 3.457161 |
1 | 1 | 10 | 3.798540 |
2 | 1 | 25 | 3.435415 |
3 | 1 | 32 | 3.732018 |
4 | 1 | 33 | 3.531991 |
... | ... | ... | ... |
19995 | 943 | 928 | 2.907189 |
19996 | 943 | 1067 | 3.485929 |
19997 | 943 | 1074 | 2.861988 |
19998 | 943 | 1188 | 2.727428 |
19999 | 943 | 1228 | 2.568442 |
20000 rows × 3 columns
# Fit the BaselineUI model on the ml-100k training matrix and export its outputs.
# selfBaselineUI is defined outside this cell.
model=selfBaselineUI()
model.fit(train_ui)
# Top-10 recommendations, written as CSV without index column or header row.
top_n=pd.DataFrame(model.recommend(user_code_id, item_code_id, topK=10))
top_n.to_csv('Recommendations generated/ml-100k/Self_BaselineUI_reco.csv', index=False, header=False)
# Estimates computed against test_ui, written in the same headerless CSV format.
# NOTE(review): to_csv does not create missing directories - the output folder presumably already exists; confirm.
estimations=pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))
estimations.to_csv('Recommendations generated/ml-100k/Self_BaselineUI_estimations.csv', index=False, header=False)
project task 2: implement self-made BaselineIU
# Implement a recommender system, analogous to BaselineUI, that recommends movies the user has not seen yet,
# but subtract the column (item) means first and then the row (user) means.
# The output should be saved in 'Recommendations generated/ml-100k/Self_BaselineIU_reco.csv'
# and 'Recommendations generated/ml-100k/Self_BaselineIU_estimations.csv'
class selfBaselineIU():
    """Baseline recommender that removes item (column) means first, then user (row) means.

    After ``fit`` the predicted score for a (user, item) pair is
    ``row_means[user] + col_means[item]``, where ``col_means`` are the mean
    stored ratings of each item and ``row_means`` are the mean residuals of
    each user once the item means have been subtracted.
    """

    def fit(self, train_ui):
        """Compute item means and user residual means from a CSR rating matrix.

        Parameters
        ----------
        train_ui : scipy.sparse CSR matrix, shape (n_users, n_items)
            Only stored entries count as ratings. The input is not modified.

        Returns
        -------
        scipy.sparse CSR matrix
            Copy of ``train_ui`` whose stored entries hold the rating minus
            the item mean minus the user residual mean.
        """
        self.train_ui = train_ui.copy()
        self.train_iu = train_ui.transpose().tocsr()
        result = self.train_ui.copy()

        # Number of stored ratings per item; items without ratings keep a mean of 0.
        ratings_per_item = np.diff(self.train_iu.indptr)
        self.col_means = np.divide(
            np.asarray(result.sum(axis=0)).ravel(),
            ratings_per_item,
            out=np.zeros(self.train_iu.shape[0]),
            where=ratings_per_item != 0,
        )
        # In CSR, result.indices[k] is the column of result.data[k], so the item
        # mean of every stored entry can be gathered directly - no sparse product,
        # sentinel value or index sorting is needed to keep .data aligned.
        result.data = result.data - self.col_means[result.indices]

        # Number of stored ratings per user; users without ratings get a mean of 0
        # instead of the NaN a plain 0/0 division would produce.
        ratings_per_user = np.diff(result.indptr)
        self.row_means = np.divide(
            np.asarray(result.sum(axis=1)).ravel(),
            ratings_per_user,
            out=np.zeros(result.shape[0]),
            where=ratings_per_user != 0,
        )
        # Stored entries of row r occupy data[indptr[r]:indptr[r+1]], so repeating
        # each row mean by its row length lines it up with result.data.
        result.data = result.data - np.repeat(self.row_means, ratings_per_user)

        return result

    def recommend(self, user_code_id, item_code_id, topK=10):
        """Return the ``topK`` best-scored items each user has no stored rating for.

        Parameters
        ----------
        user_code_id, item_code_id : mapping
            Translate internal row / column codes into the ids to report.
        topK : int
            Maximum number of items per user.

        Returns
        -------
        list of lists ``[user_id, item_id_1, score_1, item_id_2, score_2, ...]``,
        one per user that has at least one unrated item, ordered by user code.
        """
        # Dense (n_users, n_items) score table via broadcasting.
        estimations = self.row_means[:, None] + self.col_means[None, :]

        result = []
        for nb_user, user_scores in enumerate(estimations):
            row_start = self.train_ui.indptr[nb_user]
            row_end = self.train_ui.indptr[nb_user + 1]
            # A set makes the per-item membership test O(1) instead of a scan of the array.
            user_rated = set(self.train_ui.indices[row_start:row_end].tolist())

            item_scores = [(item_code_id[item], score)
                           for item, score in enumerate(user_scores)
                           if item not in user_rated]
            if not item_scores:
                # Nothing left to recommend: the user is omitted from the output.
                continue

            # Stable sort: items with equal scores keep ascending item-code order.
            item_scores.sort(key=lambda x: x[1], reverse=True)
            result.append([user_code_id[nb_user]] + list(chain(*item_scores[:topK])))
        return result

    def estimate(self, user_code_id, item_code_id, test_ui):
        """Score every nonzero (user, item) entry of ``test_ui``.

        Returns
        -------
        list of ``[user_id, item_id, estimated_score]`` in the order produced
        by ``test_ui.nonzero()``.
        """
        result = []
        for user, item in zip(*test_ui.nonzero()):
            result.append([user_code_id[user], item_code_id[item],
                           self.row_means[user] + self.col_means[item]])
        return result
# Demo cell: fit selfBaselineIU, display its intermediate results and export them.
# NOTE(review): the variables and output folder are named "toy", but both CSVs are read
# from ./Datasets/ml-100k/ and the displayed output lists users up to 943 - confirm
# whether the input should be the toy-example dataset or the output folder should be ml-100k.
toy_train_read=pd.read_csv('./Datasets/ml-100k/train.csv', sep='\t', header=None, names=['user', 'item', 'rating', 'timestamp'])
toy_test_read=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\t', header=None, names=['user', 'item', 'rating', 'timestamp'])
# helpers.data_to_csr is defined outside this cell; presumably it returns CSR train/test
# matrices plus code<->id lookup dictionaries - verify against the helpers module.
toy_train_ui, toy_test_ui, toy_user_code_id, toy_user_id_code, \
toy_item_code_id, toy_item_id_code = helpers.data_to_csr(toy_train_read, toy_test_read)
print('Training data:')
display(toy_train_ui.todense())
model=selfBaselineIU()
print('After subtracting rows and columns:')
# fit() returns the residual matrix (ratings minus item means minus user means).
display(model.fit(toy_train_ui).todense())
print('Recommend best unseen item:')
display(model.recommend(toy_user_code_id, toy_item_code_id, topK=1))
print('Print estimations on unseen items:')
estimations=pd.DataFrame(model.estimate(toy_user_code_id, toy_item_code_id, toy_test_ui))
estimations.columns=['user', 'item', 'est_score']
display(estimations)
# Export: top-3 recommendations and the estimates, both as headerless CSV without index.
top_n=pd.DataFrame(model.recommend(toy_user_code_id, toy_item_code_id, topK=3))
top_n.to_csv('Recommendations generated/toy-example/Self_BaselineIU_reco.csv', index=False, header=False)
# The estimates are recomputed here, so the column names assigned above are not carried over
# (irrelevant for the file because header=False).
estimations=pd.DataFrame(model.estimate(toy_user_code_id, toy_item_code_id, toy_test_ui))
estimations.to_csv('Recommendations generated/toy-example/Self_BaselineIU_estimations.csv', index=False, header=False)
Training data:
matrix([[5, 3, 4, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], ..., [5, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0]], dtype=int64)
After subtracting rows and columns:
matrix([[ 1.12403879, -0.23917738, 0.94111364, ..., 0. , 0. , 0. ], [ 0. , 0. , 0. , ..., 0. , 0. , 0. ], [ 0. , 0. , 0. , ..., 0. , 0. , 0. ], ..., [ 0.79639556, 0. , 0. , ..., 0. , 0. , 0. ], [ 0. , 0. , 0. , ..., 0. , 0. , 0. ], [ 0. , 0. , 0. , ..., 0. , 0. , 0. ]])
Recommend best unseen item:
[[1, 814, 5.033569907350074], [2, 119, 5.073954341022419], [3, 119, 4.4091895061895], [4, 119, 5.897950732798565], [5, 119, 4.652876199931835], [6, 119, 4.8332609987661375], [7, 119, 5.460380872638376], [8, 119, 5.288661416309815], [9, 119, 5.641605128815088], [10, 119, 5.390515469506597], [11, 119, 4.887528747495148], [12, 119, 5.593385462360327], [13, 119, 4.7988643880253665], [14, 119, 5.293633791156009], [15, 119, 4.5691149509701665], [16, 119, 5.621022235609644], [17, 119, 4.401933175071678], [18, 119, 5.1136246795074145], [19, 119, 4.828561363832632], [20, 119, 4.67891154555548], [21, 119, 4.686981911864455], [22, 119, 4.977475553219404], [23, 119, 4.9557400991270795], [24, 119, 5.643906553501267], [25, 119, 5.182078286215623], [26, 119, 4.50014256759968], [27, 119, 4.655132325688396], [28, 119, 5.146362563799686], [29, 119, 5.064231306905151], [30, 119, 5.261671157244791], [31, 119, 5.174125806792258], [32, 119, 4.772889488953337], [33, 119, 5.472031388154129], [34, 119, 5.725640547396732], [35, 119, 5.06389058405801], [36, 119, 5.380757374300082], [37, 119, 5.335173262678534], [38, 119, 5.41438534300787], [39, 119, 5.070117546077895], [40, 119, 4.516579408961074], [41, 119, 4.852233579518073], [42, 119, 5.1802278477549235], [43, 119, 5.191447259871135], [44, 119, 4.944576190506044], [45, 119, 5.242283889842014], [46, 119, 5.562899242640933], [47, 119, 5.178733753891452], [48, 119, 5.009957370452998], [49, 119, 4.359876008169282], [50, 119, 5.121671046477792], [51, 119, 4.6007040768425345], [52, 119, 5.582487505009692], [53, 119, 5.194547597273854], [54, 119, 5.364952920943924], [55, 119, 4.8792052335131375], [56, 119, 5.2981580402492146], [57, 119, 5.221466492969975], [58, 119, 5.038836938114093], [59, 119, 5.337158024076944], [60, 119, 5.243138421619351], [61, 119, 4.776408504838526], [62, 119, 4.703042643349071], [63, 119, 4.605296126523914], [64, 119, 4.948533969947571], [65, 119, 5.221241287763163], [66, 119, 5.002725597922881], [67, 119, 
5.796548513415513], [68, 119, 4.202892660828917], [69, 119, 5.095717444052698], [70, 119, 4.961287346505335], [71, 119, 4.7758440135224545], [72, 119, 5.132061838360728], [73, 119, 4.837305292087907], [74, 119, 4.992999499038664], [75, 119, 4.9300567320484765], [76, 119, 4.90472507206589], [77, 119, 4.733878487236107], [78, 119, 5.0279504063477], [79, 119, 5.356578662219639], [80, 119, 4.796097892768055], [81, 119, 4.815845261722918], [82, 119, 4.5571407493973926], [83, 119, 5.1043788080416475], [84, 119, 5.315531453571984], [85, 119, 4.811496617807297], [86, 119, 5.089346576323998], [87, 119, 5.4031633665628185], [88, 119, 5.364897077045082], [89, 119, 5.364492385412613], [90, 119, 5.464045041037035], [91, 119, 5.07834307914999], [92, 119, 4.89619045232672], [93, 119, 5.132906106276727], [94, 119, 5.165569088214399], [95, 119, 4.911927552710331], [96, 119, 5.237318355664692], [97, 119, 5.278380658021735], [98, 119, 5.024989889569549], [99, 119, 5.114670072323433], [100, 119, 4.77341777472129], [101, 119, 4.767421425016631], [102, 119, 4.18765475016262], [103, 119, 4.954448901545062], [104, 119, 4.278043365883535], [105, 119, 5.00471355286365], [106, 119, 5.0152702038012205], [107, 119, 4.405050604137838], [108, 119, 4.958096253519998], [109, 119, 5.025919819461879], [110, 119, 4.783264162043984], [111, 119, 4.761428877998011], [112, 119, 5.21980700136696], [113, 119, 5.287596900000982], [114, 119, 4.709320827699378], [115, 119, 5.095424067868969], [116, 119, 4.505252421327151], [117, 119, 5.570755157809327], [118, 119, 5.9693904671092035], [119, 119, 5.470128775869114], [120, 119, 4.829897956269527], [121, 119, 4.846449501561156], [122, 119, 5.162442574373336], [123, 119, 4.926391763157625], [124, 119, 4.508452155746849], [125, 119, 5.073088029120306], [126, 119, 5.320411733982426], [127, 119, 5.631323701239779], [128, 119, 4.983019890063413], [129, 119, 4.025228051382406], [130, 119, 5.679089192202012], [131, 119, 5.264606373351151], [132, 119, 
4.767279681003622], [133, 119, 4.73850437348507], [134, 119, 5.268063413261265], [135, 119, 4.9335362235047535], [136, 119, 5.369248212586641], [137, 119, 5.892781121171061], [138, 119, 5.336572978543404], [139, 119, 5.294994563700997], [140, 119, 5.020383359544767], [141, 119, 5.206153916745533], [142, 119, 5.147437482554587], [143, 119, 5.502363341158085], [144, 119, 4.982420995286451], [145, 119, 5.111699136704155], [146, 119, 5.131844540350976], [147, 119, 5.49815452877021], [148, 119, 5.150493702574655], [149, 119, 4.422471343939307], [150, 119, 5.294638753008823], [151, 119, 5.280784226777257], [152, 119, 5.848717214485781], [153, 119, 4.35185590153092], [154, 119, 4.957331999832742], [155, 119, 4.294633092286114], [156, 119, 4.724555653245514], [157, 119, 5.331836348214715], [158, 119, 5.29563470926953], [159, 119, 5.373130426878894], [160, 119, 5.25593982371734], [161, 119, 3.7011318634127357], [162, 119, 5.029027308625525], [163, 119, 4.349653611375062], [164, 119, 5.776875048014619], [165, 119, 5.125275136700353], [166, 119, 5.409870923513742], [167, 119, 4.932795056493155], [168, 119, 5.0462613656746615], [169, 119, 5.333352129472001], [170, 119, 5.623352918413655], [171, 119, 4.756017107592705], [172, 119, 4.3476441103516645], [173, 119, 5.872252258659685], [174, 119, 5.3055694775659], [175, 119, 4.961815285454529], [176, 119, 5.237547508732767], [177, 119, 4.901440325581599], [178, 119, 5.149877511390417], [179, 119, 4.580314763774011], [180, 119, 5.360960041281329], [181, 119, 3.631886327990924], [182, 119, 5.031376479677573], [183, 119, 4.813212330066461], [184, 119, 5.033628257630295], [185, 119, 5.270700149720217], [186, 119, 5.091154830788304], [187, 119, 5.269615423633331], [188, 119, 5.073854035636777], [189, 119, 5.262901797218939], [190, 119, 4.98211951307573], [191, 119, 5.010409752512899], [192, 119, 5.014511194940377], [193, 119, 4.786474906664565], [194, 119, 4.429121545902051], [195, 119, 4.997604673491559], [196, 119, 5.04252575753673], 
[197, 119, 4.9233716490305435], [198, 119, 4.6115875499937244], [199, 119, 4.678416507843075], [200, 119, 5.641693457837851], [201, 119, 4.544936145240097], [202, 119, 3.9778311111563402], [203, 119, 4.804626282897184], [204, 119, 4.977518365717772], [205, 119, 4.401194928352744], [206, 119, 4.106967049206878], [207, 119, 4.682103740187742], [208, 119, 5.060732719106861], [209, 119, 4.629147281807057], [210, 119, 5.3792631759553045], [211, 119, 4.565348175844123], [212, 119, 4.9003162978321315], [213, 119, 5.5352937454461655], [214, 119, 5.028530925932685], [215, 119, 4.97315312516204], [216, 119, 5.222819202898463], [217, 119, 4.714379357024195], [218, 119, 4.804917413353149], [219, 119, 5.236264966914828], [220, 119, 5.288490863619137], [221, 119, 5.204223650414475], [222, 119, 4.78594576523568], [223, 119, 4.724111151809978], [224, 119, 4.594328502436183], [225, 119, 5.541201674564044], [226, 119, 4.9525586887561], [227, 119, 4.857243515686353], [228, 119, 4.750007609576216], [229, 119, 4.184070033944575], [230, 119, 5.130317929112821], [231, 119, 5.113941276318599], [232, 119, 5.03453499732367], [233, 119, 5.539321098101139], [234, 119, 4.5411390846018405], [235, 119, 5.1297845391382495], [236, 119, 4.757981316274573], [237, 119, 5.038498326692732], [238, 119, 4.844419494934953], [239, 119, 5.3242916603759385], [240, 119, 5.51837504099506], [241, 119, 4.89531020565281], [242, 119, 5.910342460944168], [243, 119, 4.974225770159702], [244, 119, 5.178614988559292], [245, 119, 4.953739387126585], [246, 119, 4.57910530351419], [247, 119, 5.252082179475536], [248, 119, 4.880703723844909], [249, 119, 5.449323339750327], [250, 119, 5.281424162522337], [251, 119, 5.152415602530395], [252, 119, 5.609545423756664], [253, 119, 5.178419316901218], [254, 119, 4.721938284173718], [255, 119, 4.541961084131481], [256, 119, 5.757223859221523], [257, 119, 5.40955432116383], [258, 119, 5.686292556695088], [259, 119, 5.2077192461572865], [260, 119, 5.86232369996712], [261, 119, 
5.807642473817216], [262, 119, 4.6197177513904135], [263, 119, 5.27389538109063], [264, 119, 5.60034803424948], [265, 119, 5.1988722920656425], [266, 119, 4.514270949171721], [267, 119, 5.502810161867663], [268, 119, 4.606560482974151], [269, 119, 4.492224366152742], [270, 119, 5.852452315679431], [271, 119, 4.886064001866147], [272, 119, 5.488074418675656], [273, 119, 5.0057421519810195], [274, 119, 5.636090934797082], [275, 119, 4.515916367387962], [276, 119, 5.143969622677634], [277, 119, 4.908774073187353], [278, 119, 5.493259202106362], [279, 119, 5.091471764458881], [280, 119, 5.195277205252779], [281, 119, 5.1181221722898345], [282, 119, 5.053158605324266], [283, 119, 5.523010982643097], [284, 119, 5.147643666213289], [285, 119, 5.255722681033678], [286, 119, 5.338082340341463], [287, 119, 5.545114314326665], [288, 119, 5.023738430070666], [289, 119, 4.748066096484142], [290, 119, 4.859582985947547], [291, 119, 5.384420093049576], [292, 119, 5.395134933695142], [293, 119, 4.499762005826577], [294, 119, 5.194981347548985], [295, 119, 5.699792965933896], [296, 119, 5.361977570171258], [297, 119, 4.757159183846139], [298, 119, 5.227965610712197], [299, 119, 4.809375345581329], [300, 119, 5.683857362879431], [301, 119, 5.105383796927711], [302, 119, 4.394641912219202], [303, 119, 5.089716804726402], [304, 119, 4.942861668061295], [305, 119, 4.665378364163169], [306, 119, 5.215384878142734], [307, 119, 5.11203129219475], [308, 119, 5.091855131693625], [309, 119, 5.128825784164722], [310, 119, 5.209230454081929], [311, 119, 5.1407721907061585], [312, 119, 5.502215635457175], [313, 119, 4.94753322151928], [314, 119, 5.4685698855157545], [315, 119, 5.190361430373806], [316, 119, 4.553654288771685], [317, 119, 5.316759370677185], [318, 119, 5.148232521003984], [319, 119, 5.115458523331517], [320, 119, 5.442496863474155], [321, 119, 4.785861139031991], [322, 119, 5.243416821876891], [323, 119, 5.001168158493173], [324, 119, 6.082505452722179], [325, 119, 
4.699041539011805], [326, 119, 4.733236222200766], [327, 119, 4.645555824956997], [328, 119, 4.9660084087009695], [329, 119, 4.697620345867989], [330, 119, 5.876627522375701], [331, 119, 4.913095360631611], [332, 119, 5.838863009009007], [333, 119, 5.084965139539092], [334, 119, 4.891052964934812], [335, 119, 5.161367776135255], [336, 119, 4.768582151852871], [337, 119, 5.415279751205751], [338, 119, 5.091804426455647], [339, 119, 5.376803886536359], [340, 119, 5.312190036689963], [341, 119, 5.77597888172158], [342, 119, 4.882155964910417], [343, 119, 5.390404538753804], [344, 119, 4.987023133428549], [345, 119, 5.077202129085668], [346, 119, 4.861142980210525], [347, 119, 5.135608881449464], [348, 119, 5.659821095455924], [349, 119, 4.8029966289350225], [350, 119, 5.289869436367119], [351, 119, 5.768191948930571], [352, 119, 4.81334928133547], [353, 119, 4.9141583477937045], [354, 119, 4.977289476336035], [355, 119, 5.843330267672947], [356, 119, 4.934119654551965], [357, 119, 5.900413047684568], [358, 119, 4.8408871692655815], [359, 119, 5.384591585540834], [360, 119, 5.126908966062767], [361, 119, 4.995753154003102], [362, 119, 5.066524272872163], [363, 119, 4.600232442894542], [364, 119, 5.274185868438341], [365, 119, 5.014516855389741], [366, 119, 5.9120916466322875], [367, 119, 5.828665375038756], [368, 119, 5.019920803646076], [369, 119, 5.4895885112080105], [370, 119, 4.693825810757257], [371, 119, 5.331534885422194], [372, 119, 5.975112673275686], [373, 119, 5.086739847535638], [374, 119, 4.935110005420545], [375, 119, 5.218355109178074], [376, 119, 4.945703312797557], [377, 119, 5.292964987321108], [378, 119, 4.981086859852017], [379, 119, 5.310292311690096], [380, 119, 4.445444086405465], [381, 119, 5.240853543187897], [382, 119, 4.693246943649045], [383, 119, 5.386984911964422], [384, 119, 5.675350555861452], [385, 119, 4.6651082149990835], [386, 119, 5.287131107228097], [387, 119, 4.697671717897123], [388, 119, 5.760462208990702], [389, 119, 
5.033286893658086], [390, 119, 5.196445439756273], [391, 119, 4.95438200520642], [392, 119, 5.222492319440584], [393, 119, 5.140669115868742], [394, 119, 5.471338507578516], [395, 119, 5.324011530009094], [396, 119, 5.178979717213973], [397, 119, 5.121927809589882], [398, 119, 4.91724536337429], [399, 119, 4.623904112653535], [400, 119, 5.2608999072245926], [401, 119, 4.432003962867626], [402, 119, 5.138838420181101], [403, 119, 5.2309219614477405], [404, 119, 4.837181030816444], [405, 119, 3.772224429965197], [406, 119, 4.852350391794483], [407, 119, 4.840383865503861], [408, 119, 5.407692439538866], [409, 119, 4.844715060587921], [410, 119, 4.671913633520011], [411, 119, 5.0389768698300355], [412, 119, 5.024877348087905], [413, 119, 5.072855067615047], [414, 119, 5.320574876534494], [415, 119, 5.312260662846793], [416, 119, 5.416113841352471], [417, 119, 4.925800215146997], [418, 119, 4.2117934871222], [419, 119, 5.230614907384769], [420, 119, 5.3771441592284575], [421, 119, 4.989592781659587], [422, 119, 5.132224841112521], [423, 119, 5.061491263931092], [424, 119, 4.8482186448082665], [425, 119, 4.5176840128244775], [426, 119, 4.902239935393991], [427, 119, 6.322152733481298], [428, 119, 5.583518500342245], [429, 119, 4.898216522803049], [430, 119, 4.804154027514113], [431, 119, 5.117152546204174], [432, 119, 5.292790286563055], [433, 119, 4.934701214288558], [434, 119, 5.454272372591052], [435, 119, 4.942019821536239], [436, 119, 5.3093586927104495], [437, 119, 4.938502300021082], [438, 119, 5.638781887111042], [439, 119, 5.223140300319519], [440, 119, 5.659652317750877], [441, 119, 5.093152374120208], [442, 119, 4.714826122154337], [443, 119, 5.185717774097684], [444, 119, 5.129951622016915], [445, 119, 3.474804547989016], [446, 119, 4.2617885427448945], [447, 119, 5.045212136858607], [448, 119, 4.834185016126526], [449, 119, 5.182479089157565], [450, 119, 5.389428682925552], [451, 119, 4.634976014848581], [452, 119, 4.744987242371334], [453, 119, 
4.857007120125064], [454, 119, 4.193344728769432], [455, 119, 4.8540175816241335], [456, 119, 4.906784550260121], [457, 119, 5.571213490250388], [458, 119, 4.930647801441382], [459, 119, 5.098160842614203], [460, 119, 4.859785961309035], [461, 119, 4.506609854296745], [462, 119, 5.69311148414584], [463, 119, 4.479524043150128], [464, 119, 5.453781666837509], [465, 119, 4.483099080030898], [466, 119, 4.9267415537753365], [467, 119, 5.038090241723867], [468, 119, 5.228626149934321], [469, 119, 5.54818573398622], [470, 119, 4.956634435133585], [471, 119, 4.829177936514162], [472, 119, 5.955250707982839], [473, 119, 5.201002774027357], [474, 119, 5.327140254142855], [475, 119, 4.7202234003999575], [476, 119, 5.02361199977725], [477, 119, 6.141074699202437], [478, 119, 4.794796900267918], [479, 119, 4.8353372113956405], [480, 119, 4.826259068319772], [481, 119, 5.225181299888465], [482, 119, 4.943101979273367], [483, 119, 4.442810394549186], [484, 119, 5.37735003425589], [485, 119, 4.427732463547683], [486, 119, 4.8787345864029446], [487, 119, 5.188453009564555], [488, 119, 4.555817767164752], [489, 119, 5.631355136877604], [490, 119, 4.240011732858452], [491, 119, 4.951719086276718], [492, 119, 4.545194228693035], [493, 119, 5.072937763969151], [494, 119, 5.135650019541141], [495, 119, 5.550041430650033], [496, 119, 4.419355943401801], [497, 119, 4.921595750490021], [498, 119, 4.5946078082148825], [499, 119, 5.0341753004388226], [500, 119, 4.878417768677971], [501, 119, 5.324527101362827], [502, 119, 4.985085136986191], [503, 119, 5.327046474570401], [504, 119, 5.2629175880891905], [505, 119, 4.769201896696115], [506, 119, 5.113635888476657], [507, 119, 6.292013547159041], [508, 119, 5.088995503011603], [509, 119, 4.084109393770102], [510, 119, 4.814977399153838], [511, 119, 5.615719561042972], [512, 119, 5.4816660912981785], [513, 119, 5.772012526919927], [514, 119, 5.139146861273941], [515, 119, 4.672596537287096], [516, 119, 5.2782099884238765], [517, 119, 
4.964797602856385], [518, 119, 5.368018553220295], [519, 119, 5.884650704638997], [520, 119, 5.059650887130516], [521, 119, 4.607060773939866], [522, 119, 5.359863409996289], [523, 119, 5.7397474810005615], [524, 119, 4.8872078421825345], [525, 119, 4.854494510509091], [526, 119, 4.801337251810864], [527, 119, 4.94909211342071], [528, 119, 5.044396590638414], [529, 119, 5.287340950823258], [530, 119, 5.090482922762932], [531, 119, 5.235678223193207], [532, 119, 5.7311132300247865], [533, 119, 4.824581101754973], [534, 119, 5.9571456174213395], [535, 119, 5.131777153408957], [536, 119, 5.19570050790351], [537, 119, 4.2662587823479505], [538, 119, 4.811003639147802], [539, 119, 5.138352198426621], [540, 119, 5.182683217637528], [541, 119, 5.283737421162892], [542, 119, 4.936783255934791], [543, 119, 4.901665874280385], [544, 119, 4.415500557232555], [545, 119, 5.0948036918425466], [546, 119, 5.677634278545979], [547, 119, 5.120008575672966], [548, 119, 5.147881228083122], [549, 119, 5.291061913172442], [550, 119, 5.355477464544644], [551, 119, 5.404943220912202], [552, 119, 4.819974104721896], [553, 119, 5.250723165102726], [554, 119, 4.960053058279481], [555, 119, 5.473517554483852], [556, 119, 5.408347216447706], [557, 119, 5.158730459513525], [558, 119, 5.338637884376442], [559, 119, 4.745777295524993], [560, 119, 4.787991557218501], [561, 119, 4.407751826200045], [562, 119, 4.822364004330887], [563, 119, 5.583167794665759], [564, 119, 5.04087743825554], [565, 119, 5.727276349178605], [566, 119, 4.829027697087762], [567, 119, 5.041188627111462], [568, 119, 4.408414828024504], [569, 119, 5.021718099683384], [570, 119, 4.433551301244496], [571, 119, 4.489357872796718], [572, 119, 4.89951943038399], [573, 119, 4.833510248734752], [574, 119, 4.942051651896636], [575, 119, 4.224660297550287], [576, 119, 4.952361636450058], [577, 119, 5.35618627873975], [578, 119, 4.316664125722149], [579, 119, 4.778990471760746], [580, 119, 5.41933009054476], [581, 119, 
5.256087151020818], [582, 119, 4.846239105834378], [583, 119, 5.537348257171773], [584, 119, 4.961019540837622], [585, 119, 5.030889182842003], [586, 119, 4.997808170730131], [587, 119, 4.809271875363427], [588, 119, 5.171586654264358], [589, 119, 5.24595948711995], [590, 119, 4.916827323761321], [591, 119, 4.934156206695167], [592, 119, 5.326917116353411], [593, 119, 5.034241214204523], [594, 119, 4.716897462505345], [595, 119, 5.008003360241809], [596, 119, 5.129295484249714], [597, 119, 5.270724694865878], [598, 119, 5.231199021823516], [599, 119, 5.7746316089633], [600, 119, 5.234446006520899], [601, 119, 4.504013686897994], [602, 119, 5.49070849793954], [603, 119, 5.223620425119618], [604, 119, 5.042770524965352], [605, 119, 4.989877824271861], [606, 119, 5.406894088176186], [607, 119, 4.906612176268283], [608, 119, 4.908436023127783], [609, 119, 4.13088792367319], [610, 119, 4.9103298000339475], [611, 119, 5.3419216167194445], [612, 119, 4.837361204505954], [613, 119, 5.411916333308303], [614, 119, 4.8499451661602215], [615, 119, 5.135441274491621], [616, 119, 5.144015711189204], [617, 119, 4.217200371466539], [618, 119, 4.611936674337623], [619, 119, 5.16246689013296], [620, 119, 5.505658576058516], [621, 119, 5.250663028247026], [622, 119, 4.962474565763552], [623, 119, 5.0000003989501804], [624, 119, 5.154391394994647], [625, 119, 4.707325539839868], [626, 119, 4.1674594030698815], [627, 119, 4.719724031611473], [628, 119, 6.229385979330356], [629, 119, 5.384259152119899], [630, 119, 4.9256365463303915], [631, 119, 4.691133086221134], [632, 119, 4.893889199474961], [633, 119, 4.659747539908604], [634, 119, 5.060307606549527], [635, 119, 4.992518980622349], [636, 119, 5.7524138925604955], [637, 119, 4.201943658835363], [638, 119, 4.816832757985242], [639, 119, 4.159366530123672], [640, 119, 5.713789181349426], [641, 119, 5.175627052555137], [642, 119, 5.480532416720213], [643, 119, 5.023498869144431], [644, 119, 5.854102530587965], [645, 119, 
5.110790427269028], [646, 119, 4.710319386303072], [647, 119, 5.174469085543948], [648, 119, 4.941793577809135], [649, 119, 5.353589759701105], [650, 119, 4.591253670832958], [651, 119, 4.730028403770027], [652, 119, 4.682540195327883], [653, 119, 4.2539499194786945], [654, 119, 5.140074507622913], [655, 119, 4.451328142953941], [656, 119, 4.044500548896511], [657, 119, 4.70221010460015], [658, 119, 4.930023891494333], [659, 119, 4.996803409133071], [660, 119, 4.131429770206362], [661, 119, 5.042626303904094], [662, 119, 5.356145477316178], [663, 119, 5.080885672816072], [664, 119, 4.985564624548898], [665, 119, 4.947226214180563], [666, 119, 4.942220435763473], [667, 119, 4.971772887611963], [668, 119, 4.972696195547019], [669, 119, 4.584501171089872], [670, 119, 4.914921700630502], [671, 119, 5.218796734166459], [672, 119, 4.8683014044261945], [673, 119, 5.173705949053478], [674, 119, 5.236247598323732], [675, 119, 5.0313363553760375], [676, 119, 5.061099644941615], [677, 119, 5.28488772211992], [678, 119, 4.672215776187889], [679, 119, 4.867389734284828], [680, 119, 5.1970284169173055], [681, 119, 4.782046435649965], [682, 119, 4.7812295840323875], [683, 119, 4.808275428489925], [684, 119, 4.962647019752243], [685, 119, 3.8048056775387877], [686, 119, 5.629254462834875], [687, 119, 5.189525687313266], [688, 119, 6.679769053778524], [689, 119, 5.301621297973304], [690, 119, 4.713406522735305], [691, 119, 5.414154814098006], [692, 119, 4.832425685149168], [693, 119, 4.426837555811554], [694, 119, 5.3492724283936175], [695, 119, 4.825441398767072], [696, 119, 5.389414442812723], [697, 119, 5.330007610513084], [698, 119, 3.9571462409859652], [699, 119, 4.857010386949514], [700, 119, 4.684856276467835], [701, 119, 5.420330756917081], [702, 119, 4.243130435237866], [703, 119, 5.022739086728591], [704, 119, 4.849106571202066], [705, 119, 5.234827980333065], [706, 119, 4.84459519348846], [707, 119, 4.864224226660609], [708, 119, 4.881878815735469], [709, 119, 
5.183922733444516], [710, 119, 4.874290143847261], [711, 119, 5.217764978618031], [712, 119, 5.379205754217918], [713, 119, 4.387145461846881], [714, 119, 5.065631979664513], [715, 119, 4.862630639835223], [716, 119, 5.183892213168002], [717, 119, 5.235206153632371], [718, 119, 5.4708780713666965], [719, 119, 4.417903458598412], [720, 119, 5.343405037298482], [721, 119, 5.2188679803876425], [722, 119, 5.403054467913802], [723, 119, 4.471663305524687], [724, 119, 4.066828969081406], [725, 119, 5.323478306387982], [726, 119, 4.7377413402549235], [727, 119, 4.759752009598279], [728, 119, 5.136604401607281], [729, 119, 4.489315183223727], [730, 119, 4.745689837142954], [731, 119, 4.696301626626963], [732, 119, 5.70589200375807], [733, 119, 4.618265150782996], [734, 119, 4.725381598147438], [735, 119, 4.767679235389325], [736, 119, 4.404071575076612], [737, 119, 4.895967071373878], [738, 119, 4.97238900150061], [739, 119, 4.980531905340029], [740, 119, 5.106970454026848], [741, 119, 4.87543940683178], [742, 119, 4.76086180525368], [743, 119, 5.016253127067696], [744, 119, 4.941767496192511], [745, 119, 4.142975975255707], [746, 119, 4.9675717532672525], [747, 119, 5.361817830639696], [748, 119, 4.7529827222669], [749, 119, 5.124922354718137], [750, 119, 4.9176545059729655], [751, 119, 4.87933925609656], [752, 119, 4.764233748335658], [753, 119, 4.751971257162278], [754, 119, 4.868588512060784], [755, 119, 4.841785981893766], [756, 119, 4.752011844483538], [757, 119, 4.901907695075796], [758, 119, 5.381407615351838], [759, 119, 5.5847937832968775], [760, 119, 4.647177524509981], [761, 119, 4.5999651438519935], [762, 119, 4.587309949367152], [763, 119, 5.098054370293165], [764, 119, 5.172821733605914], [765, 119, 5.005917020705696], [766, 119, 4.585700009627439], [767, 119, 5.482350707791083], [768, 119, 4.638174928390513], [769, 119, 4.877482556938734], [770, 119, 5.8315099852982435], [771, 119, 4.947740584094914], [772, 119, 5.578727286378729], [773, 119, 
4.724619999828209], [774, 119, 3.6767248114782713], [775, 119, 5.177349559169423], [776, 119, 5.0696158641249935], [777, 119, 5.233343746222884], [778, 119, 4.33957773413252], [779, 119, 5.621449159720873], [780, 119, 4.975029439428033], [781, 119, 4.937198112356666], [782, 119, 4.765577182914758], [783, 119, 5.624970977086942], [784, 119, 5.402414929260256], [785, 119, 4.935665936490744], [786, 119, 5.117733164984038], [787, 119, 4.883441492756236], [788, 119, 4.813657002638947], [789, 119, 5.068492401968926], [790, 119, 4.680863137410789], [791, 119, 5.394788505015768], [792, 119, 5.031061512176776], [793, 119, 5.18149959814334], [794, 119, 5.500017344955217], [795, 119, 4.683850933566036], [796, 119, 5.1175442090621655], [797, 119, 4.58366652338353], [798, 119, 5.0158304038078505], [799, 119, 5.205576705367591], [800, 119, 5.1564272377461435], [801, 119, 5.801329538057645], [802, 119, 5.2879808174803395], [803, 119, 4.8703727256827944], [804, 119, 5.252419655123518], [805, 119, 4.944406380810311], [806, 119, 4.974662135204756], [807, 119, 5.452796743536997], [808, 119, 5.794779917899503], [809, 119, 4.884518615486716], [810, 119, 6.284394261494909], [811, 119, 5.967465243145893], [812, 119, 5.230495844461295], [813, 119, 5.027525100989455], [814, 119, 4.793366394940832], [815, 119, 5.168701234241223], [816, 119, 5.7212782529576], [817, 119, 5.046787963987198], [818, 119, 4.956141337626713], [819, 119, 5.608368103651799], [820, 119, 4.671965592246055], [821, 119, 5.656104332257769], [822, 119, 4.75497672651421], [823, 119, 5.368758812954268], [824, 119, 4.45747411667286], [825, 119, 5.424543722221711], [826, 119, 5.450754880861052], [827, 119, 4.695575517648117], [828, 119, 4.573009705678042], [829, 119, 4.806414206002993], [830, 119, 5.0671661943349555], [831, 119, 4.860860127820009], [832, 119, 4.7692663189923525], [833, 119, 4.5308752038816715], [834, 119, 5.307491105768178], [835, 119, 5.331150991305805], [836, 119, 5.0551256669535425], [837, 119, 
4.612266514438105], [838, 119, 5.285801847424993], [839, 119, 4.718213134622884], [840, 119, 5.338811493235026], [841, 119, 5.488565768667157], [842, 119, 5.225723796129509], [843, 119, 4.216406255194322], [844, 119, 4.745778495450207], [845, 119, 4.5098397149752465], [846, 119, 5.200947389880737], [847, 119, 4.516779255854752], [848, 119, 5.516812015409224], [849, 119, 6.251203352037819], [850, 119, 5.527347878447809], [851, 119, 5.223678192192554], [852, 119, 5.261695018918165], [853, 119, 4.88114319909229], [854, 119, 4.612468029656682], [855, 119, 4.487790152570763], [856, 119, 5.161342094147182], [857, 119, 4.965622635367685], [858, 119, 4.796567698052261], [859, 119, 5.42268976144338], [860, 119, 4.760668165957881], [861, 119, 5.351038279681958], [862, 119, 5.7327289488044055], [863, 119, 4.956972787585617], [864, 119, 5.331138754026031], [865, 119, 3.9454824246497573], [866, 119, 3.9529414743571794], [867, 119, 5.443949757600708], [868, 119, 4.4530039027391375], [869, 119, 4.418624025596287], [870, 119, 4.762649048555637], [871, 119, 4.894814208072195], [872, 119, 5.270132156925509], [873, 119, 4.511925341300865], [874, 119, 5.056284307905434], [875, 119, 5.319773891216068], [876, 119, 5.382513704197602], [877, 119, 5.230747734946368], [878, 119, 4.464978504573857], [879, 119, 5.206979234986366], [880, 119, 5.161778000014536], [881, 119, 4.835809107192686], [882, 119, 5.518468066541994], [883, 119, 5.2870270338030485], [884, 119, 5.097744676598067], [885, 119, 4.823548830499029], [886, 119, 4.8345901760160555], [887, 119, 5.463598379848553], [888, 119, 5.670420440013476], [889, 119, 4.83035143719148], [890, 119, 5.0611655893874135], [891, 119, 5.665294304997954], [892, 119, 5.36027722388848], [893, 119, 5.216080046585126], [894, 119, 5.081047985416832], [895, 119, 5.325709451376277], [896, 119, 4.526959553802731], [897, 119, 5.371410791303421], [898, 119, 5.0881366181640555], [899, 119, 4.804669412081279], [900, 119, 3.9944458046644806], [901, 119, 
5.411423984753554], [902, 119, 4.637151945081609], [903, 119, 5.206940461265847], [904, 119, 5.315425755520142], [905, 119, 4.781654749806642], [906, 119, 5.242153227536237], [907, 119, 6.14092236740025], [908, 119, 4.599443925757107], [909, 119, 5.750259313226152], [910, 119, 4.484461737689663], [911, 119, 4.925077657007165], [912, 119, 4.831394413420391], [913, 119, 4.748169090205628], [914, 119, 4.57457357992255], [915, 119, 4.394357381637095], [916, 119, 4.819872161209081], [917, 119, 5.009941610326153], [918, 119, 4.666423270100798], [919, 119, 5.012647095869783], [920, 119, 4.642960698044466], [921, 119, 4.846180180317139], [922, 119, 4.929665253882761], [923, 119, 5.807129607981566], [924, 119, 4.945665469121162], [925, 119, 4.959289366043956], [926, 119, 4.769391789367214], [927, 119, 5.401862550677506], [928, 119, 5.90027459650935], [929, 119, 4.706129363518036], [930, 119, 4.382542607174525], [931, 119, 5.138232131596746], [932, 814, 5.229106733472933], [933, 119, 4.2154842803536265], [934, 119, 4.975412453500192], [935, 119, 5.398568902268414], [936, 119, 5.398883729354001], [937, 119, 4.593078451515704], [938, 119, 5.0839951883701415], [939, 119, 6.090658929769783], [940, 119, 4.837174032770571], [941, 119, 5.361213132777179], [942, 119, 5.484532765272456], [943, 119, 4.995921413899483]]
Print estimations on unseen items:
user | item | est_score | |
---|---|---|---|
0 | 1 | 5 | 3.516903 |
1 | 1 | 10 | 3.901217 |
2 | 1 | 25 | 3.479851 |
3 | 1 | 32 | 3.833570 |
4 | 1 | 33 | 3.488115 |
... | ... | ... | ... |
19995 | 943 | 928 | 2.995921 |
19996 | 943 | 1067 | 3.482408 |
19997 | 943 | 1074 | 2.943290 |
19998 | 943 | 1188 | 2.732764 |
19999 | 943 | 1228 | 2.522237 |
20000 rows × 3 columns
# Fit the self-implemented baseline on the train matrix and export its results.
# NOTE(review): selfBaselineIU is defined outside this section of the notebook; what
# recommend()/estimate() return is inferred from these call sites only - confirm.
model=selfBaselineIU()
model.fit(train_ui)
# Request recommendations with topK=10 and store them without index or header row.
top_n=pd.DataFrame(model.recommend(user_code_id, item_code_id, topK=10))
top_n.to_csv('Recommendations generated/ml-100k/Self_BaselineIU_reco.csv', index=False, header=False)
# Presumably scores the (user, item) pairs present in test_ui; saved in the same header-less format.
estimations=pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))
estimations.to_csv('Recommendations generated/ml-100k/Self_BaselineIU_estimations.csv', index=False, header=False)
Ready-made baseline - Surprise implementation
import surprise as sp
import time
# Based on surprise.readthedocs.io
def get_top_n(predictions, n=10):
    """Return the ``n`` best-scored items for every user found in ``predictions``.

    Parameters
    ----------
    predictions : iterable of 5-tuples
        ``(uid, iid, true_r, est, details)`` records, such as the list returned by
        ``algo.test(...)``. Only ``uid``, ``iid`` and ``est`` are used.
    n : int, default 10
        Maximum number of items kept per user; ``n <= 0`` keeps no items.

    Returns
    -------
    list of list
        One flat row per user: ``[uid, item1, score1, item2, score2, ...]`` with
        items in decreasing order of estimated score. Items with equal scores
        keep the order in which they appeared in ``predictions``.
    """
    import heapq  # local import so the function does not depend on another cell's imports

    # Group the (item, estimated score) pairs by user.
    scored_by_user = defaultdict(list)
    for uid, iid, _true_r, est, _details in predictions:
        scored_by_user[uid].append((iid, est))

    # heapq.nlargest(n, xs, key=k) is equivalent to sorted(xs, key=k, reverse=True)[:n]
    # but avoids fully sorting each user's (potentially long) candidate list.
    return [
        [uid] + list(chain.from_iterable(heapq.nlargest(n, scored, key=lambda pair: pair[1])))
        for uid, scored in scored_by_user.items()
    ]
# Tell Surprise how the raw files are laid out: tab-separated "user item rating timestamp" rows.
# NOTE(review): the files are read from the working directory here, while the pandas cell at
# the top of the notebook reads them from a Datasets/ml-100k folder - confirm the path.
reader = sp.Reader(line_format='user item rating timestamp', sep='\t')

# Surprise algorithms are fitted on a Trainset, so the whole train file is turned into one.
trainset = sp.Dataset.load_from_file('train.csv', reader=reader).build_full_trainset()

# The test file takes the same route and is then converted with build_testset()
# into the form that algo.test() accepts.
testset = sp.Dataset.load_from_file('test.csv', reader=reader).build_full_trainset().build_testset()

algo = sp.BaselineOnly()
# Alternative worth trying - note how much worse its results are:
# algo = sp.BaselineOnly(bsl_options={'method':'sgd', 'reg':0, 'n_epochs':2000})
# More details: http://courses.ischool.berkeley.edu/i290-dm/s11/SECURE/a1-koren.pdf - chapter 2.1
algo.fit(trainset)

# Recommendations are drawn from the (user, item) pairs that are NOT in the train set.
antitrainset = trainset.build_anti_testset()
predictions = algo.test(antitrainset)

# Keep the 10 best-scored items per user and save them without index or header row.
top_n = pd.DataFrame(get_top_n(predictions, n=10))
top_n.to_csv('Recommendations generated/ml-100k/Ready_Baseline_reco.csv', index=False, header=False)
Estimating biases using als...
# Score the test pairs and report RMSE with Surprise's built-in metric.
predictions = algo.test(testset)
sp.accuracy.rmse(predictions, verbose=True)

# Also save the estimates to a file as header-less (user, item, estimate) rows.
predictions_df = pd.DataFrame(
    [[uid, iid, est] for uid, iid, _true_r, est, _details in predictions]
)
predictions_df.to_csv('Recommendations generated/ml-100k/Ready_Baseline_estimations.csv', index=False, header=False)

# MAE is computed last, so its return value is what the cell displays.
sp.accuracy.mae(predictions, verbose=True)
RMSE: 0.9495 MAE: 0.7525
0.7524871012820799
Let's compare with random
# In Surprise the "random" algorithm is NormalPredictor: it predicts a random value drawn
# from a normal distribution estimated from the train set.
# Seed the random generators first (as the Surprise FAQ recommends for reproducible
# experiments) so that the saved recommendations and the metrics computed afterwards
# do not change on every Restart & Run All.
random.seed(0)
np.random.seed(0)

algo = sp.NormalPredictor()
algo.fit(trainset)

# We want to predict ratings of (user, item) pairs which are not in the train set.
antitrainset = trainset.build_anti_testset()
predictions = algo.test(antitrainset)

# Keep the 10 best-scored items per user and save them without index or header row.
top_n = get_top_n(predictions, n=10)
top_n=pd.DataFrame(top_n)
top_n.to_csv('Recommendations generated/ml-100k/Ready_Random_reco.csv', index=False, header=False)
# Score the test pairs with the currently fitted algo and report RMSE with Surprise's built-in metric.
predictions = algo.test(testset)
sp.accuracy.rmse(predictions, verbose=True)

# Also save the estimates to a file as header-less (user, item, estimate) rows.
predictions_df = pd.DataFrame(
    [[uid, iid, est] for uid, iid, _true_r, est, _details in predictions]
)
predictions_df.to_csv('Recommendations generated/ml-100k/Ready_Random_estimations.csv', index=False, header=False)

# MAE is computed last, so its return value is what the cell displays.
sp.accuracy.mae(predictions, verbose=True)
RMSE: 1.5195 MAE: 1.2152
1.2151550322469735