Prepare test set

import pandas as pd
import numpy as np
import scipy.sparse as sparse
from collections import defaultdict
from itertools import chain
import random
from tqdm import tqdm

# In evaluation we do not load train set - it is not needed
# Raw test interactions: one (user, item, rating, timestamp) row per line.
test = pd.read_csv('./Datasets/ml-100k/test.csv', sep='\t', header=None)
test.columns = ['user', 'item', 'rating', 'timestamp']

# Inner codes are the positions of the raw ids among the sorted categories,
# so they are contiguous and start at 0.
test['user_code'] = test['user'].astype("category").cat.codes
test['item_code'] = test['item'].astype("category").cat.codes

# Two-way lookups between inner codes and raw ids.
user_code_id = {
    code: user_id
    for code, user_id in enumerate(test['user'].astype("category").cat.categories)
}
user_id_code = {user_id: code for code, user_id in user_code_id.items()}
item_code_id = {
    code: item_id
    for code, item_id in enumerate(test['item'].astype("category").cat.categories)
}
item_id_code = {item_id: code for code, item_id in item_code_id.items()}

# Sparse user x item matrix of test ratings, indexed by inner codes.
test_ui = sparse.csr_matrix(
    (test['rating'], (test['user_code'], test['item_code']))
)

Estimations metrics

# Load the (user, item, score) estimations of the Ready_Baseline model.
estimations_df = pd.read_csv(
    'Recommendations generated/ml-100k/Ready_Baseline_estimations.csv',
    header=None,
)
estimations_df.columns = ['user', 'item', 'score']

# Translate raw ids into the inner codes used by test_ui. An id that is not a
# key of the lookup dictionaries raises KeyError here.
estimations_df['user_code'] = list(map(user_id_code.__getitem__, estimations_df['user']))
estimations_df['item_code'] = list(map(item_id_code.__getitem__, estimations_df['item']))

# Same shape as test_ui so both matrices can be compared entry by entry.
estimations = sparse.csr_matrix(
    (
        estimations_df['score'],
        (estimations_df['user_code'], estimations_df['item_code']),
    ),
    shape=test_ui.shape,
)

def estimations_metrics(test_ui, estimations):
    """Return RMSE and MAE of estimated scores as a one-row DataFrame.

    The stored values of both sparse matrices are compared position by
    position, so the two matrices must store the same number of entries in
    the same order.

    Parameters
    ----------
    test_ui : sparse matrix of true ratings (its ``.data`` is read).
    estimations : sparse matrix of estimated scores (its ``.data`` and
        ``.nnz`` are read).

    Returns
    -------
    pandas.DataFrame with a single row and the columns 'RMSE' and 'MAE'.
    """
    errors = estimations.data - test_ui.data
    nb_estimations = estimations.nnz

    rmse = (np.sum(errors ** 2) / nb_estimations) ** (1 / 2)
    mae = np.sum(abs(errors)) / nb_estimations

    return pd.DataFrame([[rmse, mae]], columns=['RMSE', 'MAE'])

# RMSE and MAE of the Ready_Baseline estimations against the test ratings.
estimations_metrics(test_ui, estimations)

	RMSE	MAE
0	0.949459	0.752487

Ranking metrics

import numpy as np
# Each row of the file is: user, item_1, score_1, item_2, score_2, ...
reco = np.loadtxt('Recommendations generated/ml-100k/Ready_Baseline_reco.csv', delimiter=',')
# Let's ignore scores - they are not used in evaluation:
users=reco[:,:1]
items=reco[:,1::2]
# Let's use inner ids instead of real ones.
# dict.get is used (not setdefault) so that looking up an unknown id does not
# insert it into the id->code dictionaries as a side effect.
users=np.vectorize(lambda x: user_id_code.get(x, -1))(users)
items=np.vectorize(lambda x: item_id_code.get(x, -1))(items) # maybe items we recommend are not in test set
# Let's put them into one array: column 0 is the user code, the rest are item codes
reco=np.concatenate((users, items), axis=1)
reco

array([[663, 475,  62, ..., 472, 269, 503],
       [ 48, 313, 475, ..., 591, 175, 466],
       [351, 313, 475, ..., 591, 175, 466],
       ...,
       [259, 313, 475, ...,  11, 591, 175],
       [ 33, 313, 475, ...,  11, 591, 175],
       [ 77, 313, 475, ...,  11, 591, 175]])

def ranking_metrics(test_ui, reco, super_reactions=(), topK=10):
    """Compute top-K ranking metrics averaged over the users of the test set.

    Parameters
    ----------
    test_ui : scipy.sparse.csr_matrix
        User x item matrix of test ratings (``indptr``, ``indices`` and
        ``data`` are read directly).
    reco : 2-D array
        One row per user. Column 0 holds the inner user code, the following
        columns hold recommended inner item codes, best first. Rows whose
        user code is negative (user not in the test set) are skipped.
    super_reactions : collection of rating values, optional
        Ratings that count as "super" reactions. With the empty default the
        two ``*_super`` metrics are reported as 0.
    topK : int, optional
        Number of leading recommendations that are evaluated.

    Returns
    -------
    pandas.DataFrame
        A single row with the columns precision, recall, F_1, F_05,
        precision_super, recall_super, NDCG, mAP, MRR, LAUC and HR.
        Every metric is 0 when no user has test items; the super metrics are
        0 when no user has a super item (instead of raising
        ZeroDivisionError).
    """
    nb_items = test_ui.shape[1]
    super_values = list(super_reactions)

    relevant_users = super_relevant_users = 0
    prec = rec = F_1 = F_05 = prec_super = rec_super = 0.0
    ndcg = mAP = MRR = LAUC = 0.0
    HR = 0

    # Discount of every rank and the best reachable DCG for 1..topK hits.
    cg = 1.0 / np.log2(np.arange(2, topK + 2))
    cg_sum = np.cumsum(cg)
    ranks = np.arange(1, topK + 1)

    for nb_user, user in tqdm(enumerate(reco[:, 0])):
        user = int(user)
        if user < 0:
            # Code -1 marks a user that does not occur in the test set.
            continue

        start, end = test_ui.indptr[user], test_ui.indptr[user + 1]
        u_rated_items = test_ui.indices[start:end]
        nb_u_rated_items = len(u_rated_items)
        if nb_u_rated_items == 0:
            # Skip users with no items in the test set.
            continue
        relevant_users += 1

        # Reading the row slice of test_ui.data avoids expensive
        # test_ui[user, item] lookups.
        u_super_items = u_rated_items[np.isin(test_ui.data[start:end], super_values)]
        if len(u_super_items) > 0:
            super_relevant_users += 1

        # Hit indicator per rank; ranks beyond a short list stay 0.
        top_items = reco[nb_user, 1:topK + 1]
        user_successes = np.zeros(topK)
        user_successes[:len(top_items)] = np.isin(top_items, u_rated_items)
        nb_user_successes = int(user_successes.sum())
        nb_user_super_successes = int(np.isin(top_items, u_super_items).sum())

        prec_u = nb_user_successes / topK
        rec_u = nb_user_successes / nb_u_rated_items
        prec += prec_u
        rec += rec_u

        if prec_u + rec_u > 0:
            F_1 += 2 * (prec_u * rec_u) / (prec_u + rec_u)
            F_05 += (0.5 ** 2 + 1) * (prec_u * rec_u) / (0.5 ** 2 * prec_u + rec_u)

        prec_super += nb_user_super_successes / topK
        # max(..., 1) contributes 0 for users without super items.
        rec_super += nb_user_super_successes / max(len(u_super_items), 1)

        best_hits = min(topK, nb_u_rated_items)
        ndcg += np.dot(user_successes, cg) / cg_sum[best_hits - 1]

        cumsum_successes = np.cumsum(user_successes)
        mAP += np.dot(cumsum_successes / ranks, user_successes) / best_hits

        hit_positions = np.flatnonzero(user_successes)
        if hit_positions.size > 0:
            MRR += 1 / (hit_positions[0] + 1)

        nb_negative_items = nb_items - nb_u_rated_items
        LAUC += (
            np.dot(cumsum_successes, 1 - user_successes)
            + (nb_user_successes + nb_u_rated_items) / 2
            * (nb_negative_items - (topK - nb_user_successes))
        ) / (nb_negative_items * nb_u_rated_items)

        HR += nb_user_successes > 0

    # All accumulators are 0 whenever the matching counter is 0, so dividing
    # by 1 in that case reports 0 rather than failing.
    users_div = relevant_users or 1
    super_div = super_relevant_users or 1

    result = [
        ('precision', prec / users_div),
        ('recall', rec / users_div),
        ('F_1', F_1 / users_div),
        ('F_05', F_05 / users_div),
        ('precision_super', prec_super / super_div),
        ('recall_super', rec_super / super_div),
        ('NDCG', ndcg / users_div),
        ('mAP', mAP / users_div),
        ('MRR', MRR / users_div),
        ('LAUC', LAUC / users_div),
        ('HR', HR / users_div),
    ]
    names, values = zip(*result)
    return pd.DataFrame([values], columns=list(names))

# Ranking metrics of the top-10 lists, with ratings 4 and 5 passed as "super" reactions.
ranking_metrics(test_ui, reco, super_reactions=[4,5], topK=10)

943it [00:00, 7832.26it/s]

	precision	recall	F_1	F_05	precision_super	recall_super	NDCG	mAP	MRR	LAUC	HR
0	0.09141	0.037652	0.04603	0.061286	0.079614	0.056463	0.095957	0.043178	0.198193	0.515501	0.437964

Diversity metrics

def diversity_metrics(test_ui, reco, topK=10):
    """Compute diversity statistics of the top-K recommendations.

    Parameters
    ----------
    test_ui : scipy.sparse.csr_matrix
        User x item matrix of test ratings; its ``indices`` give the items
        that occur in the test set and ``shape[1]`` the number of items.
    reco : 2-D array
        One row per user. Column 0 holds the user code, the following
        columns hold recommended inner item codes. The code -1 marks an item
        that is not in the test set.
    topK : int, optional
        Only the first ``topK`` recommended items of every row are counted,
        consistently with ``ranking_metrics``.

    Returns
    -------
    pandas.DataFrame
        A single row with the columns 'Reco in test' (share of
        recommendations that are test items), 'Test coverage' (share of
        items recommended at least once), 'Shannon' (entropy of the
        recommendation distribution) and 'Gini'. Ratios whose denominator
        is 0 are reported as 0 instead of NaN.
    """
    # Every item of the test set starts with a count of 0.
    frequencies = dict.fromkeys(test_ui.indices.tolist(), 0)

    # Counting frequencies of the items within the top-K columns.
    for item in np.asarray(reco)[:, 1:topK + 1].flat:
        frequencies[item] = frequencies.get(item, 0) + 1

    nb_reco_outside_test = frequencies.pop(-1, 0)

    frequencies = np.array(list(frequencies.values()))
    nb_rec_items = np.count_nonzero(frequencies)
    nb_reco_inside_test = frequencies.sum()
    nb_reco = nb_reco_inside_test + nb_reco_outside_test

    if nb_reco_inside_test > 0:
        shares = np.sort(frequencies / nb_reco_inside_test)
    else:
        # Nothing was recommended from the test set: avoid 0/0.
        shares = np.zeros(len(frequencies))

    # log(0) is -inf; it is replaced with 0 so never-recommended items add
    # nothing to the entropy, and the division warning is silenced.
    with np.errstate(divide='ignore'):
        log_shares = np.nan_to_num(np.log(shares), posinf=0, neginf=0)

    nb_counted_items = len(shares)
    if nb_counted_items > 1:
        gini = np.dot(
            shares, np.arange(1 - nb_counted_items, nb_counted_items, 2)
        ) / (nb_counted_items - 1)
    else:
        gini = 0.0

    nb_items = test_ui.shape[1]
    result = [
        ('Reco in test', nb_reco_inside_test / nb_reco if nb_reco else 0.0),
        ('Test coverage', nb_rec_items / nb_items if nb_items else 0.0),
        ('Shannon', -np.dot(shares, log_shares)),
        ('Gini', gini),
    ]
    names, values = zip(*result)
    return pd.DataFrame([values], columns=list(names))

import importlib

import evaluation_measures as ev

# importlib.reload replaces imp.reload: the `imp` module was removed in
# Python 3.12. Reloading picks up edits made to evaluation_measures.py
# without restarting the kernel.
importlib.reload(ev)

# Diversity statistics of the recommendations loaded above.
x=diversity_metrics(test_ui, reco, topK=10)
x

	Reco in test	Test coverage	Shannon	Gini
0	1.0	0.033911	2.836513	0.991139

To be used in other notebooks

import importlib

import evaluation_measures as ev

# importlib.reload replaces imp.reload: the `imp` module was removed in
# Python 3.12.
importlib.reload(ev)

estimations_df=pd.read_csv('Recommendations generated/ml-100k/Ready_Baseline_estimations.csv', header=None)
reco=np.loadtxt('Recommendations generated/ml-100k/Ready_Baseline_reco.csv', delimiter=',')

# The raw test set, estimations and recommendations are passed as loaded;
# ratings 4 and 5 are passed as the "super" reactions.
ev.evaluate(test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\t', header=None),
            estimations_df=estimations_df, 
            reco=reco,
            super_reactions=[4,5])
# According to the original note the values above are also the defaults, so a
# shorter call passing only estimations_df and reco should work
# (NOTE(review): confirm against evaluation_measures.py).

943it [00:00, 8174.46it/s]

	RMSE	MAE	precision	recall	F_1	F_05	precision_super	recall_super	NDCG	mAP	MRR	LAUC	HR	Reco in test	Test coverage	Shannon	Gini
0	0.949459	0.752487	0.09141	0.037652	0.04603	0.061286	0.079614	0.056463	0.095957	0.043178	0.198193	0.515501	0.437964	1.0	0.033911	2.836513	0.991139

import importlib

import evaluation_measures as ev

# importlib.reload replaces imp.reload: the `imp` module was removed in
# Python 3.12.
importlib.reload(ev)

dir_path="Recommendations generated/ml-100k/"
super_reactions=[4,5]
test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\t', header=None)

# One result row per model is shown in the tables below.
df=ev.evaluate_all(test, dir_path, super_reactions)
#also you can just type ev.evaluate_all() - I put above values as default

943it [00:00, 8620.89it/s]
943it [00:00, 7627.42it/s]
943it [00:00, 8642.57it/s]
943it [00:00, 7752.46it/s]
943it [00:00, 8864.93it/s]
943it [00:00, 8549.57it/s]
943it [00:00, 5768.05it/s]
943it [00:00, 8257.83it/s]
943it [00:00, 7608.73it/s]
943it [00:00, 8086.29it/s]
943it [00:00, 9124.19it/s]
943it [00:00, 8456.44it/s]
943it [00:00, 8696.29it/s]
943it [00:00, 8500.80it/s]
943it [00:00, 9023.45it/s]
943it [00:00, 8529.05it/s]

# First nine columns of the comparison table (the rest is shown in the next cell).
df.iloc[:,:9]

Model	RMSE	MAE	precision	recall	F_1	F_05	precision_super	recall_super
Self_RP3Beta	3.702446	3.527273	0.282185	0.192092	0.186749	0.216980	0.204185	0.240096
Self_TopPop	2.508258	2.217909	0.188865	0.116919	0.118732	0.141584	0.130472	0.137473
Ready_SVD	0.952784	0.750597	0.095228	0.047497	0.053142	0.067082	0.084871	0.076457
Self_SVDBaseline	0.930321	0.734643	0.092683	0.042046	0.048568	0.063218	0.082940	0.068730
Ready_SVDBiased	0.940375	0.742264	0.092153	0.039645	0.046804	0.061886	0.079399	0.055967
Ready_Baseline	0.949459	0.752487	0.091410	0.037652	0.046030	0.061286	0.079614	0.056463
Self_SVD	0.939326	0.740022	0.074549	0.031755	0.038425	0.050562	0.065665	0.050602
Self_GlobalAvg	1.125760	0.943534	0.061188	0.025968	0.031383	0.041343	0.040558	0.032107
Ready_Random	1.518551	1.218784	0.050583	0.024085	0.027323	0.034826	0.031223	0.026436
Ready_I-KNN	1.030386	0.813067	0.026087	0.006908	0.010593	0.016046	0.021137	0.009522
Ready_I-KNNBaseline	0.935327	0.737424	0.002545	0.000755	0.001105	0.001602	0.002253	0.000930
Ready_U-KNNBaseline	0.935327	0.737424	0.002545	0.000755	0.001105	0.001602	0.002253	0.000930
Ready_U-KNN	1.023495	0.807913	0.000742	0.000205	0.000305	0.000449	0.000536	0.000198
Self_TopRated	1.033085	0.822057	0.000954	0.000188	0.000298	0.000481	0.000644	0.000223
Self_BaselineUI	0.967585	0.762740	0.000954	0.000170	0.000278	0.000463	0.000644	0.000189
Self_IKNN	1.018363	0.808793	0.000318	0.000108	0.000140	0.000189	0.000000	0.000000

# Column 0 together with every column from position 9 onwards.
df.iloc[:,np.append(0,np.arange(9, df.shape[1]))]

Model	NDCG	mAP	MRR	LAUC	HR	Reco in test	Test coverage	Shannon	Gini
Self_RP3Beta	0.339114	0.204905	0.572157	0.593544	0.875928	1.000000	0.077201	3.875892	0.974947
Self_TopPop	0.214651	0.111707	0.400939	0.555546	0.765642	1.000000	0.038961	3.159079	0.987317
Ready_SVD	0.109075	0.050124	0.241366	0.520459	0.499470	0.992047	0.217893	4.405246	0.953484
Self_SVDBaseline	0.098937	0.044405	0.203936	0.517696	0.469777	1.000000	0.058442	3.085857	0.988824
Ready_SVDBiased	0.102017	0.047972	0.216876	0.516515	0.441145	0.997455	0.167388	4.235348	0.962085
Ready_Baseline	0.095957	0.043178	0.198193	0.515501	0.437964	1.000000	0.033911	2.836513	0.991139
Self_SVD	0.077117	0.031574	0.165509	0.512485	0.414634	0.981866	0.080087	3.858982	0.975271
Self_GlobalAvg	0.067695	0.027470	0.171187	0.509546	0.384942	1.000000	0.025974	2.711772	0.992003
Ready_Random	0.054902	0.020652	0.137928	0.508570	0.353128	0.987699	0.183261	5.093805	0.908215
Ready_I-KNN	0.024214	0.008958	0.048068	0.499885	0.154825	0.402333	0.434343	5.133650	0.877999
Ready_I-KNNBaseline	0.003444	0.001362	0.011760	0.496724	0.021209	0.482821	0.059885	2.232578	0.994487
Ready_U-KNNBaseline	0.003444	0.001362	0.011760	0.496724	0.021209	0.482821	0.059885	2.232578	0.994487
Ready_U-KNN	0.000845	0.000274	0.002744	0.496441	0.007423	0.602121	0.010823	2.089186	0.995706
Self_TopRated	0.001043	0.000335	0.003348	0.496433	0.009544	0.699046	0.005051	1.945910	0.995669
Self_BaselineUI	0.000752	0.000168	0.001677	0.496424	0.009544	0.600530	0.005051	1.803126	0.996380
Self_IKNN	0.000214	0.000037	0.000368	0.496391	0.003181	0.392153	0.115440	4.174741	0.965327

Check metrics on toy dataset

import importlib

import evaluation_measures as ev
import helpers

# importlib.reload replaces imp.reload: the `imp` module was removed in
# Python 3.12.
importlib.reload(ev)

dir_path="Recommendations generated/toy-example/"
super_reactions=[4,5]
test=pd.read_csv('./Datasets/toy-example/test.csv', sep='\t', header=None)

# The toy recommendation lists hold three items per user, hence topK=3.
display(ev.evaluate_all(test, dir_path, super_reactions, topK=3))
#also you can just type ev.evaluate_all() - I put above values as default

# Load the raw toy data so the metrics above can be checked by hand.
toy_train_read=pd.read_csv('./Datasets/toy-example/train.csv', sep='\t', header=None, names=['user', 'item', 'rating', 'timestamp'])
toy_test_read=pd.read_csv('./Datasets/toy-example/test.csv', sep='\t', header=None, names=['user', 'item', 'rating', 'timestamp'])
reco=pd.read_csv('Recommendations generated/toy-example/Self_BaselineUI_reco.csv', header=None)
estimations=pd.read_csv('Recommendations generated/toy-example/Self_BaselineUI_estimations.csv', names=['user', 'item', 'est_score'])
toy_train_ui, toy_test_ui, toy_user_code_id, toy_user_id_code, \
toy_item_code_id, toy_item_id_code = helpers.data_to_csr(toy_train_read, toy_test_read)

print('Training data:')
display(toy_train_ui.todense())

print('Test data:')
display(toy_test_ui.todense())

print('Recommendations:')
display(reco)

print('Estimations:')
display(estimations)

3it [00:00, 4090.67it/s]

	Model	RMSE	MAE	precision	recall	F_1	F_05	precision_super	recall_super	NDCG	mAP	MRR	LAUC	HR	Reco in test	Test coverage	Shannon	Gini
0	Self_BaselineUI	1.648337	1.575	0.444444	0.888889	0.555556	0.478632	0.333333	0.75	0.72055	0.62963	0.666667	0.722222	1.0	0.777778	0.8	1.351784	0.357143

Training data:

matrix([[3, 4, 0, 0, 5, 0, 0, 4],
        [0, 1, 2, 3, 0, 0, 0, 0],
        [0, 0, 0, 5, 0, 3, 4, 0]], dtype=int64)

Test data:

matrix([[0, 0, 0, 0, 0, 0, 3, 0],
        [0, 0, 0, 0, 5, 0, 0, 0],
        [5, 0, 4, 0, 0, 0, 0, 2]], dtype=int64)

Recommendations:

	0	1	2	3	4	5	6
0	0	30	4.375000	60	4.375000	50	3.375000
1	10	40	4.166667	60	3.166667	70	3.166667
2	20	40	5.333333	70	4.333333	0	3.333333

Estimations:

	user	item	est_score
0	0	60	4.375000
1	10	40	4.166667
2	20	0	3.333333
3	20	20	2.333333
4	20	70	4.333333

Sample recommendations

# Training interactions and the movie catalogue (titles and genres).
train = pd.read_csv(
    './Datasets/ml-100k/train.csv',
    sep='\t',
    header=None,
    names=['user', 'item', 'rating', 'timestamp'],
)
items = pd.read_csv('./Datasets/ml-100k/movies.csv')

# Pick one user of the training set at random.
user = random.choice(list(set(train['user'])))

train_content = train.merge(items, left_on='item', right_on='id')

print('Here is what user rated high:')
display(
    train_content.loc[train_content['user'] == user, ['user', 'rating', 'title', 'genres']]
    .sort_values(by='rating', ascending=False)
    .head(15)
)

reco = np.loadtxt('Recommendations generated/ml-100k/Self_BaselineUI_reco.csv', delimiter=',')
items = pd.read_csv('./Datasets/ml-100k/movies.csv')

# Scores are dropped: column 0 is the user, every second column after it is an item.
reco_users = reco[:, :1]
reco_items = reco[:, 1::2]
reco = np.concatenate((reco_users, reco_items), axis=1)

# Long format: one (user, position in the list, item) row per recommendation.
recommended = pd.DataFrame(
    [
        (row[0], rec_nb, entry)
        for row in reco
        for rec_nb, entry in enumerate(row[1:], start=1)
    ],
    columns=['user', 'rec_nb', 'item'],
)

recommended_content = recommended.merge(items, left_on='item', right_on='id')

print('Here is what we recommend:')
recommended_content.loc[
    recommended_content['user'] == user, ['user', 'rec_nb', 'title', 'genres']
].sort_values(by='rec_nb')

Here is what user rated high:

	user	rating	title	genres
269	523	5	Toy Story (1995)	Animation, Children's, Comedy
31247	523	5	Grease (1978)	Comedy, Musical, Romance
35233	523	5	Much Ado About Nothing (1993)	Comedy, Romance
35436	523	5	Fantasia (1940)	Animation, Children's, Musical
36537	523	5	Shine (1996)	Drama, Romance
37146	523	5	Contact (1997)	Drama, Sci-Fi
38982	523	5	Full Monty, The (1997)	Comedy
1197	523	5	Four Weddings and a Funeral (1994)	Comedy, Romance
44756	523	5	Butch Cassidy and the Sundance Kid (1969)	Action, Comedy, Western
45918	523	5	Wallace & Gromit: The Best of Aardman Animatio...	Animation
46339	523	5	Grand Day Out, A (1992)	Animation, Comedy
50119	523	5	Mrs. Brown (Her Majesty, Mrs. Brown) (1997)	Drama, Romance
50338	523	5	Close Shave, A (1995)	Animation, Comedy, Thriller
52950	523	5	Kolya (1996)	Comedy
53361	523	5	Multiplicity (1996)	Comedy

Here is what we recommend:

	user	rec_nb	title	genres
521	523.0	1	Great Day in Harlem, A (1994)	Documentary
1463	523.0	2	Tough and Deadly (1995)	Action, Drama, Thriller
2405	523.0	3	Aiqing wansui (1994)	Drama
3347	523.0	4	Delta of Venus (1994)	Drama
4289	523.0	5	Someone Else's America (1995)	Drama
5231	523.0	6	Saint of Fort Washington, The (1993)	Drama
6173	523.0	7	Celestial Clockwork (1994)	Comedy
7116	523.0	8	Some Mother's Son (1996)	Drama
9010	523.0	9	Maya Lin: A Strong Clear Vision (1994)	Documentary
8056	523.0	10	Prefontaine (1997)	Drama

project task 3: implement some other evaluation measure

# it may be your idea, modification of what we have already implemented 
# (for example a Hit2 rate, which would count as a success every user who received at least 2 relevant recommendations)
# or something well-known
# expected output: modification of evaluation_measures.py such that evaluate_all will also display your measure

# Re-run the evaluation; once a new measure is added to evaluation_measures.py
# it is expected to show up in this table.
# NOTE: relies on `ev` having been imported by an earlier cell.
dir_path="Recommendations generated/ml-100k/"
super_reactions=[4,5]  # ratings passed as "super" reactions
test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\t', header=None)

ev.evaluate_all(test, dir_path, super_reactions)

943it [00:00, 8687.43it/s]
943it [00:00, 7296.38it/s]
943it [00:00, 8704.77it/s]
943it [00:00, 8001.89it/s]
943it [00:00, 8997.15it/s]
943it [00:00, 8387.52it/s]
943it [00:00, 8062.71it/s]
943it [00:00, 7400.45it/s]
943it [00:00, 7525.94it/s]
943it [00:00, 8338.86it/s]
943it [00:00, 8715.87it/s]
943it [00:00, 8283.65it/s]
943it [00:00, 8345.05it/s]
943it [00:00, 7972.31it/s]
943it [00:00, 8179.38it/s]
943it [00:00, 8320.16it/s]

Model	RMSE	MAE	precision	recall	F_1	F_05	precision_super	recall_super	NDCG	mAP	MRR	LAUC	HR	Reco in test	Test coverage	Shannon	Gini
Self_RP3Beta	3.702446	3.527273	0.282185	0.192092	0.186749	0.216980	0.204185	0.240096	0.339114	0.204905	0.572157	0.593544	0.875928	1.000000	0.077201	3.875892	0.974947
Self_TopPop	2.508258	2.217909	0.188865	0.116919	0.118732	0.141584	0.130472	0.137473	0.214651	0.111707	0.400939	0.555546	0.765642	1.000000	0.038961	3.159079	0.987317
Ready_SVD	0.952784	0.750597	0.095228	0.047497	0.053142	0.067082	0.084871	0.076457	0.109075	0.050124	0.241366	0.520459	0.499470	0.992047	0.217893	4.405246	0.953484
Self_SVDBaseline	0.930321	0.734643	0.092683	0.042046	0.048568	0.063218	0.082940	0.068730	0.098937	0.044405	0.203936	0.517696	0.469777	1.000000	0.058442	3.085857	0.988824
Ready_SVDBiased	0.940375	0.742264	0.092153	0.039645	0.046804	0.061886	0.079399	0.055967	0.102017	0.047972	0.216876	0.516515	0.441145	0.997455	0.167388	4.235348	0.962085
Ready_Baseline	0.949459	0.752487	0.091410	0.037652	0.046030	0.061286	0.079614	0.056463	0.095957	0.043178	0.198193	0.515501	0.437964	1.000000	0.033911	2.836513	0.991139
Self_SVD	0.939326	0.740022	0.074549	0.031755	0.038425	0.050562	0.065665	0.050602	0.077117	0.031574	0.165509	0.512485	0.414634	0.981866	0.080087	3.858982	0.975271
Self_GlobalAvg	1.125760	0.943534	0.061188	0.025968	0.031383	0.041343	0.040558	0.032107	0.067695	0.027470	0.171187	0.509546	0.384942	1.000000	0.025974	2.711772	0.992003
Ready_Random	1.518551	1.218784	0.050583	0.024085	0.027323	0.034826	0.031223	0.026436	0.054902	0.020652	0.137928	0.508570	0.353128	0.987699	0.183261	5.093805	0.908215
Ready_I-KNN	1.030386	0.813067	0.026087	0.006908	0.010593	0.016046	0.021137	0.009522	0.024214	0.008958	0.048068	0.499885	0.154825	0.402333	0.434343	5.133650	0.877999
Ready_I-KNNBaseline	0.935327	0.737424	0.002545	0.000755	0.001105	0.001602	0.002253	0.000930	0.003444	0.001362	0.011760	0.496724	0.021209	0.482821	0.059885	2.232578	0.994487
Ready_U-KNNBaseline	0.935327	0.737424	0.002545	0.000755	0.001105	0.001602	0.002253	0.000930	0.003444	0.001362	0.011760	0.496724	0.021209	0.482821	0.059885	2.232578	0.994487
Ready_U-KNN	1.023495	0.807913	0.000742	0.000205	0.000305	0.000449	0.000536	0.000198	0.000845	0.000274	0.002744	0.496441	0.007423	0.602121	0.010823	2.089186	0.995706
Self_TopRated	1.033085	0.822057	0.000954	0.000188	0.000298	0.000481	0.000644	0.000223	0.001043	0.000335	0.003348	0.496433	0.009544	0.699046	0.005051	1.945910	0.995669
Self_BaselineUI	0.967585	0.762740	0.000954	0.000170	0.000278	0.000463	0.000644	0.000189	0.000752	0.000168	0.001677	0.496424	0.009544	0.600530	0.005051	1.803126	0.996380
Self_IKNN	1.018363	0.808793	0.000318	0.000108	0.000140	0.000189	0.000000	0.000000	0.000214	0.000037	0.000368	0.496391	0.003181	0.392153	0.115440	4.174741	0.965327

82 KiB Raw Blame History

Prepare test set

Estimations metrics

Ranking metrics

Diversity metrics

To be used in other notebooks

Check metrics on toy dataset

Sample recommendations

project task 3: implement some other evaluation measure

82 KiB

Raw Blame History