REK-proj-2/evaluation_and_testing/testing.py

210 lines
7.3 KiB
Python
Raw Normal View History

2021-06-28 20:18:14 +02:00
# Load libraries ---------------------------------------------
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from evaluation_and_testing.evaluation_measures import rmse
from evaluation_and_testing.evaluation_measures import mape
from evaluation_and_testing.evaluation_measures import tre
from evaluation_and_testing.evaluation_measures import hr
from evaluation_and_testing.evaluation_measures import ndcg
# ------------------------------------------------------------
def evaluate_train_test_split_explicit(recommender, interactions_df, items_df, seed=6789):
    """Evaluate explicit-rating predictions on a single train/test split.

    When ``interactions_df`` is a dict, its ``'train'`` and ``'test'`` entries
    are used as the split as-is. Otherwise the row positions are shuffled with
    a ``numpy.random.RandomState`` seeded by ``seed`` and the first 80% of the
    shuffled rows become the training set, the rest the test set.

    The recommender is fitted on the training part. For every test interaction
    it is asked for a single recommendation restricted to the interacted item,
    and the ``score`` of the first returned row is taken as the predicted
    rating.

    :param recommender: object exposing ``fit`` and ``recommend``.
    :param interactions_df: interactions DataFrame, or a dict holding
        ``'train'`` and ``'test'`` DataFrames.
    :param items_df: items DataFrame with an ``item_id`` column.
    :param seed: seed of the shuffling random generator.
    :return: tuple ``(rmse, mape, tre)`` computed on predicted vs. real ratings.
    """
    rng = np.random.RandomState(seed=seed)

    if isinstance(interactions_df, dict):
        # The caller supplied a ready-made split.
        train_df = interactions_df['train']
        test_df = interactions_df['test']
    else:
        # Shuffle row positions and cut them 80% / 20%.
        n_rows = len(interactions_df)
        order = np.arange(n_rows)
        rng.shuffle(order)
        order = list(order)
        cut = int(n_rows * 0.8)
        train_df = interactions_df.iloc[order[:cut]]
        test_df = interactions_df.iloc[order[cut:]]

    # The second positional argument of fit is passed as None
    # (presumably the users data -- confirm against the recommender interface).
    recommender.fit(train_df, None, items_df)

    def predict_rating(interaction):
        # Score exactly one (user, item) pair: the item of this interaction.
        user_frame = pd.DataFrame({'user_id': [interaction['user_id']]})
        item_frame = pd.DataFrame({'item_id': [interaction['item_id']]})
        item_frame = pd.merge(item_frame, items_df, on='item_id')
        recommended = recommender.recommend(user_frame, item_frame, n_recommendations=1)
        return recommended.iloc[0]['score']

    r_pred = [predict_rating(interaction) for _, interaction in test_df.iterrows()]
    r_real = np.array(test_df['rating'].tolist())

    return rmse(r_pred, r_real), mape(r_pred, r_real), tre(r_pred, r_real)
def evaluate_train_test_split_implicit(recommender, interactions_df, items_df, seed=6789):
    """Evaluate top-n recommendations on a single train/test split.

    When ``interactions_df`` is a dict, its ``'train'`` and ``'test'`` entries
    are used as the split as-is. Otherwise the row positions are shuffled with
    a ``numpy.random.RandomState`` seeded by ``seed`` and the first 80% of the
    shuffled rows become the training set, the rest the test set.

    The recommender is fitted on the training part. For every user present in
    the test set, 10 recommendations are requested over all of ``items_df``
    and compared against that user's test interactions with ``hr`` and
    ``ndcg`` at cutoffs 1, 3, 5 and 10. Per-user values are averaged.

    :param recommender: object exposing ``fit`` and ``recommend``.
    :param interactions_df: interactions DataFrame, or a dict holding
        ``'train'`` and ``'test'`` DataFrames.
    :param items_df: items DataFrame handed to the recommender.
    :param seed: seed of the shuffling random generator.
    :return: tuple ``(hr_1, hr_3, hr_5, hr_10, ndcg_1, ndcg_3, ndcg_5, ndcg_10)``.
    """
    rng = np.random.RandomState(seed=seed)

    if isinstance(interactions_df, dict):
        # The caller supplied a ready-made split.
        train_df = interactions_df['train']
        test_df = interactions_df['test']
    else:
        # Shuffle row positions and cut them 80% / 20%.
        n_rows = len(interactions_df)
        order = np.arange(n_rows)
        rng.shuffle(order)
        order = list(order)
        cut = int(n_rows * 0.8)
        train_df = interactions_df.iloc[order[:cut]]
        test_df = interactions_df.iloc[order[cut:]]

    cutoffs = (1, 3, 5, 10)
    hr_values = {n: [] for n in cutoffs}
    ndcg_values = {n: [] for n in cutoffs}

    # The second positional argument of fit is passed as None
    # (presumably the users data -- confirm against the recommender interface).
    recommender.fit(train_df, None, items_df)

    # One recommendation list per test user, scored against all of that
    # user's test interactions.
    for user_id, user_interactions in test_df.groupby(by='user_id'):
        user_frame = pd.DataFrame([user_id], columns=['user_id'])
        recommendations = recommender.recommend(user_frame, items_df, n_recommendations=10)
        for n in cutoffs:
            hr_values[n].append(hr(recommendations, user_interactions, n=n))
        for n in cutoffs:
            ndcg_values[n].append(ndcg(recommendations, user_interactions, n=n))

    hr_means = [np.mean(hr_values[n]) for n in cutoffs]
    ndcg_means = [np.mean(ndcg_values[n]) for n in cutoffs]

    return tuple(hr_means + ndcg_means)
def evaluate_leave_one_out_explicit(recommender, interactions_df, items_df, max_evals=300, seed=6789):
    """Evaluate explicit-rating predictions with leave-one-out validation.

    A shuffled ``KFold`` with as many splits as there are interactions yields
    folds whose test part is a single interaction. For each fold the
    recommender is refitted on the remaining interactions and asked to score
    the held-out (user, item) pair; the ``score`` of the first returned row is
    taken as the predicted rating (``n_recommendations`` is left at the
    recommender's default).

    :param recommender: object exposing ``fit`` and ``recommend``.
    :param interactions_df: interactions DataFrame with ``user_id``,
        ``item_id`` and ``rating`` columns.
    :param items_df: items DataFrame with an ``item_id`` column.
    :param max_evals: evaluation stops once this many folds were processed;
        a value the fold counter never reaches (e.g. non-positive) means all
        folds are evaluated.
    :param seed: seed of the random generator used to shuffle the folds.
    :return: tuple ``(rmse, mape, tre)`` computed on predicted vs. real ratings.
    """
    rng = np.random.RandomState(seed=seed)

    # One fold per interaction, i.e. leave-one-out in random order.
    kf = KFold(n_splits=len(interactions_df), random_state=rng, shuffle=True)

    r_pred = []
    r_real = []
    for n_eval, (train_index, test_index) in enumerate(kf.split(interactions_df.index), start=1):
        # KFold yields positions, so select rows positionally. Looking them up
        # by label would return extra rows when the index is not unique and
        # leak the held-out interaction into the training set.
        interactions_df_train = interactions_df.iloc[train_index]
        interactions_df_test = interactions_df.iloc[test_index]
        held_out = interactions_df_test.iloc[0]

        # The second positional argument of fit is passed as None
        # (presumably the users data -- confirm against the recommender interface).
        recommender.fit(interactions_df_train, None, items_df)
        recommendations = recommender.recommend(
            interactions_df_test.loc[:, ['user_id']],
            items_df.loc[items_df['item_id'] == held_out['item_id']])

        r_pred.append(recommendations.iloc[0]['score'])
        r_real.append(held_out['rating'])

        if n_eval == max_evals:
            break

    r_pred = np.array(r_pred)
    r_real = np.array(r_real)

    return rmse(r_pred, r_real), mape(r_pred, r_real), tre(r_pred, r_real)
def evaluate_leave_one_out_implicit(recommender, interactions_df, items_df, max_evals=300, seed=6789):
    """Evaluate top-n recommendations with leave-one-out validation.

    A shuffled ``KFold`` with as many splits as there are interactions yields
    folds whose test part is a single interaction. For each fold the
    recommender is refitted on the remaining interactions, 10 recommendations
    are requested for the held-out user over all of ``items_df``, and they are
    compared against the held-out interaction with ``hr`` and ``ndcg`` at
    cutoffs 1, 3, 5 and 10. Per-fold values are averaged.

    :param recommender: object exposing ``fit`` and ``recommend``.
    :param interactions_df: interactions DataFrame with a ``user_id`` column.
    :param items_df: items DataFrame handed to the recommender.
    :param max_evals: evaluation stops once this many folds were processed;
        a value the fold counter never reaches (e.g. non-positive) means all
        folds are evaluated.
    :param seed: seed of the random generator used to shuffle the folds.
    :return: tuple ``(hr_1, hr_3, hr_5, hr_10, ndcg_1, ndcg_3, ndcg_5, ndcg_10)``.
    """
    rng = np.random.RandomState(seed=seed)

    # One fold per interaction, i.e. leave-one-out in random order.
    kf = KFold(n_splits=len(interactions_df), random_state=rng, shuffle=True)

    cutoffs = (1, 3, 5, 10)
    hr_values = {n: [] for n in cutoffs}
    ndcg_values = {n: [] for n in cutoffs}

    for n_eval, (train_index, test_index) in enumerate(kf.split(interactions_df.index), start=1):
        # KFold yields positions, so select rows positionally. Looking them up
        # by label would return extra rows when the index is not unique and
        # leak the held-out interaction into the training set.
        interactions_df_train = interactions_df.iloc[train_index]
        interactions_df_test = interactions_df.iloc[test_index]

        # The second positional argument of fit is passed as None
        # (presumably the users data -- confirm against the recommender interface).
        recommender.fit(interactions_df_train, None, items_df)
        recommendations = recommender.recommend(
            interactions_df_test.loc[:, ['user_id']], items_df, n_recommendations=10)

        for n in cutoffs:
            hr_values[n].append(hr(recommendations, interactions_df_test, n=n))
        for n in cutoffs:
            ndcg_values[n].append(ndcg(recommendations, interactions_df_test, n=n))

        if n_eval == max_evals:
            break

    hr_means = [np.mean(hr_values[n]) for n in cutoffs]
    ndcg_means = [np.mean(ndcg_values[n]) for n in cutoffs]

    return tuple(hr_means + ndcg_means)