REK-proj-2/evaluation_and_testing/testing.py

# Load libraries ---------------------------------------------

import numpy as np
import pandas as pd
from sklearn.model_selection import KFold

from evaluation_and_testing.evaluation_measures import rmse
from evaluation_and_testing.evaluation_measures import mape
from evaluation_and_testing.evaluation_measures import tre
from evaluation_and_testing.evaluation_measures import hr
from evaluation_and_testing.evaluation_measures import ndcg

# ------------------------------------------------------------


def evaluate_train_test_split_explicit(recommender, interactions_df, items_df, seed=6789):
    """Evaluate score predictions of a recommender on one train/test split.

    The recommender is fitted once on the training interactions and then
    asked, for every test interaction, to score exactly the item of that
    interaction for the user of that interaction.

    Args:
        recommender: Object exposing ``fit(interactions_df, users_df, items_df)``
            and ``recommend(users_df, items_df, n_recommendations=...)``; the
            result of ``recommend`` must be positionally indexable and carry a
            ``'score'`` column.
        interactions_df: Either a DataFrame with ``user_id``, ``item_id`` and
            ``rating`` columns, or a dict with ready-made ``'train'`` and
            ``'test'`` DataFrames. A DataFrame is shuffled and split 80/20.
        items_df: DataFrame of items containing an ``item_id`` column.
        seed: Seed of the RNG used for shuffling; unused when a pre-split
            dict is supplied.

    Returns:
        Tuple ``(rmse, mape, tre)`` computed from predicted scores and the
        real ratings of the test interactions.
    """
    if isinstance(interactions_df, dict):
        # The caller already provided the split.
        interactions_df_train = interactions_df['train']
        interactions_df_test = interactions_df['test']
    else:
        # Shuffle positions reproducibly and cut at 80% of the rows.
        rng = np.random.RandomState(seed=seed)
        shuffle = np.arange(len(interactions_df))
        rng.shuffle(shuffle)
        shuffle = list(shuffle)

        train_test_split = 0.8
        split_index = int(len(interactions_df) * train_test_split)

        interactions_df_train = interactions_df.iloc[shuffle[:split_index]]
        interactions_df_test = interactions_df.iloc[shuffle[split_index:]]

    # Train the recommender (no users DataFrame is passed)

    recommender.fit(interactions_df_train, None, items_df)

    # Gather predictions

    r_pred = []

    # Read the id columns directly: iterrows() builds one Series per row and
    # therefore upcasts integer ids to float whenever the frame is all-numeric
    # with a float column (e.g. ratings), which would leak float ids into the
    # recommender and into the merge below.
    test_pairs = zip(interactions_df_test['user_id'], interactions_df_test['item_id'])
    for user_id, item_id in test_pairs:
        users_df = pd.DataFrame([user_id], columns=['user_id'])
        eval_items_df = pd.DataFrame([item_id], columns=['item_id'])
        # Attach the item attributes so the recommender sees a full item row.
        eval_items_df = pd.merge(eval_items_df, items_df, on='item_id')
        recommendations = recommender.recommend(users_df, eval_items_df, n_recommendations=1)

        r_pred.append(recommendations.iloc[0]['score'])

    # Hand both vectors to the metrics as arrays, as the leave-one-out
    # evaluation does.
    r_pred = np.array(r_pred)

    # Gather real ratings

    r_real = np.array(interactions_df_test['rating'].tolist())

    # Return evaluation metrics

    return rmse(r_pred, r_real), mape(r_pred, r_real), tre(r_pred, r_real)


def evaluate_train_test_split_implicit(recommender, interactions_df, items_df, seed=6789):
    """Evaluate top-10 recommendations of a recommender on one train/test split.

    The recommender is fitted once on the training interactions. For every
    user present in the test interactions it is asked for 10 recommendations
    out of ``items_df``; these are compared with all test interactions of that
    user through ``hr`` and ``ndcg`` at cutoffs 1, 3, 5 and 10.

    Args:
        recommender: Object exposing ``fit(interactions_df, users_df, items_df)``
            and ``recommend(users_df, items_df, n_recommendations=...)``.
        interactions_df: Either a DataFrame with a ``user_id`` column, or a
            dict with ready-made ``'train'`` and ``'test'`` DataFrames. A
            DataFrame is shuffled and split 80/20.
        items_df: DataFrame of candidate items passed to the recommender.
        seed: Seed of the RNG used for shuffling.

    Returns:
        Tuple ``(hr_1, hr_3, hr_5, hr_10, ndcg_1, ndcg_3, ndcg_5, ndcg_10)``
        where each entry is the mean of the per-user metric values.
    """
    rng = np.random.RandomState(seed=seed)

    if not isinstance(interactions_df, dict):
        # Shuffle row positions reproducibly and cut at 80% of the rows.
        order = np.arange(len(interactions_df))
        rng.shuffle(order)
        order = list(order)

        cut = int(len(interactions_df) * 0.8)

        train_part = interactions_df.iloc[order[:cut]]
        test_part = interactions_df.iloc[order[cut:]]
    else:
        # The caller already provided the split.
        train_part = interactions_df['train']
        test_part = interactions_df['test']

    cutoffs = (1, 3, 5, 10)
    hr_values = {n: [] for n in cutoffs}
    ndcg_values = {n: [] for n in cutoffs}

    # Train the recommender (no users DataFrame is passed)
    recommender.fit(train_part, None, items_df)

    # One evaluation per test user, against all test interactions of that user
    for user_id, user_interactions in test_part.groupby(by='user_id'):
        single_user_df = pd.DataFrame([user_id], columns=['user_id'])
        recommendations = recommender.recommend(single_user_df, items_df, n_recommendations=10)

        for n in cutoffs:
            hr_values[n].append(hr(recommendations, user_interactions, n=n))
        for n in cutoffs:
            ndcg_values[n].append(ndcg(recommendations, user_interactions, n=n))

    # Average over users, hit-rate entries first, then NDCG entries
    hr_means = tuple(np.mean(hr_values[n]) for n in cutoffs)
    ndcg_means = tuple(np.mean(ndcg_values[n]) for n in cutoffs)

    return hr_means + ndcg_means


def evaluate_leave_one_out_explicit(recommender, interactions_df, items_df, max_evals=300, seed=6789):
    """Evaluate score predictions of a recommender with leave-one-out splits.

    A shuffled ``KFold`` with as many folds as there are interactions is used,
    so every fold holds out a single interaction. For each fold the
    recommender is refitted on the remaining interactions and asked to score
    the held-out item for the held-out user.

    Args:
        recommender: Object exposing ``fit(interactions_df, users_df, items_df)``
            and ``recommend(users_df, items_df)``; the result of ``recommend``
            must be positionally indexable and carry a ``'score'`` column.
        interactions_df: DataFrame with ``user_id``, ``item_id`` and ``rating``
            columns.
        items_df: DataFrame of items containing an ``item_id`` column.
        max_evals: Evaluation stops once this many folds have been processed.
            The check is an equality against a fold counter starting at 1, so
            a value the counter never reaches (e.g. 0) does not stop early.
        seed: Seed of the RNG that drives the fold shuffling.

    Returns:
        Tuple ``(rmse, mape, tre)`` computed from predicted scores and the
        real ratings of the held-out interactions.
    """
    rng = np.random.RandomState(seed=seed)

    # Prepare splits of the datasets
    kf = KFold(n_splits=len(interactions_df), random_state=rng, shuffle=True)

    # For each split of the dataset train the recommender, generate recommendations and evaluate

    r_pred = []
    r_real = []
    for n_eval, (train_index, test_index) in enumerate(kf.split(interactions_df.index), start=1):
        # KFold yields row positions, so select positionally. Going through
        # index labels with .loc would return every row sharing a label when
        # the index is not unique, leaking the held-out row into training.
        interactions_df_train = interactions_df.iloc[train_index]
        interactions_df_test = interactions_df.iloc[test_index]

        recommender.fit(interactions_df_train, None, items_df)

        held_out = interactions_df_test.iloc[0]
        recommendations = recommender.recommend(
            interactions_df_test.loc[:, ['user_id']],
            items_df.loc[items_df['item_id'] == held_out['item_id']])

        r_pred.append(recommendations.iloc[0]['score'])
        r_real.append(held_out['rating'])

        if n_eval == max_evals:
            break

    r_pred = np.array(r_pred)
    r_real = np.array(r_real)

    # Return evaluation metrics

    return rmse(r_pred, r_real), mape(r_pred, r_real), tre(r_pred, r_real)


def evaluate_leave_one_out_implicit(recommender, interactions_df, items_df, max_evals=300, seed=6789):
    """Evaluate top-10 recommendations of a recommender with leave-one-out splits.

    A shuffled ``KFold`` with as many folds as there are interactions is used,
    so every fold holds out a single interaction. For each fold the
    recommender is refitted on the remaining interactions and asked for 10
    recommendations for the held-out user; these are compared with the
    held-out interaction through ``hr`` and ``ndcg`` at cutoffs 1, 3, 5, 10.

    Args:
        recommender: Object exposing ``fit(interactions_df, users_df, items_df)``
            and ``recommend(users_df, items_df, n_recommendations=...)``.
        interactions_df: DataFrame of interactions with a ``user_id`` column.
        items_df: DataFrame of candidate items passed to the recommender.
        max_evals: Evaluation stops once this many folds have been processed.
            The check is an equality against a fold counter starting at 1, so
            a value the counter never reaches (e.g. 0) does not stop early.
        seed: Seed of the RNG that drives the fold shuffling.

    Returns:
        Tuple ``(hr_1, hr_3, hr_5, hr_10, ndcg_1, ndcg_3, ndcg_5, ndcg_10)``
        where each entry is the mean of the per-fold metric values.
    """
    rng = np.random.RandomState(seed=seed)

    # Prepare splits of the datasets
    kf = KFold(n_splits=len(interactions_df), random_state=rng, shuffle=True)

    cutoffs = (1, 3, 5, 10)
    hr_values = {n: [] for n in cutoffs}
    ndcg_values = {n: [] for n in cutoffs}

    # For each split of the dataset train the recommender, generate recommendations and evaluate

    for n_eval, (train_index, test_index) in enumerate(kf.split(interactions_df.index), start=1):
        # KFold yields row positions, so select positionally. Going through
        # index labels with .loc would return every row sharing a label when
        # the index is not unique, leaking the held-out row into training.
        interactions_df_train = interactions_df.iloc[train_index]
        interactions_df_test = interactions_df.iloc[test_index]

        recommender.fit(interactions_df_train, None, items_df)
        recommendations = recommender.recommend(
            interactions_df_test.loc[:, ['user_id']], items_df, n_recommendations=10)

        for n in cutoffs:
            hr_values[n].append(hr(recommendations, interactions_df_test, n=n))
        for n in cutoffs:
            ndcg_values[n].append(ndcg(recommendations, interactions_df_test, n=n))

        if n_eval == max_evals:
            break

    # Average over folds, hit-rate entries first, then NDCG entries
    hr_means = tuple(np.mean(hr_values[n]) for n in cutoffs)
    ndcg_means = tuple(np.mean(ndcg_values[n]) for n in cutoffs)

    return hr_means + ndcg_means
Add recommender with HR10 0.116 2021-06-28 20:18:14 +02:00			`# Load libraries ---------------------------------------------`

			`import numpy as np`
			`import pandas as pd`
			`from sklearn.model_selection import KFold`

			`from evaluation_and_testing.evaluation_measures import rmse`
			`from evaluation_and_testing.evaluation_measures import mape`
			`from evaluation_and_testing.evaluation_measures import tre`
			`from evaluation_and_testing.evaluation_measures import hr`
			`from evaluation_and_testing.evaluation_measures import ndcg`

			`# ------------------------------------------------------------`


			`def evaluate_train_test_split_explicit(recommender, interactions_df, items_df, seed=6789):`
			`rng = np.random.RandomState(seed=seed)`

			`if isinstance(interactions_df, dict):`
			`# If interactions_df is a dict with already split data, use the split`
			`interactions_df_train = interactions_df['train']`
			`interactions_df_test = interactions_df['test']`
			`else:`
			`# Otherwise split the dataset into train and test`

			`shuffle = np.arange(len(interactions_df))`
			`rng.shuffle(shuffle)`
			`shuffle = list(shuffle)`

			`train_test_split = 0.8`
			`split_index = int(len(interactions_df) * train_test_split)`

			`interactions_df_train = interactions_df.iloc[shuffle[:split_index]]`
			`interactions_df_test = interactions_df.iloc[shuffle[split_index:]]`

			`# Train the recommender`

			`recommender.fit(interactions_df_train, None, items_df)`

			`# Gather predictions`

			`r_pred = []`

			`for idx, row in interactions_df_test.iterrows():`
			`users_df = pd.DataFrame([row['user_id']], columns=['user_id'])`
			`eval_items_df = pd.DataFrame([row['item_id']], columns=['item_id'])`
			`eval_items_df = pd.merge(eval_items_df, items_df, on='item_id')`
			`recommendations = recommender.recommend(users_df, eval_items_df, n_recommendations=1)`

			`r_pred.append(recommendations.iloc[0]['score'])`

			`# Gather real ratings`

			`r_real = np.array(interactions_df_test['rating'].tolist())`

			`# Return evaluation metrics`

			`return rmse(r_pred, r_real), mape(r_pred, r_real), tre(r_pred, r_real)`


			`def evaluate_train_test_split_implicit(recommender, interactions_df, items_df, seed=6789):`
			`# Write your code here`
			`rng = np.random.RandomState(seed=seed)`

			`if isinstance(interactions_df, dict):`
			`# If interactions_df is a dict with already split data, use the split`
			`interactions_df_train = interactions_df['train']`
			`interactions_df_test = interactions_df['test']`
			`else:`
			`# Otherwise split the dataset into train and test`

			`shuffle = np.arange(len(interactions_df))`
			`rng.shuffle(shuffle)`
			`shuffle = list(shuffle)`

			`train_test_split = 0.8`
			`split_index = int(len(interactions_df) * train_test_split)`

			`interactions_df_train = interactions_df.iloc[shuffle[:split_index]]`
			`interactions_df_test = interactions_df.iloc[shuffle[split_index:]]`

			`hr_1 = []`
			`hr_3 = []`
			`hr_5 = []`
			`hr_10 = []`
			`ndcg_1 = []`
			`ndcg_3 = []`
			`ndcg_5 = []`
			`ndcg_10 = []`

			`# Train the recommender`

			`recommender.fit(interactions_df_train, None, items_df)`

			`# Make recommendations for each user in the test set and calculate the metric`
			`# against all items of that user in the test set`

			`test_user_interactions = interactions_df_test.groupby(by='user_id')`

			`for user_id, user_interactions in test_user_interactions:`

			`recommendations = recommender.recommend(pd.DataFrame([user_id], columns=['user_id']),`
			`items_df, n_recommendations=10)`

			`hr_1.append(hr(recommendations, user_interactions, n=1))`
			`hr_3.append(hr(recommendations, user_interactions, n=3))`
			`hr_5.append(hr(recommendations, user_interactions, n=5))`
			`hr_10.append(hr(recommendations, user_interactions, n=10))`
			`ndcg_1.append(ndcg(recommendations, user_interactions, n=1))`
			`ndcg_3.append(ndcg(recommendations, user_interactions, n=3))`
			`ndcg_5.append(ndcg(recommendations, user_interactions, n=5))`
			`ndcg_10.append(ndcg(recommendations, user_interactions, n=10))`

			`hr_1 = np.mean(hr_1)`
			`hr_3 = np.mean(hr_3)`
			`hr_5 = np.mean(hr_5)`
			`hr_10 = np.mean(hr_10)`
			`ndcg_1 = np.mean(ndcg_1)`
			`ndcg_3 = np.mean(ndcg_3)`
			`ndcg_5 = np.mean(ndcg_5)`
			`ndcg_10 = np.mean(ndcg_10)`

			`return hr_1, hr_3, hr_5, hr_10, ndcg_1, ndcg_3, ndcg_5, ndcg_10`


			`def evaluate_leave_one_out_explicit(recommender, interactions_df, items_df, max_evals=300, seed=6789):`
			`rng = np.random.RandomState(seed=seed)`

			`# Prepare splits of the datasets`
			`kf = KFold(n_splits=len(interactions_df), random_state=rng, shuffle=True)`

			`# For each split of the dataset train the recommender, generate recommendations and evaluate`

			`r_pred = []`
			`r_real = []`
			`n_eval = 1`
			`for train_index, test_index in kf.split(interactions_df.index):`
			`interactions_df_train = interactions_df.loc[interactions_df.index[train_index]]`
			`interactions_df_test = interactions_df.loc[interactions_df.index[test_index]]`

			`recommender.fit(interactions_df_train, None, items_df)`
			`recommendations = recommender.recommend(`
			`interactions_df_test.loc[:, ['user_id']],`
			`items_df.loc[items_df['item_id'] == interactions_df_test.iloc[0]['item_id']])`

			`r_pred.append(recommendations.iloc[0]['score'])`
			`r_real.append(interactions_df_test.iloc[0]['rating'])`

			`if n_eval == max_evals:`
			`break`
			`n_eval += 1`

			`r_pred = np.array(r_pred)`
			`r_real = np.array(r_real)`

			`# Return evaluation metrics`

			`return rmse(r_pred, r_real), mape(r_pred, r_real), tre(r_pred, r_real)`


			`def evaluate_leave_one_out_implicit(recommender, interactions_df, items_df, max_evals=300, seed=6789):`
			`rng = np.random.RandomState(seed=seed)`

			`# Prepare splits of the datasets`
			`kf = KFold(n_splits=len(interactions_df), random_state=rng, shuffle=True)`

			`hr_1 = []`
			`hr_3 = []`
			`hr_5 = []`
			`hr_10 = []`
			`ndcg_1 = []`
			`ndcg_3 = []`
			`ndcg_5 = []`
			`ndcg_10 = []`

			`# For each split of the dataset train the recommender, generate recommendations and evaluate`

			`n_eval = 1`
			`for train_index, test_index in kf.split(interactions_df.index):`
			`interactions_df_train = interactions_df.loc[interactions_df.index[train_index]]`
			`interactions_df_test = interactions_df.loc[interactions_df.index[test_index]]`

			`recommender.fit(interactions_df_train, None, items_df)`
			`recommendations = recommender.recommend(`
			`interactions_df_test.loc[:, ['user_id']], items_df, n_recommendations=10)`

			`hr_1.append(hr(recommendations, interactions_df_test, n=1))`
			`hr_3.append(hr(recommendations, interactions_df_test, n=3))`
			`hr_5.append(hr(recommendations, interactions_df_test, n=5))`
			`hr_10.append(hr(recommendations, interactions_df_test, n=10))`
			`ndcg_1.append(ndcg(recommendations, interactions_df_test, n=1))`
			`ndcg_3.append(ndcg(recommendations, interactions_df_test, n=3))`
			`ndcg_5.append(ndcg(recommendations, interactions_df_test, n=5))`
			`ndcg_10.append(ndcg(recommendations, interactions_df_test, n=10))`

			`if n_eval == max_evals:`
			`break`
			`n_eval += 1`

			`hr_1 = np.mean(hr_1)`
			`hr_3 = np.mean(hr_3)`
			`hr_5 = np.mean(hr_5)`
			`hr_10 = np.mean(hr_10)`
			`ndcg_1 = np.mean(ndcg_1)`
			`ndcg_3 = np.mean(ndcg_3)`
			`ndcg_5 = np.mean(ndcg_5)`
			`ndcg_10 = np.mean(ndcg_10)`

			`return hr_1, hr_3, hr_5, hr_10, ndcg_1, ndcg_3, ndcg_5, ndcg_10`