diff --git a/project_2_recommender_and_evaluation-0_116.ipynb b/project_2_recommender_and_evaluation-0_116.ipynb deleted file mode 100644 index ab5b05d..0000000 --- a/project_2_recommender_and_evaluation-0_116.ipynb +++ /dev/null @@ -1,1873 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 302, - "id": "alike-morgan", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The autoreload extension is already loaded. To reload it, use:\n", - " %reload_ext autoreload\n" - ] - } - ], - "source": [ - "%matplotlib inline\n", - "%load_ext autoreload\n", - "%autoreload 2\n", - "\n", - "import numpy as np\n", - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "from IPython.display import Markdown, display, HTML\n", - "from collections import defaultdict\n", - "\n", - "import torch\n", - "import torch.nn as nn\n", - "import torch.optim as optim\n", - "from livelossplot import PlotLosses\n", - "\n", - "# Fix the dying kernel problem (only a problem in some installations - you can remove it, if it works without it)\n", - "import os\n", - "os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'" - ] - }, - { - "cell_type": "markdown", - "id": "blessed-knitting", - "metadata": {}, - "source": [ - "# Load the dataset for recommenders" - ] - }, - { - "cell_type": "code", - "execution_count": 303, - "id": "victorian-bottom", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", 
- " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user_iditem_idtermlength_of_stay_bucketrate_planroom_segmentn_people_bucketweekend_stay
010WinterVacation[2-3]Standard[260-360][5-inf]True
121WinterVacation[2-3]Standard[160-260][3-4]True
232WinterVacation[2-3]Standard[160-260][2-2]False
343WinterVacation[4-7]Standard[160-260][3-4]True
454WinterVacation[4-7]Standard[0-160][2-2]True
565Easter[4-7]Standard[260-360][5-inf]True
676OffSeason[2-3]Standard[260-360][5-inf]True
787HighSeason[2-3]Standard[160-260][1-1]True
898HighSeason[2-3]Standard[0-160][1-1]True
987HighSeason[2-3]Standard[160-260][1-1]True
1087HighSeason[2-3]Standard[160-260][1-1]True
11109HighSeason[2-3]Standard[160-260][3-4]True
12119HighSeason[2-3]Standard[160-260][3-4]True
131210HighSeason[8-inf]Standard[160-260][3-4]True
141411HighSeason[2-3]Standard[0-160][3-4]True
# %% Load the dataset for recommenders

# Interaction columns that describe the booked item; the feature builders in
# the following cells iterate over this list.
base_item_features = ['term', 'length_of_stay_bucket', 'rate_plan', 'room_segment', 'n_people_bucket', 'weekend_stay']

# Closed vocabulary of every item feature. The order of each list fixes the
# order of the derived one-hot / proportion columns.
column_values_dict = {
    'term': ['WinterVacation', 'Easter', 'OffSeason', 'HighSeason', 'LowSeason', 'MayLongWeekend', 'NewYear', 'Christmas'],
    'length_of_stay_bucket': ['[0-1]', '[2-3]', '[4-7]', '[8-inf]'],
    'rate_plan': ['Standard', 'Nonref'],
    'room_segment': ['[0-160]', '[160-260]', '[260-360]', '[360-500]', '[500-900]'],
    'n_people_bucket': ['[1-1]', '[2-2]', '[3-4]', '[5-inf]'],
    'weekend_stay': ['True', 'False']
}


def to_categorical_item_features(df, column_values=None, stringify=('weekend_stay',)):
    """
    Return a copy of ``df`` whose item-feature columns are ``pd.Categorical``.

    :param pd.DataFrame df: Interactions frame containing every column named in ``column_values``.
    :param dict column_values: Mapping column name -> ordered list of allowed values.
        Defaults to the module-level ``column_values_dict``.
    :param tuple stringify: Columns converted with ``astype(str)`` before the cast
        (``weekend_stay`` is stored as bool in the CSV but its categories are strings).
    :return: New DataFrame; the input frame is not modified. Values outside the
        declared categories become NaN.
    :rtype: pd.DataFrame
    """
    if column_values is None:
        column_values = column_values_dict

    casted = df.copy()
    for column, categories in column_values.items():
        values = casted[column]
        if column in stringify:
            values = values.astype(str)
        # Assign through df[column], not df.loc[:, column]: since pandas 2.0 the
        # .loc form writes in place and keeps the old object dtype, silently
        # discarding the categorical dtype that pd.get_dummies relies on later.
        casted[column] = pd.Categorical(values, categories=categories)
    return casted


# The kernel runs cells with __name__ == "__main__", so this executes in the
# notebook exactly as before while keeping the definitions above importable.
if __name__ == "__main__":
    data_path = os.path.join("data", "hotel_data")

    interactions_df = pd.read_csv(os.path.join(data_path, "hotel_data_interactions_df.csv"), index_col=0)
    interactions_df = to_categorical_item_features(interactions_df)

    display(HTML(interactions_df.head(15).to_html()))
}, - { - "cell_type": "markdown", - "id": "realistic-third", - "metadata": {}, - "source": [ - "# (Optional) Prepare numerical user features\n", - "\n", - "The method below is left here for convenience if you want to experiment with content-based user features as an input for your neural network." - ] - }, - { - "cell_type": "code", - "execution_count": 304, - "id": "variable-jaguar", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['user_term_WinterVacation', 'user_term_Easter', 'user_term_OffSeason', 'user_term_HighSeason', 'user_term_LowSeason', 'user_term_MayLongWeekend', 'user_term_NewYear', 'user_term_Christmas', 'user_length_of_stay_bucket_[0-1]', 'user_length_of_stay_bucket_[2-3]', 'user_length_of_stay_bucket_[4-7]', 'user_length_of_stay_bucket_[8-inf]', 'user_rate_plan_Standard', 'user_rate_plan_Nonref', 'user_room_segment_[0-160]', 'user_room_segment_[160-260]', 'user_room_segment_[260-360]', 'user_room_segment_[360-500]', 'user_room_segment_[500-900]', 'user_n_people_bucket_[1-1]', 'user_n_people_bucket_[2-2]', 'user_n_people_bucket_[3-4]', 'user_n_people_bucket_[5-inf]', 'user_weekend_stay_True', 'user_weekend_stay_False']\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user_iduser_term_WinterVacationuser_term_Easteruser_term_OffSeasonuser_term_HighSeasonuser_term_LowSeasonuser_term_MayLongWeekenduser_term_NewYearuser_term_Christmasuser_length_of_stay_bucket_[0-1]user_length_of_stay_bucket_[2-3]user_length_of_stay_bucket_[4-7]user_length_of_stay_bucket_[8-inf]user_rate_plan_Standarduser_rate_plan_Nonrefuser_room_segment_[0-160]user_room_segment_[160-260]user_room_segment_[260-360]user_room_segment_[360-500]user_room_segment_[500-900]user_n_people_bucket_[1-1]user_n_people_bucket_[2-2]user_n_people_bucket_[3-4]user_n_people_bucket_[5-inf]user_weekend_stay_Trueuser_weekend_stay_False
010.1304350.00.6521740.0869570.1304350.0000000.0000000.0000000.0000000.6086960.3913040.0000000.5217390.4782610.0000000.8695650.1304350.0000000.00.0000000.7391300.1739130.0869570.7826090.217391
47500.0434780.00.4347830.3043480.2173910.0000000.0000000.0000000.0000000.9130430.0869570.0000000.2608700.7391300.0000000.5652170.4347830.0000000.00.0000000.1739130.5217390.3043480.7826090.217391
92960.0833330.00.7083330.1250000.0416670.0416670.0000000.0000000.2500000.6666670.0416670.0416670.2916670.7083330.1250000.7916670.0833330.0000000.00.0416670.3333330.5416670.0833330.7500000.250000
1111150.7272730.00.2727270.0000000.0000000.0000000.0000000.0000000.5000000.3636360.1363640.0000001.0000000.0000000.0000000.8181820.1818180.0000000.00.8181820.0909090.0454550.0454550.3636360.636364
6757060.0919880.00.4510390.1899110.2077150.0385760.0118690.0089020.1691390.4599410.2729970.0979230.9940650.0059350.0207720.8397630.1305640.0089020.00.0415430.0949550.7388720.1246290.6765580.323442
169917360.0344830.00.4827590.2068970.2758620.0000000.0000000.0000000.2413790.5517240.2068970.0000000.1724140.8275860.0000000.9310340.0689660.0000000.00.3793100.4137930.2068970.0000000.4482760.551724
763977790.0370370.00.2962960.2592590.3703700.0000000.0000000.0370370.1111110.2962960.4814810.1111111.0000000.0000000.0000000.8148150.1851850.0000000.00.0000000.0370370.7407410.2222220.8148150.185185
# %% (Optional) Prepare numerical user features

def n_to_p(l):
    """
    Normalise a list of counts into proportions.

    :param list l: Non-negative counts.
    :return: ``[x / sum(l) for x in l]``; when the counts sum to zero the input
        list itself is returned unchanged (avoids division by zero).
    :rtype: list
    """
    n = sum(l)
    return [x / n for x in l] if n > 0 else l


def calculate_p(x, values):
    """
    Compute the share of each allowed value among the observations ``x``.

    :param iterable x: Observed values.
    :param list values: Allowed values; the result follows this order.
    :return: One proportion per entry of ``values``.
    :rtype: list

    Observations that are not in ``values`` (including NaN, which is what the
    categorical cast produces for unknown labels) are ignored instead of raising.
    """
    position = {value: i for i, value in enumerate(values)}
    counts = [0] * len(values)
    for v in x:
        i = position.get(v)
        if i is not None:
            counts[i] += 1

    return n_to_p(counts)


def prepare_users_df(interactions_df, item_feature_columns=None, column_values=None):
    """
    Build one row per user holding the distribution of item-feature values
    over that user's interactions.

    :param pd.DataFrame interactions_df: Interactions with a ``user_id`` column and
        every column listed in ``item_feature_columns``.
    :param list item_feature_columns: Feature columns to summarise. Defaults to the
        module-level ``base_item_features``.
    :param dict column_values: Mapping column -> ordered list of allowed values.
        Defaults to the module-level ``column_values_dict``.
    :return: ``(users_df, user_features)`` where ``users_df`` has ``user_id`` (sorted
        ascending) plus one ``user_<column>_<value>`` proportion column per allowed
        value, and ``user_features`` lists those column names in order.
    :rtype: tuple
    """
    if item_feature_columns is None:
        item_feature_columns = base_item_features
    if column_values is None:
        column_values = column_values_dict

    user_ids = interactions_df['user_id'].to_numpy()
    users_df = (
        interactions_df[['user_id']]
        .drop_duplicates()
        .sort_values('user_id')
        .reset_index(drop=True)
    )

    user_features = []

    for column in item_feature_columns:
        values = list(column_values[column])
        p_columns = ["user_" + column + "_" + value for value in values]

        # Compare as plain objects so that categorical NaNs simply never match.
        observed = interactions_df[column].astype(object)
        indicator = pd.DataFrame(
            {p_column: observed.eq(value).to_numpy(dtype=float)
             for p_column, value in zip(p_columns, values)}
        )

        counts = indicator.groupby(user_ids).sum()
        counts.index.name = 'user_id'
        totals = counts.sum(axis=1)
        # Users without any recognised value keep all-zero rows (same as n_to_p).
        proportions = counts.div(totals.where(totals > 0, 1.0), axis=0)

        users_df = users_df.merge(proportions.reset_index(), on='user_id')
        user_features.extend(p_columns)

    return users_df, user_features


# The kernel runs cells with __name__ == "__main__", so this executes in the
# notebook exactly as before while keeping the definitions above importable.
if __name__ == "__main__":
    users_df, user_features = prepare_users_df(interactions_df)

    print(user_features)

    display(HTML(users_df.loc[users_df['user_id'].isin([706, 1736, 7779, 96, 1, 50, 115])].head(15).to_html()))

# %% [markdown]
# # (Optional) Prepare numerical item features
#
# The method below is left here for convenience if you want to experiment with
# content-based item features as an input for your neural network.
- ] - }, - { - "cell_type": "code", - "execution_count": 305, - "id": "formal-munich", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['term_WinterVacation', 'term_Easter', 'term_OffSeason', 'term_HighSeason', 'term_LowSeason', 'term_MayLongWeekend', 'term_NewYear', 'term_Christmas', 'length_of_stay_bucket_[0-1]', 'length_of_stay_bucket_[2-3]', 'length_of_stay_bucket_[4-7]', 'length_of_stay_bucket_[8-inf]', 'rate_plan_Standard', 'rate_plan_Nonref', 'room_segment_[0-160]', 'room_segment_[160-260]', 'room_segment_[260-360]', 'room_segment_[360-500]', 'room_segment_[500-900]', 'n_people_bucket_[1-1]', 'n_people_bucket_[2-2]', 'n_people_bucket_[3-4]', 'n_people_bucket_[5-inf]', 'weekend_stay_True', 'weekend_stay_False']\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
item_idterm_WinterVacationterm_Easterterm_OffSeasonterm_HighSeasonterm_LowSeasonterm_MayLongWeekendterm_NewYearterm_Christmaslength_of_stay_bucket_[0-1]length_of_stay_bucket_[2-3]length_of_stay_bucket_[4-7]length_of_stay_bucket_[8-inf]rate_plan_Standardrate_plan_Nonrefroom_segment_[0-160]room_segment_[160-260]room_segment_[260-360]room_segment_[360-500]room_segment_[500-900]n_people_bucket_[1-1]n_people_bucket_[2-2]n_people_bucket_[3-4]n_people_bucket_[5-inf]weekend_stay_Trueweekend_stay_False
001000000001001000100000110
111000000001001001000001010
221000000001001001000010001
331000000000101001000001010
441000000000101010000010010
550100000000101000100000110
660010000001001000100000110
# %% (Optional) Prepare numerical item features

def map_items_to_onehot(df, item_feature_columns=None):
    """
    Replace the item-feature columns of ``df`` by their one-hot encoding.

    :param pd.DataFrame df: Frame containing the columns in ``item_feature_columns``.
    :param list item_feature_columns: Columns to encode. Defaults to the module-level
        ``base_item_features``.
    :return: ``(encoded_df, one_hot_columns)`` - the frame with the original feature
        columns dropped and the indicator columns joined on the index, and the list
        of indicator column names.
    :rtype: tuple
    """
    if item_feature_columns is None:
        item_feature_columns = base_item_features
    item_feature_columns = list(item_feature_columns)

    # dtype is pinned because the pandas default changed from uint8 to bool in
    # 2.0; fixing it keeps the 0/1 feature matrix identical across versions.
    one_hot = pd.get_dummies(df.loc[:, item_feature_columns], dtype='uint8')
    encoded = df.drop(columns=item_feature_columns).join(one_hot)

    return encoded, list(one_hot.columns)


def prepare_items_df(interactions_df, item_feature_columns=None):
    """
    Build the item table (one row per distinct ``item_id`` / feature combination)
    with one-hot encoded features.

    :param pd.DataFrame interactions_df: Interactions with ``item_id`` and the
        item-feature columns.
    :param list item_feature_columns: Columns to encode. Defaults to the module-level
        ``base_item_features``.
    :return: ``(items_df, item_features)`` - the encoded item frame and the names of
        its indicator columns.
    :rtype: tuple
    """
    if item_feature_columns is None:
        item_feature_columns = base_item_features
    item_feature_columns = list(item_feature_columns)

    distinct_items = interactions_df.loc[:, ["item_id"] + item_feature_columns].drop_duplicates()

    return map_items_to_onehot(distinct_items, item_feature_columns)


# The kernel runs cells with __name__ == "__main__", so this executes in the
# notebook exactly as before while keeping the definitions above importable.
if __name__ == "__main__":
    items_df, item_features = prepare_items_df(interactions_df)

    print(item_features)

    display(HTML(items_df.loc[items_df['item_id'].isin([0, 1, 2, 3, 4, 5, 6])].head(15).to_html()))

# %% [markdown]
# # Neural network recommender
#
# **Task:**
\n", - "Code a recommender based on a neural network model. You are free to choose any network architecture you find appropriate. The network can use the interaction vectors for users and items, embeddings of users and items, as well as user and item features (you can use the features you developed in the first project).\n", - "\n", - "Remember to keep control over randomness - in the init method add the seed as a parameter and initialize the random seed generator with that seed (both for numpy and pytorch):\n", - "\n", - "```python\n", - "self.seed = seed\n", - "self.rng = np.random.RandomState(seed=seed)\n", - "```\n", - "in the network model:\n", - "```python\n", - "self.seed = torch.manual_seed(seed)\n", - "```\n", - "\n", - "You are encouraged to experiment with:\n", - " - the number of layers in the network, the number of neurons and different activation functions,\n", - " - different optimizers and their parameters,\n", - " - batch size and the number of epochs,\n", - " - embedding layers,\n", - " - content-based features of both users and items." 
# %% Neural network recommender
from recommenders.recommender import Recommender


# Experiment log (the discarded variants shared the layer layout of Net below):
#   * dropout after fc1/fc2 only, head of output_len + 200 logits -> HR@10 = 0.07
#   * extra dropout after fc3, head of output_len + 150 logits    -> HR@10 = 0.06
#   * nn.Softmax appended after fc4                                -> very bad
#     (nn.CrossEntropyLoss already applies log-softmax to the logits)
#   * current Net                                                  -> HR@10 = 0.083

# One-hot item columns used as network input. length_of_stay_bucket is
# deliberately left out, as in the original experiments.
NN_ITEM_FEATURES = [
    'term_WinterVacation', 'term_Easter', 'term_OffSeason', 'term_HighSeason',
    'term_LowSeason', 'term_MayLongWeekend', 'term_NewYear', 'term_Christmas',
    'rate_plan_Standard', 'rate_plan_Nonref',
    'room_segment_[0-160]', 'room_segment_[160-260]', 'room_segment_[260-360]',
    'room_segment_[360-500]', 'room_segment_[500-900]',
    'n_people_bucket_[1-1]', 'n_people_bucket_[2-2]', 'n_people_bucket_[3-4]',
    'n_people_bucket_[5-inf]',
    'weekend_stay_True', 'weekend_stay_False',
]
# Matching per-user proportion columns: same order, "user_" prefix.
NN_USER_FEATURES = ['user_' + name for name in NN_ITEM_FEATURES]


class Net(nn.Module):
    """
    Feed-forward classifier mapping a feature vector to one logit per item id.

    :param int features_len: Size of the input vector.
    :param int output_len: Width of the last hidden layer (number of distinct items).
    :param int n_extra_outputs: Additional logits on top of ``output_len`` so that
        item ids larger than the number of distinct items still have a class.
        The default of 200 reproduces the original architecture.
    """

    def __init__(self, features_len, output_len, n_extra_outputs=200):
        super(Net, self).__init__()

        self.fc1 = nn.Linear(features_len, 150)
        self.fc2 = nn.Linear(150, 100)
        self.fc3 = nn.Linear(100, output_len)
        self.fc4 = nn.Linear(output_len, output_len + n_extra_outputs)

        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        # torch.relu is used instead of F.relu: torch.nn.functional is never
        # imported in this notebook, so F only existed as leftover kernel state.
        x = self.dropout(torch.relu(self.fc1(x)))
        x = self.dropout(torch.relu(self.fc2(x)))
        x = torch.relu(self.fc3(x))
        return self.fc4(x)


class NNRecommender(Recommender):
    """
    Neural-network recommender.

    The network is trained as a classifier from one-hot item features to the
    item id. At serving time the user's feature distribution (same column order)
    is fed through the network and the highest-scoring item ids are returned.
    """

    def __init__(self, seed=6789, n_neg_per_pos=5, n_epochs=5000, lr=0.01, holdout_fraction=0.2):
        """
        Initialize base recommender params and variables.

        :param int seed: Seed for numpy and torch random generators.
        :param int n_neg_per_pos: Kept for interface compatibility; not used by this model.
        :param int n_epochs: Number of full-batch training epochs.
        :param float lr: Adam learning rate.
        :param float holdout_fraction: Share of items excluded from training. The
            default 0.2 mirrors the original 80/20 split; pass 0 to train on every item.
        """
        self.model = None
        self.net = None
        self.item_ids = None
        self.n_neg_per_pos = n_neg_per_pos

        self.recommender_df = pd.DataFrame(columns=['user_id', 'item_id', 'score'])
        self.users_df = None
        self.user_features = None

        self.seed = seed
        self.rng = np.random.RandomState(seed=seed)

        # Tuners such as hyperopt's quniform hand over floats; range() needs an int.
        self.n_epochs = int(n_epochs)
        self.lr = lr
        self.holdout_fraction = holdout_fraction

    def calculate_accuracy(self, y_true, y_pred):
        """Return the share of rows whose arg-max logit equals the true class."""
        predictions = y_pred.argmax(1)
        return (predictions == y_true).sum().float() / len(y_true)

    def round_tensor(self, t, decimal_places=3):
        """Return the scalar tensor ``t`` as a Python float rounded to ``decimal_places``."""
        return round(t.item(), decimal_places)

    def fit(self, interactions_df, users_df, items_df):
        """
        Training of the recommender.

        :param pd.DataFrame interactions_df: DataFrame with recorded interactions between users and items
            defined by user_id, item_id and features of the interaction.
        :param pd.DataFrame users_df: Ignored; user features are rebuilt from interactions_df.
        :param pd.DataFrame items_df: Ignored; item features are rebuilt from interactions_df.
        :raises ValueError: If interactions_df contains no items.
        """
        interactions_df = interactions_df.copy()

        users_df, user_features = prepare_users_df(interactions_df)
        self.users_df = users_df
        self.user_features = user_features

        items_df, item_features = prepare_items_df(interactions_df)
        items_df = items_df.loc[:, ['item_id'] + item_features]

        features = items_df[NN_ITEM_FEATURES].to_numpy(dtype=np.float32)
        targets = items_df['item_id'].to_numpy(dtype=np.int64)
        n_samples = features.shape[0]
        if n_samples == 0:
            raise ValueError("Cannot fit NNRecommender: interactions_df contains no items.")

        # Seeded hold-out using the recommender's own generator (the original
        # relied on a train_test_split name that is never imported here).
        # At least one sample always stays in the training set.
        n_holdout = min(int(np.ceil(n_samples * self.holdout_fraction)), n_samples - 1)
        order = self.rng.permutation(n_samples)
        train_idx = np.sort(order[n_holdout:])

        X_train = torch.from_numpy(features[train_idx])
        y_train = torch.from_numpy(targets[train_idx])

        self.item_ids = np.unique(targets)
        n_items = int(self.item_ids.size)
        # Item ids are used directly as class indices, so the head must reach
        # max(item_id); 200 is the original padding and stays the minimum.
        n_extra_outputs = max(200, int(targets.max()) + 1 - n_items)

        torch.manual_seed(self.seed)
        self.net = Net(X_train.shape[1], n_items, n_extra_outputs=n_extra_outputs)

        optimizer = optim.Adam(self.net.parameters(), lr=self.lr)
        criterion = nn.CrossEntropyLoss()

        self.net.train()
        for _ in range(self.n_epochs):
            optimizer.zero_grad()
            train_loss = criterion(self.net(X_train), y_train)
            train_loss.backward()
            optimizer.step()
        # Leave the network in inference mode so dropout is disabled afterwards.
        self.net.eval()

    def recommend(self, users_df, items_df, n_recommendations=1):
        """
        Serving of recommendations. Scores items in items_df for each user in users_df and returns
        top n_recommendations for each user.

        :param pd.DataFrame users_df: DataFrame with users and their features for which recommendations should be generated.
        :param pd.DataFrame items_df: DataFrame with items and their features which should be scored.
            When None, the item ids seen during fit are used as candidates.
        :param int n_recommendations: Number of recommendations to be returned for each user.
        :return: DataFrame with user_id, item_id and score as columns returning n_recommendations top recommendations
            for each user.
        :rtype: pd.DataFrame
        :raises RuntimeError: If called before fit.
        """
        if self.net is None:
            raise RuntimeError("NNRecommender.recommend called before fit.")

        # Users unseen during fit get an all-zero feature vector.
        profiles = users_df.loc[:, ['user_id']].merge(self.users_df, on=['user_id'], how='left').fillna(0)
        user_matrix = torch.from_numpy(profiles[NN_USER_FEATURES].to_numpy(dtype=np.float32))

        # eval() + no_grad(): without them dropout keeps firing at inference and
        # the same user can receive different recommendations on every call.
        self.net.eval()
        with torch.no_grad():
            scores = self.net(user_matrix).numpy()

        # Only real items may be recommended - never the padding logits.
        if items_df is not None and 'item_id' in items_df.columns:
            candidate_ids = items_df['item_id'].to_numpy()
        else:
            candidate_ids = self.item_ids
        candidate_ids = np.unique(np.asarray(candidate_ids, dtype=np.int64))
        candidate_ids = candidate_ids[(candidate_ids >= 0) & (candidate_ids < scores.shape[1])]

        k = min(int(n_recommendations), int(candidate_ids.size))
        if k <= 0 or len(profiles) == 0:
            self.recommender_df = pd.DataFrame(columns=['user_id', 'item_id', 'score'])
            return self.recommender_df

        candidate_scores = scores[:, candidate_ids]
        top = np.argsort(-candidate_scores, axis=1, kind='stable')[:, :k]

        # Built once for all users instead of concatenating inside a loop.
        self.recommender_df = pd.DataFrame({
            'user_id': np.repeat(profiles['user_id'].to_numpy(), k),
            'item_id': candidate_ids[top].ravel(),
            'score': np.take_along_axis(candidate_scores, top, axis=1).ravel(),
        })

        return self.recommender_df


# Example usage:
# nn_recommender = NNRecommender(n_epochs=10000, lr=0.02)
# nn_recommender.fit(interactions_df.head(1000), None, None)
# nn_recommender.fit(interactions_df, None, None)

# %% [markdown]
# # Quick test of the recommender
412, - "id": "greatest-canon", - "metadata": {}, - "outputs": [], - "source": [ - "items_df = interactions_df.loc[:, ['item_id'] + base_item_features].drop_duplicates()" - ] - }, - { - "cell_type": "code", - "execution_count": 413, - "id": "initial-capital", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "epoch 0\n", - " Train set - loss: 6.042, accuracy: 0.011\n", - " Test set - loss: 6.025, accuracy: 0.0\n", - " \n", - "epoch 100\n", - " Train set - loss: 1.162, accuracy: 0.506\n", - " Test set - loss: 36.526, accuracy: 0.0\n", - " \n" - ] - } - ], - "source": [ - "# Fit method\n", - "nn_recommender = NNRecommender(n_epochs=200, lr=0.01)\n", - "nn_recommender.fit(interactions_df.head(1000), None, None)\n", - "# nn_recommender.fit(interactions_df, None, None)" - ] - }, - { - "cell_type": "code", - "execution_count": 414, - "id": "digital-consolidation", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user_iditem_idscoretermlength_of_stay_bucketrate_planroom_segmentn_people_bucketweekend_stay
01.01195.364058Easter[2-3]Standard[160-260][2-2]True
11.0885.033441WinterVacation[0-1]Standard[160-260][2-2]True
21.0574.771185WinterVacation[2-3]Standard[160-260][2-2]True
33.0211.286193WinterVacation[2-3]Standard[160-260][2-2]False
43.07410.848604WinterVacation[4-7]Standard[160-260][2-2]False
53.08110.656947WinterVacation[0-1]Standard[160-260][2-2]False
# %% Recommender method

recommendations = nn_recommender.recommend(pd.DataFrame([[1],[3]], columns=['user_id']), items_df, 3)

recommendations = pd.merge(recommendations, items_df, on='item_id', how='left')
display(HTML(recommendations.to_html()))

# %% [markdown]
# # Tuning method

# %%
from evaluation_and_testing.testing import evaluate_train_test_split_implicit

seed = 6789

# %%
from hyperopt import hp, fmin, tpe, Trials, space_eval
import traceback

def tune_recommender(recommender_class, interactions_df, items_df,
                     param_space, max_evals=1, show_progressbar=True, seed=6789):
    """
    Tune ``recommender_class`` with hyperopt's TPE and report metrics on a hold-out set.

    :param type recommender_class: Recommender class accepting ``seed`` plus the
        hyper-parameters of ``param_space`` as keyword arguments.
    :param pd.DataFrame interactions_df: All interactions; 80% (seeded shuffle) are used
        for tuning, the remaining 20% for the final evaluation.
    :param pd.DataFrame items_df: Item table passed through to the evaluation routine.
    :param dict param_space: hyperopt search space.
    :param int max_evals: Number of TPE evaluations.
    :param bool show_progressbar: Forwarded to ``hyperopt.fmin``.
    :param int seed: Seed for the split, the recommenders and the evaluation.
    :return: Best hyper-parameters as actual values (``hp.choice`` indices are
        resolved with ``space_eval``), or None when the search raised an error.
    :rtype: dict or None
    """
    # Split into train_validation and test sets

    shuffle = np.arange(len(interactions_df))
    rng = np.random.RandomState(seed=seed)
    rng.shuffle(shuffle)
    shuffle = list(shuffle)

    # Named train_fraction so it cannot be confused with a train_test_split function.
    train_fraction = 0.8
    split_index = int(len(interactions_df) * train_fraction)

    train_validation = interactions_df.iloc[shuffle[:split_index]]
    test = interactions_df.iloc[shuffle[split_index:]]

    # Tune

    def loss(tuned_params):
        recommender = recommender_class(seed=seed, **tuned_params)
        _hr1, _hr3, _hr5, hr10, _ndcg1, _ndcg3, _ndcg5, _ndcg10 = evaluate_train_test_split_implicit(
            recommender, train_validation, items_df, seed=seed)
        # hyperopt minimises, so the hit ratio is negated.
        return -hr10

    trials = Trials()
    try:
        best_raw = fmin(loss, space=param_space, algo=tpe.suggest,
                        max_evals=max_evals, show_progressbar=show_progressbar, trials=trials, verbose=True)
    except Exception:
        # Exception (not a bare except) so Ctrl-C / kernel interrupt still stops the search.
        traceback.print_exc()
        return None

    # fmin reports hp.choice dimensions as indices into the option list;
    # space_eval maps them back to the real hyper-parameter values.
    best_param_set = space_eval(param_space, best_raw)

    # Validate

    recommender = recommender_class(seed=seed, **best_param_set)

    results = [[recommender_class.__name__] + list(evaluate_train_test_split_implicit(
        recommender, {'train': train_validation, 'test': test}, items_df, seed=seed))]

    results = pd.DataFrame(results,
                           columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])

    display(HTML(results.to_html()))

    return best_param_set

# %% [markdown]
# ## Tuning of the recommender
#
# **Task:**
\n", - "Tune your model using the code below. You only need to put the class name of your recommender and choose an appropriate parameter space." - ] - }, - { - "cell_type": "code", - "execution_count": 428, - "id": "obvious-astrology", - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " 0%| | 0/10 [00:01\", line 33, in tune_recommender\n", - " best_param_set = fmin(loss, space=param_space, algo=tpe.suggest,\n", - " File \"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/hyperopt/fmin.py\", line 507, in fmin\n", - " return trials.fmin(\n", - " File \"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/hyperopt/base.py\", line 682, in fmin\n", - " return fmin(\n", - " File \"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/hyperopt/fmin.py\", line 553, in fmin\n", - " rval.exhaust()\n", - " File \"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/hyperopt/fmin.py\", line 356, in exhaust\n", - " self.run(self.max_evals - n_done, block_until_done=self.asynchronous)\n", - " File \"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/hyperopt/fmin.py\", line 292, in run\n", - " self.serial_evaluate()\n", - " File \"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/hyperopt/fmin.py\", line 170, in serial_evaluate\n", - " result = self.domain.evaluate(spec, ctrl)\n", - " File \"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/hyperopt/base.py\", line 907, in evaluate\n", - " rval = self.fn(pyll_rval)\n", - " File \"\", line 23, in loss\n", - " hr1, hr3, hr5, hr10, ndcg1, ndcg3, ndcg5, ndcg10 = evaluate_train_test_split_implicit(\n", - " File \"/home/jovyan/REK/evaluation_and_testing/testing.py\", line 93, in evaluate_train_test_split_implicit\n", - " recommender.fit(interactions_df_train, None, items_df)\n", - " File \"\", line 131, in fit\n", - " users_df, user_features = prepare_users_df(interactions_df)\n", - " File \"\", line 15, in prepare_users_df\n", - " users_df = 
users_df.groupby(\"user_id\").first().reset_index(drop=False)\n", - " File \"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/pandas/core/groupby/groupby.py\", line 1698, in first\n", - " return self._agg_general(\n", - " File \"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/pandas/core/groupby/groupby.py\", line 1044, in _agg_general\n", - " result = self.aggregate(lambda x: npfunc(x, axis=self.axis))\n", - " File \"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/pandas/core/groupby/generic.py\", line 977, in aggregate\n", - " result = self._aggregate_frame(func)\n", - " File \"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/pandas/core/groupby/generic.py\", line 1135, in _aggregate_frame\n", - " fres = func(data, *args, **kwargs)\n", - " File \"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/pandas/core/groupby/groupby.py\", line 1044, in \n", - " result = self.aggregate(lambda x: npfunc(x, axis=self.axis))\n", - " File \"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/pandas/core/groupby/groupby.py\", line 1692, in first_compat\n", - " return obj.apply(first, axis=axis)\n", - " File \"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/pandas/core/frame.py\", line 7768, in apply\n", - " return op.get_result()\n", - " File \"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/pandas/core/apply.py\", line 185, in get_result\n", - " return self.apply_standard()\n", - " File \"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/pandas/core/apply.py\", line 276, in apply_standard\n", - " results, res_index = self.apply_series_generator()\n", - " File \"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/pandas/core/apply.py\", line 288, in apply_series_generator\n", - " for i, v in enumerate(series_gen):\n", - " File \"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/pandas/core/apply.py\", line 330, in \n", - " return (self.obj._ixs(i, axis=1) for i in range(len(self.columns)))\n", - " File 
\"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/pandas/core/frame.py\", line 2964, in _ixs\n", - " values = self._mgr.iget(i)\n", - " File \"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/pandas/core/internals/managers.py\", line 1006, in iget\n", - " return SingleBlockManager(\n", - " File \"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/pandas/core/internals/managers.py\", line 1555, in __init__\n", - " if fastpath is not lib.no_default:\n", - "KeyboardInterrupt\n" - ] - } - ], - "source": [ - "param_space = {\n", - " 'n_neg_per_pos': hp.quniform('n_neg_per_pos', 1, 10, 1)\n", - "}\n", - "items_df['item_id'].unique().size\n", - "\n", - "best_param_set = tune_recommender(NNRecommender, interactions_df, items_df,\n", - " param_space, max_evals=10, show_progressbar=True, seed=seed)\n", - "\n", - "print(\"Best parameters:\")\n", - "print(best_param_set)" - ] - }, - { - "cell_type": "markdown", - "id": "accredited-strap", - "metadata": {}, - "source": [ - "# Final evaluation\n", - "\n", - "**Task:**
\n", - "Run the final evaluation of your recommender and present its results against the Amazon and Netflix recommenders' results. You just need to give the class name of your recommender and its tuned parameters below." - ] - }, - { - "cell_type": "code", - "execution_count": 434, - "id": "given-homework", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
RecommenderHR@1HR@3HR@5HR@10NDCG@1NDCG@3NDCG@5NDCG@10
0NNRecommender0.0250080.0352090.0664690.1168150.0250080.03110.0436970.059459
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "nn_recommender = NNRecommender(n_neg_per_pos=6, n_epochs=20000) # Initialize your recommender here\n", - "\n", - "# Give the name of your recommender in the line below\n", - "nn_tts_results = [['NNRecommender'] + list(evaluate_train_test_split_implicit(\n", - " nn_recommender, interactions_df, items_df))]\n", - "\n", - "nn_tts_results = pd.DataFrame(\n", - " nn_tts_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n", - "\n", - "display(HTML(nn_tts_results.to_html()))" - ] - }, - { - "cell_type": "code", - "execution_count": 314, - "id": "suited-nomination", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
RecommenderHR@1HR@3HR@5HR@10NDCG@1NDCG@3NDCG@5NDCG@10
0AmazonRecommender0.0421190.104640.1405070.1994080.0421190.0768260.0917970.110711
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "from recommenders.amazon_recommender import AmazonRecommender\n", - "\n", - "amazon_recommender = AmazonRecommender()\n", - "\n", - "amazon_tts_results = [['AmazonRecommender'] + list(evaluate_train_test_split_implicit(\n", - " amazon_recommender, interactions_df, items_df))]\n", - "\n", - "amazon_tts_results = pd.DataFrame(\n", - " amazon_tts_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n", - "\n", - "display(HTML(amazon_tts_results.to_html()))" - ] - }, - { - "cell_type": "code", - "execution_count": 315, - "id": "conservative-remedy", - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Loss\n", - "\ttraining \t (min: 0.161, max: 0.228, cur: 0.161)\n", - "\tvalidation \t (min: 0.176, max: 0.242, cur: 0.177)\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
RecommenderHR@1HR@3HR@5HR@10NDCG@1NDCG@3NDCG@5NDCG@10
0NetflixRecommender0.0427770.1066140.1431390.2003950.0427770.0782280.0934830.111724
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "from recommenders.netflix_recommender import NetflixRecommender\n", - "\n", - "netflix_recommender = NetflixRecommender(n_epochs=30, print_type='live')\n", - "\n", - "netflix_tts_results = [['NetflixRecommender'] + list(evaluate_train_test_split_implicit(\n", - " netflix_recommender, interactions_df, items_df))]\n", - "\n", - "netflix_tts_results = pd.DataFrame(\n", - " netflix_tts_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n", - "\n", - "display(HTML(netflix_tts_results.to_html()))" - ] - }, - { - "cell_type": "code", - "execution_count": 435, - "id": "moderate-printing", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
RecommenderHR@1HR@3HR@5HR@10NDCG@1NDCG@3NDCG@5NDCG@10
0NNRecommender0.0250080.0352090.0664690.1168150.0250080.0311000.0436970.059459
1AmazonRecommender0.0421190.1046400.1405070.1994080.0421190.0768260.0917970.110711
2NetflixRecommender0.0427770.1066140.1431390.2003950.0427770.0782280.0934830.111724
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "tts_results = pd.concat([nn_tts_results, amazon_tts_results, netflix_tts_results]).reset_index(drop=True)\n", - "display(HTML(tts_results.to_html()))" - ] - }, - { - "cell_type": "markdown", - "id": "uniform-vegetable", - "metadata": {}, - "source": [ - "# Summary\n", - "\n", - "**Task:**
\n", - "Write a summary of your experiments. What worked well and what did not? What are your thoughts on how you could further improve the model?" - ] - }, - { - "cell_type": "markdown", - "id": "8f5451ec", - "metadata": {}, - "source": [ - "Na początku bezmyślnie użyłem BCELoss, \n", - "to był duży błąd, który kosztował mnie godzinę szukania w internecie, dlaczego ciągle zwraca mi tylko item-id=0\n", - "\n", - "Wyższe \"accuracy\" w testach != lepszy wynik w predykcjach \n", - "\n", - "Fitting nie zawsze znajduje najlepszy możliwy parametr. Miałem przypadek, gdzie został wybrany 5, a dawał HR 0.05, podczas gdy 6 dawał 0.08\n", - "\n", - "Dodanie dropout potrafi znacząco zwiększyć wyniki. Dropout podniósł HR10 z 0.035 do 0.11 \n", - "(niestety, w trakcie dalszych prób udoskonalenia, gdzieś zagubiłem to rozwiązanie)\n", - "\n", - "Podsumowanie:\n", - "\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "rek_uno", - "language": "python", - "name": "rek_uno" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/project_2_recommender_and_evaluation-Copy1.ipynb b/project_2_recommender_and_evaluation-Copy1.ipynb deleted file mode 100644 index 529fb99..0000000 --- a/project_2_recommender_and_evaluation-Copy1.ipynb +++ /dev/null @@ -1,1687 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 17, - "id": "alike-morgan", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The autoreload extension is already loaded. 
To reload it, use:\n", - " %reload_ext autoreload\n" - ] - } - ], - "source": [ - "%matplotlib inline\n", - "%load_ext autoreload\n", - "%autoreload 2\n", - "\n", - "import numpy as np\n", - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "from IPython.display import Markdown, display, HTML\n", - "from collections import defaultdict\n", - "\n", - "import torch\n", - "import torch.nn as nn\n", - "import torch.optim as optim\n", - "from livelossplot import PlotLosses\n", - "\n", - "# Fix the dying kernel problem (only a problem in some installations - you can remove it, if it works without it)\n", - "import os\n", - "os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'" - ] - }, - { - "cell_type": "markdown", - "id": "blessed-knitting", - "metadata": {}, - "source": [ - "# Load the dataset for recommenders" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "victorian-bottom", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", 
- " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user_iditem_idtermlength_of_stay_bucketrate_planroom_segmentn_people_bucketweekend_stay
010WinterVacation[2-3]Standard[260-360][5-inf]True
121WinterVacation[2-3]Standard[160-260][3-4]True
232WinterVacation[2-3]Standard[160-260][2-2]False
343WinterVacation[4-7]Standard[160-260][3-4]True
454WinterVacation[4-7]Standard[0-160][2-2]True
565Easter[4-7]Standard[260-360][5-inf]True
676OffSeason[2-3]Standard[260-360][5-inf]True
787HighSeason[2-3]Standard[160-260][1-1]True
898HighSeason[2-3]Standard[0-160][1-1]True
987HighSeason[2-3]Standard[160-260][1-1]True
1087HighSeason[2-3]Standard[160-260][1-1]True
11109HighSeason[2-3]Standard[160-260][3-4]True
12119HighSeason[2-3]Standard[160-260][3-4]True
131210HighSeason[8-inf]Standard[160-260][3-4]True
141411HighSeason[2-3]Standard[0-160][3-4]True
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "data_path = os.path.join(\"data\", \"hotel_data\")\n", - "\n", - "interactions_df = pd.read_csv(os.path.join(data_path, \"hotel_data_interactions_df.csv\"), index_col=0)\n", - "\n", - "base_item_features = ['term', 'length_of_stay_bucket', 'rate_plan', 'room_segment', 'n_people_bucket', 'weekend_stay']\n", - "\n", - "column_values_dict = {\n", - " 'term': ['WinterVacation', 'Easter', 'OffSeason', 'HighSeason', 'LowSeason', 'MayLongWeekend', 'NewYear', 'Christmas'],\n", - " 'length_of_stay_bucket': ['[0-1]', '[2-3]', '[4-7]', '[8-inf]'],\n", - " 'rate_plan': ['Standard', 'Nonref'],\n", - " 'room_segment': ['[0-160]', '[160-260]', '[260-360]', '[360-500]', '[500-900]'],\n", - " 'n_people_bucket': ['[1-1]', '[2-2]', '[3-4]', '[5-inf]'],\n", - " 'weekend_stay': ['True', 'False']\n", - "}\n", - "\n", - "interactions_df.loc[:, 'term'] = pd.Categorical(\n", - " interactions_df['term'], categories=column_values_dict['term'])\n", - "interactions_df.loc[:, 'length_of_stay_bucket'] = pd.Categorical(\n", - " interactions_df['length_of_stay_bucket'], categories=column_values_dict['length_of_stay_bucket'])\n", - "interactions_df.loc[:, 'rate_plan'] = pd.Categorical(\n", - " interactions_df['rate_plan'], categories=column_values_dict['rate_plan'])\n", - "interactions_df.loc[:, 'room_segment'] = pd.Categorical(\n", - " interactions_df['room_segment'], categories=column_values_dict['room_segment'])\n", - "interactions_df.loc[:, 'n_people_bucket'] = pd.Categorical(\n", - " interactions_df['n_people_bucket'], categories=column_values_dict['n_people_bucket'])\n", - "interactions_df.loc[:, 'weekend_stay'] = interactions_df['weekend_stay'].astype('str')\n", - "interactions_df.loc[:, 'weekend_stay'] = pd.Categorical(\n", - " interactions_df['weekend_stay'], categories=column_values_dict['weekend_stay'])\n", - "\n", - "display(HTML(interactions_df.head(15).to_html()))" - ] - 
}, - { - "cell_type": "markdown", - "id": "realistic-third", - "metadata": {}, - "source": [ - "# (Optional) Prepare numerical user features\n", - "\n", - "The method below is left here for convenience if you want to experiment with content-based user features as an input for your neural network." - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "variable-jaguar", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['user_term_WinterVacation', 'user_term_Easter', 'user_term_OffSeason', 'user_term_HighSeason', 'user_term_LowSeason', 'user_term_MayLongWeekend', 'user_term_NewYear', 'user_term_Christmas', 'user_length_of_stay_bucket_[0-1]', 'user_length_of_stay_bucket_[2-3]', 'user_length_of_stay_bucket_[4-7]', 'user_length_of_stay_bucket_[8-inf]', 'user_rate_plan_Standard', 'user_rate_plan_Nonref', 'user_room_segment_[0-160]', 'user_room_segment_[160-260]', 'user_room_segment_[260-360]', 'user_room_segment_[360-500]', 'user_room_segment_[500-900]', 'user_n_people_bucket_[1-1]', 'user_n_people_bucket_[2-2]', 'user_n_people_bucket_[3-4]', 'user_n_people_bucket_[5-inf]', 'user_weekend_stay_True', 'user_weekend_stay_False']\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user_iduser_term_WinterVacationuser_term_Easteruser_term_OffSeasonuser_term_HighSeasonuser_term_LowSeasonuser_term_MayLongWeekenduser_term_NewYearuser_term_Christmasuser_length_of_stay_bucket_[0-1]user_length_of_stay_bucket_[2-3]user_length_of_stay_bucket_[4-7]user_length_of_stay_bucket_[8-inf]user_rate_plan_Standarduser_rate_plan_Nonrefuser_room_segment_[0-160]user_room_segment_[160-260]user_room_segment_[260-360]user_room_segment_[360-500]user_room_segment_[500-900]user_n_people_bucket_[1-1]user_n_people_bucket_[2-2]user_n_people_bucket_[3-4]user_n_people_bucket_[5-inf]user_weekend_stay_Trueuser_weekend_stay_False
010.1304350.00.6521740.0869570.1304350.0000000.0000000.0000000.0000000.6086960.3913040.0000000.5217390.4782610.0000000.8695650.1304350.0000000.00.0000000.7391300.1739130.0869570.7826090.217391
47500.0434780.00.4347830.3043480.2173910.0000000.0000000.0000000.0000000.9130430.0869570.0000000.2608700.7391300.0000000.5652170.4347830.0000000.00.0000000.1739130.5217390.3043480.7826090.217391
92960.0833330.00.7083330.1250000.0416670.0416670.0000000.0000000.2500000.6666670.0416670.0416670.2916670.7083330.1250000.7916670.0833330.0000000.00.0416670.3333330.5416670.0833330.7500000.250000
1111150.7272730.00.2727270.0000000.0000000.0000000.0000000.0000000.5000000.3636360.1363640.0000001.0000000.0000000.0000000.8181820.1818180.0000000.00.8181820.0909090.0454550.0454550.3636360.636364
6757060.0919880.00.4510390.1899110.2077150.0385760.0118690.0089020.1691390.4599410.2729970.0979230.9940650.0059350.0207720.8397630.1305640.0089020.00.0415430.0949550.7388720.1246290.6765580.323442
169917360.0344830.00.4827590.2068970.2758620.0000000.0000000.0000000.2413790.5517240.2068970.0000000.1724140.8275860.0000000.9310340.0689660.0000000.00.3793100.4137930.2068970.0000000.4482760.551724
763977790.0370370.00.2962960.2592590.3703700.0000000.0000000.0370370.1111110.2962960.4814810.1111111.0000000.0000000.0000000.8148150.1851850.0000000.00.0000000.0370370.7407410.2222220.8148150.185185
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "def n_to_p(l):\n", - " n = sum(l)\n", - " return [x / n for x in l] if n > 0 else l\n", - "\n", - "def calculate_p(x, values):\n", - " counts = [0]*len(values)\n", - " for v in x:\n", - " counts[values.index(v)] += 1\n", - "\n", - " return n_to_p(counts)\n", - "\n", - "def prepare_users_df(interactions_df):\n", - "\n", - " users_df = interactions_df.loc[:, [\"user_id\"]]\n", - " users_df = users_df.groupby(\"user_id\").first().reset_index(drop=False)\n", - " \n", - " user_features = []\n", - "\n", - " for column in base_item_features:\n", - "\n", - " column_values = column_values_dict[column]\n", - " df = interactions_df.loc[:, ['user_id', column]]\n", - " df = df.groupby('user_id').aggregate(lambda x: list(x)).reset_index(drop=False)\n", - "\n", - " def calc_p(x):\n", - " return calculate_p(x, column_values)\n", - "\n", - " df.loc[:, column] = df[column].apply(lambda x: calc_p(x))\n", - "\n", - " p_columns = []\n", - " for i in range(len(column_values)):\n", - " p_columns.append(\"user_\" + column + \"_\" + column_values[i])\n", - " df.loc[:, p_columns[i]] = df[column].apply(lambda x: x[i])\n", - " user_features.append(p_columns[i])\n", - "\n", - " users_df = pd.merge(users_df, df.loc[:, ['user_id'] + p_columns], on=[\"user_id\"])\n", - " \n", - " return users_df, user_features\n", - " \n", - "\n", - "users_df, user_features = prepare_users_df(interactions_df)\n", - "\n", - "print(user_features)\n", - "\n", - "display(HTML(users_df.loc[users_df['user_id'].isin([706, 1736, 7779, 96, 1, 50, 115])].head(15).to_html()))" - ] - }, - { - "cell_type": "markdown", - "id": "amino-keyboard", - "metadata": {}, - "source": [ - "# (Optional) Prepare numerical item features\n", - "\n", - "The method below is left here for convenience if you want to experiment with content-based item features as an input for your neural network." 
- ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "formal-munich", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['term_WinterVacation', 'term_Easter', 'term_OffSeason', 'term_HighSeason', 'term_LowSeason', 'term_MayLongWeekend', 'term_NewYear', 'term_Christmas', 'length_of_stay_bucket_[0-1]', 'length_of_stay_bucket_[2-3]', 'length_of_stay_bucket_[4-7]', 'length_of_stay_bucket_[8-inf]', 'rate_plan_Standard', 'rate_plan_Nonref', 'room_segment_[0-160]', 'room_segment_[160-260]', 'room_segment_[260-360]', 'room_segment_[360-500]', 'room_segment_[500-900]', 'n_people_bucket_[1-1]', 'n_people_bucket_[2-2]', 'n_people_bucket_[3-4]', 'n_people_bucket_[5-inf]', 'weekend_stay_True', 'weekend_stay_False']\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", 
- " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
item_idterm_WinterVacationterm_Easterterm_OffSeasonterm_HighSeasonterm_LowSeasonterm_MayLongWeekendterm_NewYearterm_Christmaslength_of_stay_bucket_[0-1]length_of_stay_bucket_[2-3]length_of_stay_bucket_[4-7]length_of_stay_bucket_[8-inf]rate_plan_Standardrate_plan_Nonrefroom_segment_[0-160]room_segment_[160-260]room_segment_[260-360]room_segment_[360-500]room_segment_[500-900]n_people_bucket_[1-1]n_people_bucket_[2-2]n_people_bucket_[3-4]n_people_bucket_[5-inf]weekend_stay_Trueweekend_stay_False
001000000001001000100000110
111000000001001001000001010
221000000001001001000010001
331000000000101001000001010
441000000000101010000010010
550100000000101000100000110
660010000001001000100000110
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "def map_items_to_onehot(df):\n", - " one_hot = pd.get_dummies(df.loc[:, base_item_features])\n", - " df = df.drop(base_item_features, axis = 1)\n", - " df = df.join(one_hot)\n", - " \n", - " return df, list(one_hot.columns)\n", - "\n", - "def prepare_items_df(interactions_df):\n", - " items_df = interactions_df.loc[:, [\"item_id\"] + base_item_features].drop_duplicates()\n", - " \n", - " items_df, item_features = map_items_to_onehot(items_df)\n", - " \n", - " return items_df, item_features\n", - "\n", - "\n", - "items_df, item_features = prepare_items_df(interactions_df)\n", - "\n", - "print(item_features)\n", - "\n", - "display(HTML(items_df.loc[items_df['item_id'].isin([0, 1, 2, 3, 4, 5, 6])].head(15).to_html()))" - ] - }, - { - "cell_type": "markdown", - "id": "figured-imaging", - "metadata": {}, - "source": [ - "# Neural network recommender\n", - "\n", - "**Task:**
\n", - "Code a recommender based on a neural network model. You are free to choose any network architecture you find appropriate. The network can use the interaction vectors for users and items, embeddings of users and items, as well as user and item features (you can use the features you developed in the first project).\n", - "\n", - "Remember to keep control over randomness - in the init method add the seed as a parameter and initialize the random seed generator with that seed (both for numpy and pytorch):\n", - "\n", - "```python\n", - "self.seed = seed\n", - "self.rng = np.random.RandomState(seed=seed)\n", - "```\n", - "in the network model:\n", - "```python\n", - "self.seed = torch.manual_seed(seed)\n", - "```\n", - "\n", - "You are encouraged to experiment with:\n", - " - the number of layers in the network, the number of neurons and different activation functions,\n", - " - different optimizers and their parameters,\n", - " - batch size and the number of epochs,\n", - " - embedding layers,\n", - " - content-based features of both users and items." 
from recommenders.recommender import Recommender
import torch.nn.functional as F  # needed by Net.forward (F.relu); was never imported


class Net(nn.Module):
    """
    Feed-forward classifier: feature vector in, one logit per item class out.

    :param int features_len: Number of input features.
    :param int output_len: Number of output logits. Targets passed to
        ``nn.CrossEntropyLoss`` must lie in ``[0, output_len)``.
    :param seed: Optional seed; when given, ``torch.manual_seed(seed)`` is called before the
        layers are created so that weight initialisation is reproducible.
    """

    def __init__(self, features_len, output_len, seed=None):
        super().__init__()

        if seed is not None:
            self.seed = torch.manual_seed(seed)

        self.fc1 = nn.Linear(features_len, 150)
        self.fc2 = nn.Linear(150, 50)
        self.fc3 = nn.Linear(50, 25)
        # The caller is responsible for passing an output size that covers every label
        # (previously a fixed "+500" slack was added here to paper over label overflow).
        self.fc4 = nn.Linear(25, output_len)

    def forward(self, x):
        """Return raw (un-normalised) logits for ``x``."""
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        return self.fc4(x)


class NNRecommender(Recommender):
    """
    Neural-network recommender based on user and item content features.

    ``fit`` trains ``Net`` to predict an item id from the item's feature columns.
    ``recommend`` feeds each user's feature profile (the same columns prefixed with
    ``user_``) through the network and returns the item ids with the highest logits.
    """

    # Item feature columns fed to the network. Kept in one place so that fit() and
    # recommend() cannot drift apart. The length_of_stay_bucket columns are not used.
    ITEM_FEATURES = [
        'term_WinterVacation', 'term_Easter', 'term_OffSeason', 'term_HighSeason',
        'term_LowSeason', 'term_MayLongWeekend', 'term_NewYear', 'term_Christmas',
        'rate_plan_Standard', 'rate_plan_Nonref',
        'room_segment_[0-160]', 'room_segment_[160-260]', 'room_segment_[260-360]',
        'room_segment_[360-500]', 'room_segment_[500-900]',
        'n_people_bucket_[1-1]', 'n_people_bucket_[2-2]', 'n_people_bucket_[3-4]',
        'n_people_bucket_[5-inf]',
        'weekend_stay_True', 'weekend_stay_False',
    ]
    # The user profile uses the same layout, column by column.
    USER_FEATURES = ['user_' + feature for feature in ITEM_FEATURES]

    def generate_negative_interaction(self, interactions_df=None):
        """
        Draw one random (user_id, item_id) pair that does not occur in the interactions.

        :param pd.DataFrame interactions_df: Interactions with user_id and item_id columns.
            Defaults to the interactions stored by the last ``fit`` call.
        :return: Tuple ``(user_id, item_id, 0)``.
        :raises ValueError: If no interactions are available or no negative pair exists.
        """
        if interactions_df is None:
            interactions_df = self.interactions_df
        if interactions_df is None:
            raise ValueError("No interactions available: pass interactions_df or call fit() first.")

        user_ids = interactions_df['user_id']
        item_ids = interactions_df['item_id']
        positive_pairs = set(zip(user_ids, item_ids))

        # Without this guard the rejection loop below would never terminate.
        if len(positive_pairs) >= user_ids.nunique() * item_ids.nunique():
            raise ValueError("Every user-item pair is a positive interaction.")

        while True:
            # Sampling through self.rng keeps the draw reproducible for a given seed.
            user_id = user_ids.sample(random_state=self.rng).item()
            item_id = item_ids.sample(random_state=self.rng).item()
            if (user_id, item_id) not in positive_pairs:
                return (user_id, item_id, 0)

    def generate_negative_interactions(self, n, interactions_df, cross_df):
        """
        Sample ``n`` user-item pairs from ``cross_df`` that are absent from ``interactions_df``.

        NOTE(review): this relies on ``cross_df`` containing every pair of
        ``interactions_df`` exactly once (e.g. the full user x item cross product) -
        TODO confirm against the caller.

        :param int n: Number of negative pairs to sample.
        :param pd.DataFrame interactions_df: Positive interactions (user_id, item_id).
        :param pd.DataFrame cross_df: Candidate pairs.
        :return: DataFrame with ``n`` sampled negative pairs.
        """
        combined_dfs = pd.concat([cross_df, interactions_df[['user_id', 'item_id']]])
        return combined_dfs.drop_duplicates(keep=False).sample(n=n, random_state=self.rng)

    def __init__(self, seed=6789, n_neg_per_pos=5, n_epochs=51, lr=0.05):
        """
        Initialize base recommender params and variables.

        :param int seed: Seed for numpy (``self.rng``) and for torch weight initialisation.
        :param n_neg_per_pos: Stored for API compatibility with the tuning code; the current
            model does not read it.
        :param int n_epochs: Number of full-batch training epochs.
        :param float lr: Learning rate of the Adam optimizer.
        """
        self.model = None
        self.net = None
        self.item_ids = None
        self.interactions_df = None
        self.n_neg_per_pos = n_neg_per_pos
        self.n_epochs = n_epochs
        self.lr = lr

        self.recommender_df = pd.DataFrame(columns=['user_id', 'item_id', 'score'])
        self.users_df = None
        self.user_features = None

        self.seed = seed
        self.rng = np.random.RandomState(seed=seed)

    def calculate_accuracy(self, y_true, y_pred):
        """
        Fraction of correct predictions.

        Multi-class logits (last dimension > 1) are decoded with argmax; a single
        output per row is thresholded at 0.5 as before.
        """
        if y_pred.dim() > 1 and y_pred.size(-1) > 1:
            predicted = y_pred.argmax(dim=-1)
        else:
            predicted = y_pred.ge(.5).view(-1)
        return (y_true == predicted).sum().float() / len(y_true)

    def round_tensor(self, t, decimal_places=3):
        """Return the scalar tensor ``t`` as a Python number rounded to ``decimal_places``."""
        return round(t.item(), decimal_places)

    def fit(self, interactions_df, users_df, items_df):
        """
        Training of the recommender.

        :param pd.DataFrame interactions_df: DataFrame with recorded interactions between users and items
            defined by user_id, item_id and features of the interaction.
        :param pd.DataFrame users_df: Ignored - user features are rebuilt from interactions_df.
        :param pd.DataFrame items_df: Ignored - item features are rebuilt from interactions_df.
        :raises ValueError: If there are too few item rows to leave a non-empty training set.
        """

        interactions_df = interactions_df.copy()
        self.interactions_df = interactions_df

        # prepare_users_df / prepare_items_df are defined in other notebook cells.
        users_df, user_features = prepare_users_df(interactions_df)

        self.users_df = users_df
        self.user_features = user_features

        items_df, item_features = prepare_items_df(interactions_df)
        items_df = items_df.loc[:, ['item_id'] + item_features]

        features = items_df[self.ITEM_FEATURES].to_numpy(dtype=np.float32)
        labels = items_df['item_id'].to_numpy(dtype=np.int64)

        # Item ids that actually exist; recommend() only ranks these.
        self.item_ids = np.unique(labels)

        # Seeded 80/20 split (replaces the never-imported train_test_split).
        # The held-out 20% is currently not evaluated; it is kept so that the amount
        # of training data matches the original design.
        n_rows = len(labels)
        n_test = int(np.ceil(0.2 * n_rows))
        train_idx = self.rng.permutation(n_rows)[n_test:]
        if len(train_idx) == 0:
            raise ValueError("Not enough item rows to train on.")

        X_train = torch.from_numpy(features[train_idx])
        y_train = torch.from_numpy(labels[train_idx])

        # Labels are raw item ids, so the output layer must cover 0..max(item_id).
        n_classes = int(labels.max()) + 1
        self.net = Net(X_train.shape[1], n_classes, seed=self.seed)

        optimizer = optim.Adam(self.net.parameters(), lr=self.lr)
        criterion = nn.CrossEntropyLoss()

        self.net.train()
        for _ in range(int(self.n_epochs)):
            optimizer.zero_grad()
            train_loss = criterion(self.net(X_train), y_train)
            train_loss.backward()
            optimizer.step()
        self.net.eval()

    def recommend(self, users_df, items_df, n_recommendations=1):
        """
        Serving of recommendations. Scores every item id seen during ``fit`` for each user in
        users_df and returns top n_recommendations for each user.

        :param pd.DataFrame users_df: DataFrame with a user_id column for which recommendations
            should be generated. Users unknown to ``fit`` get an all-zero feature profile.
        :param pd.DataFrame items_df: Currently not used for scoring.
        :param int n_recommendations: Number of recommendations to be returned for each user.
        :return: DataFrame with user_id, item_id and score as columns returning n_recommendations top recommendations
            for each user.
        :rtype: pd.DataFrame
        :raises RuntimeError: If called before ``fit``.
        """
        if self.net is None:
            raise RuntimeError("fit() must be called before recommend().")

        columns = ['user_id', 'item_id', 'score']

        users_df = pd.merge(
            users_df.loc[:, ['user_id']], self.users_df, on=['user_id'], how='left').fillna(0)

        if users_df.empty:
            self.recommender_df = pd.DataFrame(columns=columns)
            return self.recommender_df

        # Score all users in a single forward pass.
        user_matrix = torch.from_numpy(users_df[self.USER_FEATURES].to_numpy(dtype=np.float32))
        with torch.no_grad():
            scores = self.net(user_matrix).numpy()

        # Rank only logits that correspond to real item ids.
        candidate_ids = self.item_ids
        candidate_scores = scores[:, candidate_ids]
        top_positions = np.argsort(-candidate_scores, axis=1)[:, :n_recommendations]

        # Collect plain records and build the frame once (no concat inside the loop).
        records = []
        for row, user_id in enumerate(users_df['user_id'].to_numpy()):
            for position in top_positions[row]:
                records.append(
                    {
                        'user_id': user_id,
                        'item_id': int(candidate_ids[position]),
                        'score': float(candidate_scores[row, position])
                    }
                )

        self.recommender_df = pd.DataFrame(records, columns=columns)

        return self.recommender_df


# Quick test of the recommender
items_df = interactions_df.loc[:, ['item_id'] + base_item_features].drop_duplicates()
# Fit method
# Smoke test on the first 1000 interactions only.
# users_df and items_df are passed as None because NNRecommender.fit() rebuilds both
# from interactions_df (via prepare_users_df / prepare_items_df).
nn_recommender = NNRecommender()
nn_recommender.fit(interactions_df.head(1000), None, None)
# nn_recommender.fit(interactions_df, None, None)
# Recommender method

recommendations = nn_recommender.recommend(pd.DataFrame([[1]], columns=['user_id']), items_df, 3)

recommendations = pd.merge(recommendations, items_df, on='item_id', how='left')
display(HTML(recommendations.to_html()))


# Tuning method

from evaluation_and_testing.testing import evaluate_train_test_split_implicit

seed = 6789


from hyperopt import hp, fmin, tpe, Trials
import traceback


def tune_recommender(recommender_class, interactions_df, items_df,
                     param_space, max_evals=1, show_progressbar=True, seed=6789):
    """
    Tune a recommender with hyperopt (TPE) and report the tuned model on a held-out test set.

    The interactions are shuffled with a seeded RandomState and split 80/20 into a
    train+validation part (used inside the tuning loss) and a test part (used once, for the
    final report that is displayed as an HTML table).

    :param recommender_class: Recommender class; instantiated as
        ``recommender_class(seed=seed, **params)``.
    :param pd.DataFrame interactions_df: All recorded interactions.
    :param pd.DataFrame items_df: Item features passed through to the evaluation function.
    :param dict param_space: hyperopt search space.
    :param int max_evals: Number of hyperopt evaluations.
    :param bool show_progressbar: Forwarded to ``fmin``.
    :param int seed: Seed for the data split, the recommender and the evaluation.
    :return: Best parameter set found by ``fmin``, or None if tuning raised an exception.
    """
    # Split into train_validation and test sets

    shuffle = np.arange(len(interactions_df))
    rng = np.random.RandomState(seed=seed)
    rng.shuffle(shuffle)

    train_fraction = 0.8
    split_index = int(len(interactions_df) * train_fraction)

    train_validation = interactions_df.iloc[shuffle[:split_index]]
    test = interactions_df.iloc[shuffle[split_index:]]

    # Tune

    def loss(tuned_params):
        # hyperopt minimises, so HR@10 is negated.
        recommender = recommender_class(seed=seed, **tuned_params)
        hr1, hr3, hr5, hr10, ndcg1, ndcg3, ndcg5, ndcg10 = evaluate_train_test_split_implicit(
            recommender, train_validation, items_df, seed=seed)
        return -hr10

    n_tries = 1
    succeded = False
    try_id = 0
    while not succeded and try_id < n_tries:
        try:
            trials = Trials()
            # NOTE(review): fmin is not given an rstate, so the TPE search itself is not
            # seeded; the expected rstate type depends on the hyperopt version - TODO confirm.
            best_param_set = fmin(loss, space=param_space, algo=tpe.suggest,
                                  max_evals=max_evals, show_progressbar=show_progressbar, trials=trials, verbose=True)
            succeded = True
        except Exception:
            # Only real errors are retried; KeyboardInterrupt / SystemExit must still be
            # able to stop a long tuning run.
            traceback.print_exc()
            try_id += 1

    if not succeded:
        return None

    # Validate

    recommender = recommender_class(seed=seed, **best_param_set)

    results = [[recommender_class.__name__] + list(evaluate_train_test_split_implicit(
        recommender, {'train': train_validation, 'test': test}, items_df, seed=seed))]

    results = pd.DataFrame(results,
                           columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])

    display(HTML(results.to_html()))

    return best_param_set
\n", - "Tune your model using the code below. You only need to put the class name of your recommender and choose an appropriate parameter space." - ] - }, - { - "cell_type": "code", - "execution_count": 196, - "id": "obvious-astrology", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "IN: \n", - "21 \n", - "OUT: \n", - "691 \n", - "IN: \n", - "21 \n", - "OUT: \n", - "691 \n", - "IN: \n", - "21 \n", - "OUT: \n", - "691 \n", - "IN: \n", - "21 \n", - "OUT: \n", - "691 \n", - "IN: \n", - "21 \n", - "OUT: \n", - "691 \n", - "IN: \n", - "21 \n", - "OUT: \n", - "691 \n", - "IN: \n", - "21 \n", - "OUT: \n", - "691 \n", - "IN: \n", - "21 \n", - "OUT: \n", - "691 \n", - "IN: \n", - "21 \n", - "OUT: \n", - "691 \n", - "IN: \n", - "21 \n", - "OUT: \n", - "691 \n", - "100%|██████████| 10/10 [18:34<00:00, 111.50s/trial, best loss: -0.04424416222859484]\n", - "IN: 21 OUT: 736\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
# Search space for the tuner. hp.quniform yields floats (e.g. 9.0), not ints.
param_space = {
    'n_neg_per_pos': hp.quniform('n_neg_per_pos', 1, 10, 1)
}

# (A stray `items_df['item_id'].unique().size` expression used to sit here; in the middle
# of a cell it is neither displayed nor stored, so it has been removed.)

best_param_set = tune_recommender(NNRecommender, interactions_df, items_df,
                                  param_space, max_evals=10, show_progressbar=True, seed=seed)

print("Best parameters:")
print(best_param_set)
\n", - "Run the final evaluation of your recommender and present its results against the Amazon and Netflix recommenders' results. You just need to give the class name of your recommender and its tuned parameters below." - ] - }, - { - "cell_type": "code", - "execution_count": 198, - "id": "given-homework", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "IN: 21 OUT: 736\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
# Final evaluation of the tuned neural-network recommender.
nn_recommender = NNRecommender(n_neg_per_pos=9)  # Initialize your recommender here

# One result row: the recommender name followed by the eight metrics returned by the
# evaluation function (order given by the column list below).
# Give the name of your recommender in the line below
nn_tts_results = pd.DataFrame(
    [['NNRecommender', *evaluate_train_test_split_implicit(
        nn_recommender, interactions_df, items_df)]],
    columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])

display(HTML(nn_tts_results.to_html()))
# Baseline 1: Amazon recommender, evaluated with the same train-test-split procedure.
from recommenders.amazon_recommender import AmazonRecommender

amazon_recommender = AmazonRecommender()

amazon_tts_results = pd.DataFrame(
    [['AmazonRecommender', *evaluate_train_test_split_implicit(
        amazon_recommender, interactions_df, items_df)]],
    columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])

display(HTML(amazon_tts_results.to_html()))


# Baseline 2: Netflix recommender.
from recommenders.netflix_recommender import NetflixRecommender

netflix_recommender = NetflixRecommender(n_epochs=30, print_type='live')

netflix_tts_results = pd.DataFrame(
    [['NetflixRecommender', *evaluate_train_test_split_implicit(
        netflix_recommender, interactions_df, items_df)]],
    columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])

display(HTML(netflix_tts_results.to_html()))


# Side-by-side comparison of all three recommenders, re-indexed 0..2.
tts_results = pd.concat(
    [nn_tts_results, amazon_tts_results, netflix_tts_results], ignore_index=True)
display(HTML(tts_results.to_html()))
\n", - "Write a summary of your experiments. What worked well and what did not? What are your thoughts how could you possibly further improve the model?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "declared-howard", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "rek_uno", - "language": "python", - "name": "rek_uno" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/project_2_recommender_and_evaluation-Copy2.ipynb b/project_2_recommender_and_evaluation-Copy2.ipynb deleted file mode 100644 index 5dc2301..0000000 --- a/project_2_recommender_and_evaluation-Copy2.ipynb +++ /dev/null @@ -1,1979 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 302, - "id": "alike-morgan", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The autoreload extension is already loaded. 
To reload it, use:\n", - " %reload_ext autoreload\n" - ] - } - ], - "source": [ - "%matplotlib inline\n", - "%load_ext autoreload\n", - "%autoreload 2\n", - "\n", - "import numpy as np\n", - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "from IPython.display import Markdown, display, HTML\n", - "from collections import defaultdict\n", - "\n", - "import torch\n", - "import torch.nn as nn\n", - "import torch.optim as optim\n", - "from livelossplot import PlotLosses\n", - "\n", - "# Fix the dying kernel problem (only a problem in some installations - you can remove it, if it works without it)\n", - "import os\n", - "os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'" - ] - }, - { - "cell_type": "markdown", - "id": "blessed-knitting", - "metadata": {}, - "source": [ - "# Load the dataset for recommenders" - ] - }, - { - "cell_type": "code", - "execution_count": 303, - "id": "victorian-bottom", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", 
- " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
# Load the hotel interactions and convert every base item feature to a categorical
# column with a fixed, explicit category order.
data_path = os.path.join("data", "hotel_data")

interactions_df = pd.read_csv(os.path.join(data_path, "hotel_data_interactions_df.csv"), index_col=0)

base_item_features = ['term', 'length_of_stay_bucket', 'rate_plan', 'room_segment', 'n_people_bucket', 'weekend_stay']

# Allowed values per feature; the list order defines the category order.
column_values_dict = {
    'term': ['WinterVacation', 'Easter', 'OffSeason', 'HighSeason', 'LowSeason', 'MayLongWeekend', 'NewYear', 'Christmas'],
    'length_of_stay_bucket': ['[0-1]', '[2-3]', '[4-7]', '[8-inf]'],
    'rate_plan': ['Standard', 'Nonref'],
    'room_segment': ['[0-160]', '[160-260]', '[260-360]', '[360-500]', '[500-900]'],
    'n_people_bucket': ['[1-1]', '[2-2]', '[3-4]', '[5-inf]'],
    'weekend_stay': ['True', 'False']
}

# Replace each column outright instead of assigning through `.loc[:, col] = ...`.
# Since pandas 2.0 a `.loc[:, col]` assignment writes the values into the existing column
# in place, which keeps the old dtype and silently drops the categorical conversion.
# weekend_stay is cast to str first so that its values match the 'True' / 'False' categories.
interactions_df = interactions_df.assign(**{
    column: pd.Categorical(
        interactions_df[column].astype('str') if column == 'weekend_stay' else interactions_df[column],
        categories=categories)
    for column, categories in column_values_dict.items()
})

display(HTML(interactions_df.head(15).to_html()))
}, - { - "cell_type": "markdown", - "id": "realistic-third", - "metadata": {}, - "source": [ - "# (Optional) Prepare numerical user features\n", - "\n", - "The method below is left here for convenience if you want to experiment with content-based user features as an input for your neural network." - ] - }, - { - "cell_type": "code", - "execution_count": 304, - "id": "variable-jaguar", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['user_term_WinterVacation', 'user_term_Easter', 'user_term_OffSeason', 'user_term_HighSeason', 'user_term_LowSeason', 'user_term_MayLongWeekend', 'user_term_NewYear', 'user_term_Christmas', 'user_length_of_stay_bucket_[0-1]', 'user_length_of_stay_bucket_[2-3]', 'user_length_of_stay_bucket_[4-7]', 'user_length_of_stay_bucket_[8-inf]', 'user_rate_plan_Standard', 'user_rate_plan_Nonref', 'user_room_segment_[0-160]', 'user_room_segment_[160-260]', 'user_room_segment_[260-360]', 'user_room_segment_[360-500]', 'user_room_segment_[500-900]', 'user_n_people_bucket_[1-1]', 'user_n_people_bucket_[2-2]', 'user_n_people_bucket_[3-4]', 'user_n_people_bucket_[5-inf]', 'user_weekend_stay_True', 'user_weekend_stay_False']\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user_iduser_term_WinterVacationuser_term_Easteruser_term_OffSeasonuser_term_HighSeasonuser_term_LowSeasonuser_term_MayLongWeekenduser_term_NewYearuser_term_Christmasuser_length_of_stay_bucket_[0-1]user_length_of_stay_bucket_[2-3]user_length_of_stay_bucket_[4-7]user_length_of_stay_bucket_[8-inf]user_rate_plan_Standarduser_rate_plan_Nonrefuser_room_segment_[0-160]user_room_segment_[160-260]user_room_segment_[260-360]user_room_segment_[360-500]user_room_segment_[500-900]user_n_people_bucket_[1-1]user_n_people_bucket_[2-2]user_n_people_bucket_[3-4]user_n_people_bucket_[5-inf]user_weekend_stay_Trueuser_weekend_stay_False
010.1304350.00.6521740.0869570.1304350.0000000.0000000.0000000.0000000.6086960.3913040.0000000.5217390.4782610.0000000.8695650.1304350.0000000.00.0000000.7391300.1739130.0869570.7826090.217391
47500.0434780.00.4347830.3043480.2173910.0000000.0000000.0000000.0000000.9130430.0869570.0000000.2608700.7391300.0000000.5652170.4347830.0000000.00.0000000.1739130.5217390.3043480.7826090.217391
92960.0833330.00.7083330.1250000.0416670.0416670.0000000.0000000.2500000.6666670.0416670.0416670.2916670.7083330.1250000.7916670.0833330.0000000.00.0416670.3333330.5416670.0833330.7500000.250000
1111150.7272730.00.2727270.0000000.0000000.0000000.0000000.0000000.5000000.3636360.1363640.0000001.0000000.0000000.0000000.8181820.1818180.0000000.00.8181820.0909090.0454550.0454550.3636360.636364
6757060.0919880.00.4510390.1899110.2077150.0385760.0118690.0089020.1691390.4599410.2729970.0979230.9940650.0059350.0207720.8397630.1305640.0089020.00.0415430.0949550.7388720.1246290.6765580.323442
169917360.0344830.00.4827590.2068970.2758620.0000000.0000000.0000000.2413790.5517240.2068970.0000000.1724140.8275860.0000000.9310340.0689660.0000000.00.3793100.4137930.2068970.0000000.4482760.551724
763977790.0370370.00.2962960.2592590.3703700.0000000.0000000.0370370.1111110.2962960.4814810.1111111.0000000.0000000.0000000.8148150.1851850.0000000.00.0000000.0370370.7407410.2222220.8148150.185185
def n_to_p(l):
    """
    Turn a list of counts into a list of proportions.

    :param list l: Numeric counts.
    :return: Each count divided by the total when the total is positive; otherwise the
        input list itself is returned unchanged.
    :rtype: list
    """
    total = sum(l)
    if total > 0:
        return [count / total for count in l]
    return l


def calculate_p(x, values):
    """
    Compute how often each entry of ``values`` occurs in ``x``, as proportions.

    :param x: Iterable of observed values; every element must be present in ``values``
        (``list.index`` raises ValueError otherwise).
    :param list values: Ordered list of possible values; fixes the order of the result.
    :return: Proportions aligned with ``values``.
    :rtype: list
    """
    tally = [0 for _ in values]
    for observed in x:
        position = values.index(observed)
        tally[position] += 1

    return n_to_p(tally)


def prepare_users_df(interactions_df):
    """
    Build per-user features: for every base item feature, the share of the user's
    interactions that fall into each of that feature's values.

    Reads the notebook-level globals ``base_item_features`` and ``column_values_dict``
    (defined outside this cell).

    :param pd.DataFrame interactions_df: Interactions with a ``user_id`` column and one
        column per base item feature.
    :return: Tuple ``(users_df, user_features)`` - one row per user with the generated
        ``user_<feature>_<value>`` columns, and the list of those column names.
    :rtype: tuple
    """
    # One row per distinct user id; feature columns are merged onto this frame below.
    users_df = (
        interactions_df.loc[:, ["user_id"]]
        .groupby("user_id")
        .first()
        .reset_index(drop=False)
    )

    user_features = []

    for column in base_item_features:
        column_values = column_values_dict[column]

        # Collect every value the user interacted with into a list per user.
        per_user = (
            interactions_df.loc[:, ['user_id', column]]
            .groupby('user_id')
            .aggregate(lambda x: list(x))
            .reset_index(drop=False)
        )

        # Replace the raw list with the proportion vector aligned with column_values.
        per_user.loc[:, column] = per_user[column].apply(
            lambda seen: calculate_p(seen, column_values))

        # Spread the proportion vector into one named column per possible value.
        p_columns = []
        for position, value in enumerate(column_values):
            feature_name = "user_" + column + "_" + value
            per_user.loc[:, feature_name] = per_user[column].apply(lambda p: p[position])
            p_columns.append(feature_name)
            user_features.append(feature_name)

        users_df = pd.merge(users_df, per_user.loc[:, ['user_id'] + p_columns], on=["user_id"])

    return users_df, user_features
- ] - }, - { - "cell_type": "code", - "execution_count": 305, - "id": "formal-munich", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['term_WinterVacation', 'term_Easter', 'term_OffSeason', 'term_HighSeason', 'term_LowSeason', 'term_MayLongWeekend', 'term_NewYear', 'term_Christmas', 'length_of_stay_bucket_[0-1]', 'length_of_stay_bucket_[2-3]', 'length_of_stay_bucket_[4-7]', 'length_of_stay_bucket_[8-inf]', 'rate_plan_Standard', 'rate_plan_Nonref', 'room_segment_[0-160]', 'room_segment_[160-260]', 'room_segment_[260-360]', 'room_segment_[360-500]', 'room_segment_[500-900]', 'n_people_bucket_[1-1]', 'n_people_bucket_[2-2]', 'n_people_bucket_[3-4]', 'n_people_bucket_[5-inf]', 'weekend_stay_True', 'weekend_stay_False']\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
item_idterm_WinterVacationterm_Easterterm_OffSeasonterm_HighSeasonterm_LowSeasonterm_MayLongWeekendterm_NewYearterm_Christmaslength_of_stay_bucket_[0-1]length_of_stay_bucket_[2-3]length_of_stay_bucket_[4-7]length_of_stay_bucket_[8-inf]rate_plan_Standardrate_plan_Nonrefroom_segment_[0-160]room_segment_[160-260]room_segment_[260-360]room_segment_[360-500]room_segment_[500-900]n_people_bucket_[1-1]n_people_bucket_[2-2]n_people_bucket_[3-4]n_people_bucket_[5-inf]weekend_stay_Trueweekend_stay_False
001000000001001000100000110
111000000001001001000001010
221000000001001001000010001
331000000000101001000001010
441000000000101010000010010
550100000000101000100000110
660010000001001000100000110
def map_items_to_onehot(df):
    """
    Replace the base item feature columns of ``df`` with their one-hot encoding.

    Reads the notebook-level global ``base_item_features`` (defined outside this cell).

    :param pd.DataFrame df: Frame containing every column listed in ``base_item_features``.
    :return: Tuple ``(encoded_df, one_hot_columns)`` - the frame with the original feature
        columns dropped and the dummy columns joined on the index, plus the dummy column names.
    :rtype: tuple
    """
    encoded = pd.get_dummies(df.loc[:, base_item_features])
    remaining = df.drop(base_item_features, axis=1)

    return remaining.join(encoded), list(encoded.columns)


def prepare_items_df(interactions_df):
    """
    Derive the item table with one-hot features from the interactions.

    :param pd.DataFrame interactions_df: Interactions with ``item_id`` and the base item
        feature columns.
    :return: Tuple ``(items_df, item_features)`` - distinct ``item_id``/feature rows with the
        features one-hot encoded, and the list of one-hot column names.
    :rtype: tuple
    """
    # Keep a single row per distinct (item_id, feature values) combination.
    distinct_items = interactions_df.loc[:, ["item_id"] + base_item_features].drop_duplicates()

    return map_items_to_onehot(distinct_items)
\n", - "Code a recommender based on a neural network model. You are free to choose any network architecture you find appropriate. The network can use the interaction vectors for users and items, embeddings of users and items, as well as user and item features (you can use the features you developed in the first project).\n", - "\n", - "Remember to keep control over randomness - in the init method add the seed as a parameter and initialize the random seed generator with that seed (both for numpy and pytorch):\n", - "\n", - "```python\n", - "self.seed = seed\n", - "self.rng = np.random.RandomState(seed=seed)\n", - "```\n", - "in the network model:\n", - "```python\n", - "self.seed = torch.manual_seed(seed)\n", - "```\n", - "\n", - "You are encouraged to experiment with:\n", - " - the number of layers in the network, the number of neurons and different activation functions,\n", - " - different optimizers and their parameters,\n", - " - batch size and the number of epochs,\n", - " - embedding layers,\n", - " - content-based features of both users and items." 
from recommenders.recommender import Recommender


# One-hot item feature columns fed to the network during training. The
# length_of_stay_bucket_* columns are deliberately not part of the model input.
_ITEM_FEATURE_COLUMNS = [
    'term_WinterVacation', 'term_Easter', 'term_OffSeason', 'term_HighSeason',
    'term_LowSeason', 'term_MayLongWeekend', 'term_NewYear', 'term_Christmas',
    'rate_plan_Standard', 'rate_plan_Nonref',
    'room_segment_[0-160]', 'room_segment_[160-260]', 'room_segment_[260-360]',
    'room_segment_[360-500]', 'room_segment_[500-900]',
    'n_people_bucket_[1-1]', 'n_people_bucket_[2-2]', 'n_people_bucket_[3-4]',
    'n_people_bucket_[5-inf]',
    'weekend_stay_True', 'weekend_stay_False',
]

# Matching per-user columns produced by prepare_users_df: same order, "user_" prefix.
# Keeping both lists in one place guarantees fit() and recommend() feed the network
# vectors with identical layout.
_USER_FEATURE_COLUMNS = ['user_' + name for name in _ITEM_FEATURE_COLUMNS]


class Net(nn.Module):
    """
    Feed-forward classifier producing one raw score (logit) per output unit.

    Layers: features_len -> 150 -> 50 -> 25 -> (output_len + extra_outputs), with a PReLU
    after every hidden layer and dropout (p=0.2) after the second hidden layer.
    """

    def __init__(self, features_len, output_len, extra_outputs=300):
        """
        :param int features_len: Size of the input feature vector.
        :param int output_len: Number of classes to score.
        :param int extra_outputs: Additional output units appended after ``output_len``.
            Defaults to 300 to keep the historical layer size for existing callers;
            pass 0 when ``output_len`` already covers every class index.
        """
        super().__init__()

        self.fc1 = nn.Linear(features_len, 150)
        self.fc2 = nn.Linear(150, 50)
        self.fc3 = nn.Linear(50, 25)
        self.output = nn.Linear(25, output_len + extra_outputs)

        self.relu1 = nn.PReLU()
        self.relu2 = nn.PReLU()
        self.relu3 = nn.PReLU()

        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        """
        :param torch.Tensor x: Float tensor whose last dimension is ``features_len``.
        :return: Raw scores with last dimension ``output_len + extra_outputs``.
        :rtype: torch.Tensor
        """
        x = self.relu1(self.fc1(x))
        x = self.relu2(self.fc2(x))
        x = self.dropout(x)  # only active in train mode; call .eval() for inference
        x = self.relu3(self.fc3(x))

        return self.output(x)


class NNRecommender(Recommender):
    """
    Neural-network recommender based on user and item features.

    The network is trained as a classifier from one-hot item features to ``item_id``.
    At serving time the user's feature-preference distribution (same column layout) is
    fed to the network and the highest-scoring output indices are returned as item ids.
    """

    def __init__(self, seed=6789, n_neg_per_pos=5, n_epochs=2000, lr=0.05):
        """
        Initialize base recommender params and variables.

        :param int seed: Seed for both the numpy and the torch random generators.
        :param n_neg_per_pos: Stored for interface compatibility with the tuning code;
            this recommender does not sample negatives, so the value is not used.
        :param int n_epochs: Number of full-batch training epochs.
        :param float lr: Learning rate of the Adam optimizer.
        """
        self.net = None
        self.model = None  # alias of self.net once fitted
        self.n_neg_per_pos = n_neg_per_pos

        self.recommender_df = pd.DataFrame(columns=['user_id', 'item_id', 'score'])
        self.users_df = None
        self.user_features = None

        self.seed = seed
        self.rng = np.random.RandomState(seed=seed)

        self.n_epochs = n_epochs
        self.lr = lr

    def calculate_accuracy(self, y_true, y_pred):
        """
        :param torch.Tensor y_true: Class indices, shape (n,).
        :param torch.Tensor y_pred: Scores, shape (n, n_classes).
        :return: Fraction of rows whose arg-max score equals the true class (0-dim tensor).
        :rtype: torch.Tensor
        """
        predictions = y_pred.argmax(1)
        return (predictions == y_true).sum().float() / len(y_true)

    def round_tensor(self, t, decimal_places=3):
        """Return the scalar held by tensor ``t`` as a Python float rounded for display."""
        return round(t.item(), decimal_places)

    def _log_progress(self, epoch, criterion, y_train_pred, y_train, train_loss, X_test, y_test):
        """Print train metrics and, when a hold-out set exists, hold-out metrics."""
        train_acc = self.calculate_accuracy(y_train, y_train_pred)

        if len(y_test) == 0:
            print(f'epoch {epoch}: train loss {self.round_tensor(train_loss)}, '
                  f'train accuracy {self.round_tensor(train_acc)}')
            return

        # Hold-out metrics must be computed with dropout disabled and without building
        # an autograd graph; switch back to train mode afterwards.
        self.net.eval()
        with torch.no_grad():
            y_test_pred = self.net(X_test)
            test_loss = criterion(y_test_pred, y_test)
            test_acc = self.calculate_accuracy(y_test, y_test_pred)
        self.net.train()

        print(
            f'''epoch {epoch}
            Train set - loss: {self.round_tensor(train_loss)}, accuracy: {self.round_tensor(train_acc)}
            Test set - loss: {self.round_tensor(test_loss)}, accuracy: {self.round_tensor(test_acc)}
            ''')

    def fit(self, interactions_df, users_df, items_df):
        """
        Training of the recommender.

        The ``users_df`` and ``items_df`` arguments are ignored; both tables are rebuilt
        from ``interactions_df`` with prepare_users_df / prepare_items_df.

        :param pd.DataFrame interactions_df: DataFrame with recorded interactions between users and items
            defined by user_id, item_id and features of the interaction.
        :param pd.DataFrame users_df: DataFrame with users and their features defined by user_id and the user feature columns.
        :param pd.DataFrame items_df: DataFrame with items and their features defined by item_id and the item feature columns.
        :raises ValueError: If ``interactions_df`` yields no items to train on.
        """

        interactions_df = interactions_df.copy()

        # Re-seed on every fit so that repeated fits with the same seed give the same
        # hold-out split, weight initialization and dropout masks.
        self.rng = np.random.RandomState(seed=self.seed)
        torch.manual_seed(self.seed)

        users_df, user_features = prepare_users_df(interactions_df)

        self.users_df = users_df
        self.user_features = user_features

        items_df, item_features = prepare_items_df(interactions_df)
        items_df = items_df.loc[:, ['item_id'] + item_features]
        if items_df.empty:
            raise ValueError("Cannot fit NNRecommender: interactions_df contains no items.")

        # reindex() supplies an all-zero column for any feature value that does not occur
        # in this particular training subset instead of failing with a KeyError.
        features = (
            items_df.reindex(columns=_ITEM_FEATURE_COLUMNS, fill_value=0)
            .to_numpy(dtype=np.float32)
        )
        # assumes item ids are non-negative integers usable as class indices - TODO confirm
        targets = items_df['item_id'].to_numpy(dtype=np.int64)

        # 80/20 hold-out split driven by self.rng; always keep at least one training row.
        n_items = len(targets)
        order = self.rng.permutation(n_items)
        n_test = min(n_items - 1, int(np.ceil(0.2 * n_items)))
        test_idx, train_idx = order[:n_test], order[n_test:]

        X_train = torch.from_numpy(features[train_idx])
        y_train = torch.from_numpy(targets[train_idx])
        X_test = torch.from_numpy(features[test_idx])
        y_test = torch.from_numpy(targets[test_idx])

        # Targets are raw item ids, so the output layer needs max(item_id) + 1 units.
        # Counting distinct ids under-sizes the layer when ids are not contiguous, and
        # padding it lets recommend() return indices that correspond to no item.
        n_classes = int(targets.max()) + 1
        self.net = Net(X_train.shape[1], n_classes, extra_outputs=0)
        self.model = self.net

        optimizer = optim.Adam(self.net.parameters(), lr=self.lr)
        criterion = nn.CrossEntropyLoss()

        self.net.train()
        # int() because tuned hyper-parameters may arrive as floats (e.g. hp.quniform).
        for epoch in range(int(self.n_epochs)):
            y_pred = self.net(X_train)
            train_loss = criterion(y_pred, y_train)

            if (epoch + 1) % 100 == 0:
                # NOTE(review): if every item_id occurs in exactly one row, held-out
                # classes are never seen in training and hold-out accuracy stays ~0.
                self._log_progress(epoch, criterion, y_pred, y_train, train_loss, X_test, y_test)

            optimizer.zero_grad()
            train_loss.backward()
            optimizer.step()

        # Leave the network in inference mode (dropout disabled).
        self.net.eval()

    def recommend(self, users_df, items_df, n_recommendations=1):
        """
        Serving of recommendations. Scores every network output for each user in users_df and
        returns top n_recommendations for each user. ``items_df`` is accepted for interface
        compatibility but is not used for scoring.

        :param pd.DataFrame users_df: DataFrame with users and their features for which recommendations should be generated.
        :param pd.DataFrame items_df: DataFrame with items and their features which should be scored.
        :param int n_recommendations: Number of recommendations to be returned for each user.
        :return: DataFrame with user_id, item_id and score as columns returning n_recommendations top recommendations
            for each user.
        :rtype: pd.DataFrame
        :raises RuntimeError: If called before ``fit``.
        """
        if self.net is None:
            raise RuntimeError("NNRecommender.recommend() called before fit().")

        # Attach the stored user features; users unseen in training get an all-zero vector.
        users_df = pd.merge(
            users_df.loc[:, ['user_id']], self.users_df, on=['user_id'], how='left'
        ).fillna(0)

        # Ids are read from the column (not through iterrows) so they keep their dtype.
        user_ids = users_df['user_id'].to_numpy()
        features = torch.from_numpy(users_df[_USER_FEATURE_COLUMNS].to_numpy(dtype=np.float32))

        # Score all users in one forward pass, with dropout off and no autograd graph.
        self.net.eval()
        with torch.no_grad():
            scores = self.net(features).numpy()

        # Output index == item id, because the network was trained with item ids as classes.
        top_items = np.argsort(-scores, axis=1, kind='stable')[:, :n_recommendations]

        records = [
            {'user_id': user_id, 'item_id': int(item_id), 'score': float(user_scores[item_id])}
            for user_id, user_scores, chosen_ids in zip(user_ids, scores, top_items)
            for item_id in chosen_ids
        ]

        # Build the frame once instead of concatenating per user.
        self.recommender_df = pd.DataFrame(records, columns=['user_id', 'item_id', 'score'])

        return self.recommender_df
0.921, accuracy: 0.476\n", - " Test set - loss: 25.042, accuracy: 0.0\n", - " \n", - "epoch 399\n", - " Train set - loss: 0.907, accuracy: 0.481\n", - " Test set - loss: 23.741, accuracy: 0.0\n", - " \n", - "epoch 499\n", - " Train set - loss: 0.897, accuracy: 0.474\n", - " Test set - loss: 23.28, accuracy: 0.0\n", - " \n", - "epoch 599\n", - " Train set - loss: 0.894, accuracy: 0.472\n", - " Test set - loss: 23.993, accuracy: 0.0\n", - " \n", - "epoch 699\n", - " Train set - loss: 0.889, accuracy: 0.5\n", - " Test set - loss: 24.347, accuracy: 0.0\n", - " \n", - "epoch 799\n", - " Train set - loss: 0.907, accuracy: 0.472\n", - " Test set - loss: 25.641, accuracy: 0.0\n", - " \n", - "epoch 899\n", - " Train set - loss: 0.9, accuracy: 0.456\n", - " Test set - loss: 25.375, accuracy: 0.0\n", - " \n", - "epoch 999\n", - " Train set - loss: 0.885, accuracy: 0.479\n", - " Test set - loss: 25.575, accuracy: 0.0\n", - " \n", - "epoch 1099\n", - " Train set - loss: 0.877, accuracy: 0.494\n", - " Test set - loss: 25.631, accuracy: 0.0\n", - " \n", - "epoch 1199\n", - " Train set - loss: 0.881, accuracy: 0.482\n", - " Test set - loss: 26.272, accuracy: 0.0\n", - " \n" - ] - } - ], - "source": [ - "# Fit method\n", - "nn_recommender = NNRecommender(10000, 0.1)\n", - "# nn_recommender.fit(interactions_df.head(1000), None, None)\n", - "nn_recommender.fit(interactions_df, None, None)" - ] - }, - { - "cell_type": "code", - "execution_count": 309, - "id": "digital-consolidation", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", 
- " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user_iditem_idscoretermlength_of_stay_bucketrate_planroom_segmentn_people_bucketweekend_stay
01.01034.634204OffSeason[2-3]Nonref[160-260][2-2]True
11.04664.432645OffSeason[0-1]Nonref[160-260][2-2]True
21.01094.307235OffSeason[4-7]Nonref[160-260][2-2]True
from hyperopt import hp, fmin, tpe, Trials, space_eval
import traceback


def tune_recommender(recommender_class, interactions_df, items_df,
                     param_space, max_evals=1, show_progressbar=True, seed=6789):
    """
    Tune a recommender with hyperopt (TPE) and report its metrics on a held-out test set.

    The interactions are shuffled with ``seed`` and split 80/20 into a train+validation
    part (used by the tuning objective) and a test part (used only for the final report).
    The objective minimized is the negative HR@10 returned by
    ``evaluate_train_test_split_implicit``.

    :param type recommender_class: Recommender class; constructed as
        ``recommender_class(seed=seed, **params)``.
    :param pd.DataFrame interactions_df: All recorded interactions.
    :param pd.DataFrame items_df: Items table passed through to the evaluation routine.
    :param dict param_space: Hyperopt search space keyed by constructor argument name.
    :param int max_evals: Number of hyperopt evaluations.
    :param bool show_progressbar: Whether hyperopt shows its progress bar.
    :param int seed: Seed for the shuffle, the recommenders and the evaluation.
    :return: The best parameter set as actual parameter values, or None if tuning failed.
    :rtype: dict or None
    """
    # Split into train_validation and test sets

    shuffle = np.arange(len(interactions_df))
    rng = np.random.RandomState(seed=seed)
    rng.shuffle(shuffle)

    train_fraction = 0.8
    split_index = int(len(interactions_df) * train_fraction)

    train_validation = interactions_df.iloc[shuffle[:split_index]]
    test = interactions_df.iloc[shuffle[split_index:]]

    # Tune

    def loss(tuned_params):
        recommender = recommender_class(seed=seed, **tuned_params)
        hr1, hr3, hr5, hr10, ndcg1, ndcg3, ndcg5, ndcg10 = evaluate_train_test_split_implicit(
            recommender, train_validation, items_df, seed=seed)
        return -hr10

    n_tries = 1
    best_param_set = None
    for _ in range(n_tries):
        try:
            trials = Trials()
            best_assignment = fmin(loss, space=param_space, algo=tpe.suggest,
                                   max_evals=max_evals, show_progressbar=show_progressbar,
                                   trials=trials, verbose=True)
        except Exception:
            # Report the failure and retry; Exception (not a bare except) so that
            # KeyboardInterrupt / SystemExit still stop the notebook.
            traceback.print_exc()
        else:
            # fmin returns hyperopt labels mapped to raw draws (indices for hp.choice);
            # space_eval converts them into the actual constructor arguments.
            best_param_set = space_eval(param_space, best_assignment)
            break

    if best_param_set is None:
        return None

    # Validate

    recommender = recommender_class(seed=seed, **best_param_set)

    results = [[recommender_class.__name__] + list(evaluate_train_test_split_implicit(
        recommender, {'train': train_validation, 'test': test}, items_df, seed=seed))]

    results = pd.DataFrame(results,
                           columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])

    display(HTML(results.to_html()))

    return best_param_set
\n", - "Tune your model using the code below. You only need to put the class name of your recommender and choose an appropriate parameter space." - ] - }, - { - "cell_type": "code", - "execution_count": 312, - "id": "obvious-astrology", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "epoch 99 \n", - " Train set - loss: 0.96, accuracy: 0.475\n", - " Test set - loss: 28.477, accuracy: 0.0\n", - " \n", - "epoch 199 \n", - " Train set - loss: 0.895, accuracy: 0.475\n", - " Test set - loss: 27.29, accuracy: 0.0\n", - " \n", - "epoch 299 \n", - " Train set - loss: 0.9, accuracy: 0.48\n", - " Test set - loss: 25.707, accuracy: 0.0\n", - " \n", - "epoch 399 \n", - " Train set - loss: 0.87, accuracy: 0.498\n", - " Test set - loss: 25.687, accuracy: 0.0\n", - " \n", - "epoch 499 \n", - " Train set - loss: 0.886, accuracy: 0.476\n", - " Test set - loss: 24.167, accuracy: 0.0\n", - " \n", - "epoch 599 \n", - " Train set - loss: 0.876, accuracy: 0.482\n", - " Test set - loss: 23.449, accuracy: 0.0\n", - " \n", - "epoch 699 \n", - " Train set - loss: 0.876, accuracy: 0.487\n", - " Test set - loss: 23.576, accuracy: 0.0\n", - " \n", - "epoch 799 \n", - " Train set - loss: 0.867, accuracy: 0.473\n", - " Test set - loss: 22.554, accuracy: 0.0\n", - " \n", - "epoch 899 \n", - " Train set - loss: 0.865, accuracy: 0.496\n", - " Test set - loss: 23.201, accuracy: 0.0\n", - " \n", - "epoch 999 \n", - " Train set - loss: 0.845, accuracy: 0.509\n", - " Test set - loss: 25.268, accuracy: 0.0\n", - " \n", - "epoch 1099 \n", - " Train set - loss: 0.855, accuracy: 0.493\n", - " Test set - loss: 25.903, accuracy: 0.0\n", - " \n", - "epoch 1199 \n", - " Train set - loss: 0.855, accuracy: 0.48\n", - " Test set - loss: 24.97, accuracy: 0.0\n", - " \n", - "100%|██████████| 1/1 [02:23<00:00, 143.24s/trial, best loss: -0.031544448996312986]\n", - "epoch 99\n", - " Train set - loss: 0.999, accuracy: 0.471\n", - " Test set - loss: 28.026, accuracy: 
0.0\n", - " \n", - "epoch 199\n", - " Train set - loss: 0.937, accuracy: 0.457\n", - " Test set - loss: 26.713, accuracy: 0.0\n", - " \n", - "epoch 299\n", - " Train set - loss: 0.937, accuracy: 0.481\n", - " Test set - loss: 25.02, accuracy: 0.0\n", - " \n", - "epoch 399\n", - " Train set - loss: 0.91, accuracy: 0.481\n", - " Test set - loss: 23.575, accuracy: 0.0\n", - " \n", - "epoch 499\n", - " Train set - loss: 0.912, accuracy: 0.491\n", - " Test set - loss: 24.782, accuracy: 0.0\n", - " \n", - "epoch 599\n", - " Train set - loss: 0.918, accuracy: 0.49\n", - " Test set - loss: 23.602, accuracy: 0.0\n", - " \n", - "epoch 699\n", - " Train set - loss: 0.916, accuracy: 0.478\n", - " Test set - loss: 23.995, accuracy: 0.0\n", - " \n", - "epoch 799\n", - " Train set - loss: 0.9, accuracy: 0.463\n", - " Test set - loss: 24.721, accuracy: 0.0\n", - " \n", - "epoch 899\n", - " Train set - loss: 0.905, accuracy: 0.48\n", - " Test set - loss: 26.169, accuracy: 0.0\n", - " \n", - "epoch 999\n", - " Train set - loss: 0.896, accuracy: 0.48\n", - " Test set - loss: 25.179, accuracy: 0.0\n", - " \n", - "epoch 1099\n", - " Train set - loss: 0.884, accuracy: 0.469\n", - " Test set - loss: 27.071, accuracy: 0.0\n", - " \n", - "epoch 1199\n", - " Train set - loss: 0.91, accuracy: 0.468\n", - " Test set - loss: 27.978, accuracy: 0.0\n", - " \n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
RecommenderHR@1HR@3HR@5HR@10NDCG@1NDCG@3NDCG@5NDCG@10
0NNRecommender0.0082260.017440.0236920.0332350.0082260.0136520.0162580.019356
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Best parameters:\n", - "{'n_neg_per_pos': 7.0}\n" - ] - } - ], - "source": [ - "param_space = {\n", - " 'n_neg_per_pos': hp.quniform('n_neg_per_pos', 1, 10, 1)\n", - "}\n", - "items_df['item_id'].unique().size\n", - "\n", - "best_param_set = tune_recommender(NNRecommender, interactions_df, items_df,\n", - " param_space, max_evals=1, show_progressbar=True, seed=seed)\n", - "\n", - "print(\"Best parameters:\")\n", - "print(best_param_set)" - ] - }, - { - "cell_type": "markdown", - "id": "accredited-strap", - "metadata": {}, - "source": [ - "# Final evaluation\n", - "\n", - "**Task:**
\n", - "Run the final evaluation of your recommender and present its results against the Amazon and Netflix recommenders' results. You just need to give the class name of your recommender and its tuned parameters below." - ] - }, - { - "cell_type": "code", - "execution_count": 318, - "id": "given-homework", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "epoch 99\n", - " Train set - loss: 0.956, accuracy: 0.463\n", - " Test set - loss: 47.605, accuracy: 0.0\n", - " \n", - "epoch 199\n", - " Train set - loss: 0.949, accuracy: 0.468\n", - " Test set - loss: 41.11, accuracy: 0.0\n", - " \n", - "epoch 299\n", - " Train set - loss: 0.911, accuracy: 0.485\n", - " Test set - loss: 37.505, accuracy: 0.0\n", - " \n", - "epoch 399\n", - " Train set - loss: 0.918, accuracy: 0.461\n", - " Test set - loss: 36.35, accuracy: 0.0\n", - " \n", - "epoch 499\n", - " Train set - loss: 0.925, accuracy: 0.497\n", - " Test set - loss: 36.651, accuracy: 0.0\n", - " \n", - "epoch 599\n", - " Train set - loss: 0.901, accuracy: 0.495\n", - " Test set - loss: 35.965, accuracy: 0.0\n", - " \n", - "epoch 699\n", - " Train set - loss: 0.908, accuracy: 0.474\n", - " Test set - loss: 34.862, accuracy: 0.0\n", - " \n", - "epoch 799\n", - " Train set - loss: 0.885, accuracy: 0.485\n", - " Test set - loss: 33.993, accuracy: 0.0\n", - " \n", - "epoch 899\n", - " Train set - loss: 0.894, accuracy: 0.51\n", - " Test set - loss: 35.172, accuracy: 0.0\n", - " \n", - "epoch 999\n", - " Train set - loss: 0.897, accuracy: 0.474\n", - " Test set - loss: 34.52, accuracy: 0.0\n", - " \n", - "epoch 1099\n", - " Train set - loss: 0.888, accuracy: 0.483\n", - " Test set - loss: 34.963, accuracy: 0.0\n", - " \n", - "epoch 1199\n", - " Train set - loss: 0.89, accuracy: 0.457\n", - " Test set - loss: 34.955, accuracy: 0.0\n", - " \n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
RecommenderHR@1HR@3HR@5HR@10NDCG@1NDCG@3NDCG@5NDCG@10
0NNRecommender0.0062520.0144780.0240210.0319180.0062520.0109250.0148620.017392
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "nn_recommender = NNRecommender(n_neg_per_pos=5) # Initialize your recommender here\n", - "\n", - "# Give the name of your recommender in the line below\n", - "nn_tts_results = [['NNRecommender'] + list(evaluate_train_test_split_implicit(\n", - " nn_recommender, interactions_df, items_df))]\n", - "\n", - "nn_tts_results = pd.DataFrame(\n", - " nn_tts_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n", - "\n", - "display(HTML(nn_tts_results.to_html()))" - ] - }, - { - "cell_type": "code", - "execution_count": 314, - "id": "suited-nomination", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
RecommenderHR@1HR@3HR@5HR@10NDCG@1NDCG@3NDCG@5NDCG@10
0AmazonRecommender0.0421190.104640.1405070.1994080.0421190.0768260.0917970.110711
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "from recommenders.amazon_recommender import AmazonRecommender\n", - "\n", - "amazon_recommender = AmazonRecommender()\n", - "\n", - "amazon_tts_results = [['AmazonRecommender'] + list(evaluate_train_test_split_implicit(\n", - " amazon_recommender, interactions_df, items_df))]\n", - "\n", - "amazon_tts_results = pd.DataFrame(\n", - " amazon_tts_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n", - "\n", - "display(HTML(amazon_tts_results.to_html()))" - ] - }, - { - "cell_type": "code", - "execution_count": 315, - "id": "conservative-remedy", - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Loss\n", - "\ttraining \t (min: 0.161, max: 0.228, cur: 0.161)\n", - "\tvalidation \t (min: 0.176, max: 0.242, cur: 0.177)\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
RecommenderHR@1HR@3HR@5HR@10NDCG@1NDCG@3NDCG@5NDCG@10
0NetflixRecommender0.0427770.1066140.1431390.2003950.0427770.0782280.0934830.111724
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "from recommenders.netflix_recommender import NetflixRecommender\n", - "\n", - "netflix_recommender = NetflixRecommender(n_epochs=30, print_type='live')\n", - "\n", - "netflix_tts_results = [['NetflixRecommender'] + list(evaluate_train_test_split_implicit(\n", - " netflix_recommender, interactions_df, items_df))]\n", - "\n", - "netflix_tts_results = pd.DataFrame(\n", - " netflix_tts_results, columns=['Recommender', 'HR@1', 'HR@3', 'HR@5', 'HR@10', 'NDCG@1', 'NDCG@3', 'NDCG@5', 'NDCG@10'])\n", - "\n", - "display(HTML(netflix_tts_results.to_html()))" - ] - }, - { - "cell_type": "code", - "execution_count": 316, - "id": "moderate-printing", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
RecommenderHR@1HR@3HR@5HR@10NDCG@1NDCG@3NDCG@5NDCG@10
0NNRecommender0.0065810.0157950.0240210.0361960.0065810.0118770.0152620.019205
1AmazonRecommender0.0421190.1046400.1405070.1994080.0421190.0768260.0917970.110711
2NetflixRecommender0.0427770.1066140.1431390.2003950.0427770.0782280.0934830.111724
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "tts_results = pd.concat([nn_tts_results, amazon_tts_results, netflix_tts_results]).reset_index(drop=True)\n", - "display(HTML(tts_results.to_html()))" - ] - }, - { - "cell_type": "markdown", - "id": "uniform-vegetable", - "metadata": {}, - "source": [ - "# Summary\n", - "\n", - "**Task:**
\n", - "Write a summary of your experiments. What worked well and what did not? What are your thoughts how could you possibly further improve the model?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "04e565c4", - "metadata": {}, - "outputs": [], - "source": [ - "Na początku bezmyślnie użyłem BCELoss, \n", - "to był duży błąd, który kosztował mnie godzinę szukania w internecie, dlaczego ciągle zwraca mi tylko item-id=0\n", - "\n", - "\n", - "Dodanie dropout zwiększyło HR10 z 0.03 do 0.11\n", - "\n", - "Podsumowanie:\n", - "\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "rek_uno", - "language": "python", - "name": "rek_uno" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/project_2_recommender_and_evaluation.html b/project_2_recommender_and_evaluation.html new file mode 100644 index 0000000..2b2fa80 --- /dev/null +++ b/project_2_recommender_and_evaluation.html @@ -0,0 +1,17156 @@ + + + + + +project_2_recommender_and_evaluation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/project_2_recommender_and_evaluation.ipynb b/project_2_recommender_and_evaluation.ipynb index 1ab01bb..260b9c4 100644 --- a/project_2_recommender_and_evaluation.ipynb +++ b/project_2_recommender_and_evaluation.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 302, + "execution_count": 449, "id": "alike-morgan", "metadata": {}, "outputs": [ @@ -47,7 +47,7 @@ }, { "cell_type": "code", - "execution_count": 303, + "execution_count": 450, "id": "victorian-bottom", "metadata": {}, "outputs": [ @@ -290,7 +290,7 @@ }, { "cell_type": "code", - "execution_count": 304, + "execution_count": 451, "id": "variable-jaguar", 
"metadata": {}, "outputs": [ @@ -611,7 +611,7 @@ }, { "cell_type": "code", - "execution_count": 305, + "execution_count": 452, "id": "formal-munich", "metadata": {}, "outputs": [ @@ -926,7 +926,7 @@ }, { "cell_type": "code", - "execution_count": 446, + "execution_count": 457, "id": "unlike-recipient", "metadata": {}, "outputs": [], @@ -999,25 +999,6 @@ "# return x\n", " \n", "# HR10 = 0.116 EPOCH 20000\n", - "# class Net(nn.Module):\n", - "# def __init__(self, features_len, output_len):\n", - "# super(Net, self).__init__()\n", - " \n", - "# self.fc1 = nn.Linear(features_len, 150)\n", - "# self.fc2 = nn.Linear(150, 100)\n", - "# self.fc3 = nn.Linear(100, output_len)\n", - "# self.fc4 = nn.Linear(output_len, output_len+200)\n", - " \n", - "# self.dropout = nn.Dropout(p=0.5)\n", - " \n", - "# def forward(self, x):\n", - "# x = F.relu(self.fc1(x))\n", - "# x = self.dropout(x)\n", - "# x = F.relu(self.fc2(x))\n", - "# x = self.dropout(x)\n", - "# x = F.relu(self.fc3(x))\n", - "# return self.fc4(x)\n", - " \n", "class Net(nn.Module):\n", " def __init__(self, features_len, output_len):\n", " super(Net, self).__init__()\n", @@ -1028,18 +1009,38 @@ " self.fc4 = nn.Linear(output_len, output_len+200)\n", " \n", " self.dropout = nn.Dropout(p=0.5)\n", - " self.prelu = nn.PReLU()\n", " \n", " def forward(self, x):\n", - " x = self.fc1(x)\n", - " x = self.prelu(x)\n", + " x = F.relu(self.fc1(x))\n", " x = self.dropout(x)\n", - " x = self.fc2(x)\n", - " x = self.prelu(x)\n", + " x = F.relu(self.fc2(x))\n", " x = self.dropout(x)\n", - " x = self.fc3(x)\n", - " x = self.prelu(x)\n", + " x = F.relu(self.fc3(x))\n", " return self.fc4(x)\n", + "\n", + "# A lot slower than ReLU\n", + "# class Net(nn.Module):\n", + "# def __init__(self, features_len, output_len):\n", + "# super(Net, self).__init__()\n", + " \n", + "# self.fc1 = nn.Linear(features_len, 150)\n", + "# self.fc2 = nn.Linear(150, 100)\n", + "# self.fc3 = nn.Linear(100, output_len)\n", + "# self.fc4 = nn.Linear(output_len, 
output_len+200)\n", + " \n", + "# self.dropout = nn.Dropout(p=0.5)\n", + "# self.prelu = nn.PReLU()\n", + " \n", + "# def forward(self, x):\n", + "# x = self.fc1(x)\n", + "# x = self.prelu(x)\n", + "# x = self.dropout(x)\n", + "# x = self.fc2(x)\n", + "# x = self.prelu(x)\n", + "# x = self.dropout(x)\n", + "# x = self.fc3(x)\n", + "# x = self.prelu(x)\n", + "# return self.fc4(x)\n", " \n", "class NNRecommender(Recommender):\n", " \"\"\"\n", @@ -1193,7 +1194,7 @@ }, { "cell_type": "code", - "execution_count": 412, + "execution_count": 13, "id": "greatest-canon", "metadata": {}, "outputs": [], @@ -1358,7 +1359,7 @@ }, { "cell_type": "code", - "execution_count": 310, + "execution_count": 454, "id": "strange-alaska", "metadata": {}, "outputs": [], @@ -1370,7 +1371,7 @@ }, { "cell_type": "code", - "execution_count": 311, + "execution_count": 455, "id": "stable-theta", "metadata": {}, "outputs": [], @@ -1445,10 +1446,10 @@ }, { "cell_type": "code", - "execution_count": 447, + "execution_count": 458, "id": "obvious-astrology", "metadata": { - "scrolled": false + "scrolled": true }, "outputs": [ { @@ -1456,102 +1457,959 @@ "output_type": "stream", "text": [ "epoch 0 \n", - " Train set - loss: 6.797\n", - " Test set - loss: 6.793\n", + " Train set - loss: 6.791\n", + " Test set - loss: 6.798\n", " \n", "epoch 1000 \n", - " Train set - loss: 1.009\n", - " Test set - loss: 29.285\n", + " Train set - loss: 1.044\n", + " Test set - loss: 25.104\n", " \n", "epoch 2000 \n", - " Train set - loss: 1.055\n", - " Test set - loss: 30.205\n", + " Train set - loss: 1.031\n", + " Test set - loss: 28.583\n", " \n", "epoch 3000 \n", - " Train set - loss: 0.971\n", - " Test set - loss: 35.335\n", + " Train set - loss: 0.995\n", + " Test set - loss: 32.894\n", " \n", "epoch 4000 \n", - " Train set - loss: 0.948\n", - " Test set - loss: 35.459\n", + " Train set - loss: 0.958\n", + " Test set - loss: 32.049\n", " \n", "epoch 5000 \n", - " Train set - loss: 0.927\n", - " Test set - loss: 
35.575\n", + " Train set - loss: 0.95\n", + " Test set - loss: 33.561\n", " \n", "epoch 6000 \n", - " Train set - loss: 0.968\n", - " Test set - loss: 37.951\n", + " Train set - loss: 0.919\n", + " Test set - loss: 37.039\n", " \n", "epoch 7000 \n", - " Train set - loss: 0.963\n", - " Test set - loss: 50.067\n", + " Train set - loss: 0.951\n", + " Test set - loss: 41.181\n", " \n", "epoch 8000 \n", - " Train set - loss: 0.919\n", - " Test set - loss: 48.694\n", + " Train set - loss: 0.914\n", + " Test set - loss: 39.916\n", " \n", "epoch 9000 \n", - " Train set - loss: 0.888\n", - " Test set - loss: 51.907\n", + " Train set - loss: 0.996\n", + " Test set - loss: 40.807\n", " \n", "epoch 10000 \n", - " Train set - loss: 4.246\n", - " Test set - loss: 115.464\n", + " Train set - loss: 0.917\n", + " Test set - loss: 43.963\n", " \n", "epoch 11000 \n", - " Train set - loss: 0.911\n", - " Test set - loss: 57.464\n", + " Train set - loss: 0.974\n", + " Test set - loss: 42.84\n", " \n", "epoch 12000 \n", - " Train set - loss: 0.872\n", - " Test set - loss: 64.896\n", + " Train set - loss: 0.961\n", + " Test set - loss: 48.198\n", " \n", "epoch 13000 \n", - " Train set - loss: 0.931\n", - " Test set - loss: 52.029\n", + " Train set - loss: 0.923\n", + " Test set - loss: 50.819\n", " \n", "epoch 14000 \n", - " Train set - loss: 1.024\n", - " Test set - loss: 56.175\n", + " Train set - loss: 0.989\n", + " Test set - loss: 50.511\n", " \n", - " 0%| | 0/10 [18:33\", line 33, in tune_recommender\n", - " best_param_set = fmin(loss, space=param_space, algo=tpe.suggest,\n", - " File \"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/hyperopt/fmin.py\", line 507, in fmin\n", - " return trials.fmin(\n", - " File \"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/hyperopt/base.py\", line 682, in fmin\n", - " return fmin(\n", - " File \"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/hyperopt/fmin.py\", line 553, in fmin\n", - " rval.exhaust()\n", - " File 
\"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/hyperopt/fmin.py\", line 356, in exhaust\n", - " self.run(self.max_evals - n_done, block_until_done=self.asynchronous)\n", - " File \"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/hyperopt/fmin.py\", line 292, in run\n", - " self.serial_evaluate()\n", - " File \"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/hyperopt/fmin.py\", line 170, in serial_evaluate\n", - " result = self.domain.evaluate(spec, ctrl)\n", - " File \"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/hyperopt/base.py\", line 907, in evaluate\n", - " rval = self.fn(pyll_rval)\n", - " File \"\", line 23, in loss\n", - " hr1, hr3, hr5, hr10, ndcg1, ndcg3, ndcg5, ndcg10 = evaluate_train_test_split_implicit(\n", - " File \"/home/jovyan/REK/evaluation_and_testing/testing.py\", line 93, in evaluate_train_test_split_implicit\n", - " recommender.fit(interactions_df_train, None, items_df)\n", - " File \"\", line 192, in fit\n", - " train_loss.backward()\n", - " File \"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/torch/tensor.py\", line 245, in backward\n", - " torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)\n", - " File \"/opt/conda/envs/rek_uno/lib/python3.8/site-packages/torch/autograd/__init__.py\", line 145, in backward\n", - " Variable._execution_engine.run_backward(\n", - "KeyboardInterrupt\n" + "epoch 15000 \n", + " Train set - loss: 0.974\n", + " Test set - loss: 57.224\n", + " \n", + "epoch 16000 \n", + " Train set - loss: 0.933\n", + " Test set - loss: 62.57\n", + " \n", + "epoch 17000 \n", + " Train set - loss: 0.96\n", + " Test set - loss: 63.399\n", + " \n", + "epoch 18000 \n", + " Train set - loss: 0.937\n", + " Test set - loss: 65.288\n", + " \n", + "epoch 19000 \n", + " Train set - loss: 1.02\n", + " Test set - loss: 62.537\n", + " \n", + "epoch 0 \n", + " Train set - loss: 6.797\n", + " Test set - loss: 6.792\n", + " \n", + "epoch 1000 \n", + " Train set - loss: 1.106\n", + " Test 
set - loss: 23.897\n", + " \n", + "epoch 2000 \n", + " Train set - loss: 1.028\n", + " Test set - loss: 25.238\n", + " \n", + "epoch 3000 \n", + " Train set - loss: 0.981\n", + " Test set - loss: 29.186\n", + " \n", + "epoch 4000 \n", + " Train set - loss: 0.981\n", + " Test set - loss: 30.399\n", + " \n", + "epoch 5000 \n", + " Train set - loss: 0.967\n", + " Test set - loss: 33.602\n", + " \n", + "epoch 6000 \n", + " Train set - loss: 0.992\n", + " Test set - loss: 35.063\n", + " \n", + "epoch 7000 \n", + " Train set - loss: 0.955\n", + " Test set - loss: 35.093\n", + " \n", + "epoch 8000 \n", + " Train set - loss: 0.984\n", + " Test set - loss: 35.48\n", + " \n", + "epoch 9000 \n", + " Train set - loss: 1.044\n", + " Test set - loss: 37.907\n", + " \n", + "epoch 10000 \n", + " Train set - loss: 0.914\n", + " Test set - loss: 40.246\n", + " \n", + "epoch 11000 \n", + " Train set - loss: 0.941\n", + " Test set - loss: 41.36\n", + " \n", + "epoch 12000 \n", + " Train set - loss: 0.995\n", + " Test set - loss: 41.922\n", + " \n", + "epoch 13000 \n", + " Train set - loss: 0.991\n", + " Test set - loss: 45.061\n", + " \n", + "epoch 14000 \n", + " Train set - loss: 0.907\n", + " Test set - loss: 47.871\n", + " \n", + "epoch 15000 \n", + " Train set - loss: 0.964\n", + " Test set - loss: 49.0\n", + " \n", + "epoch 16000 \n", + " Train set - loss: 0.918\n", + " Test set - loss: 49.898\n", + " \n", + "epoch 17000 \n", + " Train set - loss: 0.925\n", + " Test set - loss: 52.609\n", + " \n", + "epoch 18000 \n", + " Train set - loss: 0.943\n", + " Test set - loss: 55.524\n", + " \n", + "epoch 19000 \n", + " Train set - loss: 0.988\n", + " Test set - loss: 53.781\n", + " \n", + "epoch 0 \n", + " Train set - loss: 6.797\n", + " Test set - loss: 6.794\n", + " \n", + "epoch 1000 \n", + " Train set - loss: 1.083\n", + " Test set - loss: 24.762\n", + " \n", + "epoch 2000 \n", + " Train set - loss: 1.002\n", + " Test set - loss: 26.87\n", + " \n", + "epoch 3000 \n", + " Train set - 
loss: 1.002\n", + " Test set - loss: 29.752\n", + " \n", + "epoch 4000 \n", + " Train set - loss: 0.902\n", + " Test set - loss: 30.802\n", + " \n", + "epoch 5000 \n", + " Train set - loss: 0.966\n", + " Test set - loss: 33.726\n", + " \n", + "epoch 6000 \n", + " Train set - loss: 0.929\n", + " Test set - loss: 38.221\n", + " \n", + "epoch 7000 \n", + " Train set - loss: 0.923\n", + " Test set - loss: 40.249\n", + " \n", + "epoch 8000 \n", + " Train set - loss: 0.941\n", + " Test set - loss: 43.72\n", + " \n", + "epoch 9000 \n", + " Train set - loss: 0.988\n", + " Test set - loss: 45.261\n", + " \n", + "epoch 10000 \n", + " Train set - loss: 0.958\n", + " Test set - loss: 49.028\n", + " \n", + "epoch 11000 \n", + " Train set - loss: 0.914\n", + " Test set - loss: 51.199\n", + " \n", + "epoch 12000 \n", + " Train set - loss: 0.984\n", + " Test set - loss: 52.24\n", + " \n", + "epoch 13000 \n", + " Train set - loss: 0.935\n", + " Test set - loss: 58.326\n", + " \n", + "epoch 14000 \n", + " Train set - loss: 0.932\n", + " Test set - loss: 55.572\n", + " \n", + "epoch 15000 \n", + " Train set - loss: 0.932\n", + " Test set - loss: 57.253\n", + " \n", + "epoch 16000 \n", + " Train set - loss: 0.901\n", + " Test set - loss: 59.313\n", + " \n", + "epoch 17000 \n", + " Train set - loss: 0.934\n", + " Test set - loss: 59.817\n", + " \n", + "epoch 18000 \n", + " Train set - loss: 0.994\n", + " Test set - loss: 57.325\n", + " \n", + "epoch 19000 \n", + " Train set - loss: 0.913\n", + " Test set - loss: 59.364\n", + " \n", + "epoch 0 \n", + " Train set - loss: 6.795\n", + " Test set - loss: 6.796\n", + " \n", + "epoch 1000 \n", + " Train set - loss: 1.067\n", + " Test set - loss: 25.381\n", + " \n", + "epoch 2000 \n", + " Train set - loss: 1.039\n", + " Test set - loss: 27.164\n", + " \n", + "epoch 3000 \n", + " Train set - loss: 0.958\n", + " Test set - loss: 30.859\n", + " \n", + "epoch 4000 \n", + " Train set - loss: 0.961\n", + " Test set - loss: 32.549\n", + " \n" + ] + 
}, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "epoch 5000 \n", + " Train set - loss: 0.922\n", + " Test set - loss: 38.252\n", + " \n", + "epoch 6000 \n", + " Train set - loss: 0.971\n", + " Test set - loss: 37.736\n", + " \n", + "epoch 7000 \n", + " Train set - loss: 0.986\n", + " Test set - loss: 43.201\n", + " \n", + "epoch 8000 \n", + " Train set - loss: 0.949\n", + " Test set - loss: 43.737\n", + " \n", + "epoch 9000 \n", + " Train set - loss: 0.895\n", + " Test set - loss: 44.754\n", + " \n", + "epoch 10000 \n", + " Train set - loss: 0.976\n", + " Test set - loss: 49.17\n", + " \n", + "epoch 11000 \n", + " Train set - loss: 0.941\n", + " Test set - loss: 51.909\n", + " \n", + "epoch 12000 \n", + " Train set - loss: 0.917\n", + " Test set - loss: 53.406\n", + " \n", + "epoch 13000 \n", + " Train set - loss: 0.97\n", + " Test set - loss: 57.24\n", + " \n", + "epoch 14000 \n", + " Train set - loss: 0.944\n", + " Test set - loss: 54.791\n", + " \n", + "epoch 15000 \n", + " Train set - loss: 0.969\n", + " Test set - loss: 56.372\n", + " \n", + "epoch 16000 \n", + " Train set - loss: 0.981\n", + " Test set - loss: 58.586\n", + " \n", + "epoch 17000 \n", + " Train set - loss: 0.965\n", + " Test set - loss: 57.376\n", + " \n", + "epoch 18000 \n", + " Train set - loss: 0.988\n", + " Test set - loss: 60.655\n", + " \n", + "epoch 19000 \n", + " Train set - loss: 0.883\n", + " Test set - loss: 58.51\n", + " \n", + "epoch 0 \n", + " Train set - loss: 6.794\n", + " Test set - loss: 6.786\n", + " \n", + "epoch 1000 \n", + " Train set - loss: 1.074\n", + " Test set - loss: 24.294\n", + " \n", + "epoch 2000 \n", + " Train set - loss: 1.002\n", + " Test set - loss: 25.177\n", + " \n", + "epoch 3000 \n", + " Train set - loss: 0.979\n", + " Test set - loss: 28.115\n", + " \n", + "epoch 4000 \n", + " Train set - loss: 0.974\n", + " Test set - loss: 31.27\n", + " \n", + "epoch 5000 \n", + " Train set - loss: 0.929\n", + " Test set - loss: 35.596\n", + " \n", 
+ "epoch 6000 \n", + " Train set - loss: 0.956\n", + " Test set - loss: 39.096\n", + " \n", + "epoch 7000 \n", + " Train set - loss: 0.944\n", + " Test set - loss: 39.886\n", + " \n", + "epoch 8000 \n", + " Train set - loss: 0.951\n", + " Test set - loss: 44.383\n", + " \n", + "epoch 9000 \n", + " Train set - loss: 0.976\n", + " Test set - loss: 46.715\n", + " \n", + "epoch 10000 \n", + " Train set - loss: 0.907\n", + " Test set - loss: 48.878\n", + " \n", + "epoch 11000 \n", + " Train set - loss: 0.957\n", + " Test set - loss: 49.986\n", + " \n", + "epoch 12000 \n", + " Train set - loss: 0.998\n", + " Test set - loss: 52.608\n", + " \n", + "epoch 13000 \n", + " Train set - loss: 0.986\n", + " Test set - loss: 51.419\n", + " \n", + "epoch 14000 \n", + " Train set - loss: 0.984\n", + " Test set - loss: 55.804\n", + " \n", + "epoch 15000 \n", + " Train set - loss: 0.965\n", + " Test set - loss: 57.902\n", + " \n", + "epoch 16000 \n", + " Train set - loss: 0.905\n", + " Test set - loss: 57.022\n", + " \n", + "epoch 17000 \n", + " Train set - loss: 0.96\n", + " Test set - loss: 53.676\n", + " \n", + "epoch 18000 \n", + " Train set - loss: 0.939\n", + " Test set - loss: 62.478\n", + " \n", + "epoch 19000 \n", + " Train set - loss: 0.93\n", + " Test set - loss: 61.828\n", + " \n", + "epoch 0 \n", + " Train set - loss: 6.793\n", + " Test set - loss: 6.794\n", + " \n", + "epoch 1000 \n", + " Train set - loss: 1.063\n", + " Test set - loss: 23.191\n", + " \n", + "epoch 2000 \n", + " Train set - loss: 1.032\n", + " Test set - loss: 26.461\n", + " \n", + "epoch 3000 \n", + " Train set - loss: 1.02\n", + " Test set - loss: 29.392\n", + " \n", + "epoch 4000 \n", + " Train set - loss: 0.932\n", + " Test set - loss: 33.168\n", + " \n", + "epoch 5000 \n", + " Train set - loss: 1.017\n", + " Test set - loss: 34.574\n", + " \n", + "epoch 6000 \n", + " Train set - loss: 0.975\n", + " Test set - loss: 38.711\n", + " \n", + "epoch 7000 \n", + " Train set - loss: 0.953\n", + " Test set 
- loss: 39.829\n", + " \n", + "epoch 8000 \n", + " Train set - loss: 0.91\n", + " Test set - loss: 41.895\n", + " \n", + "epoch 9000 \n", + " Train set - loss: 0.989\n", + " Test set - loss: 45.25\n", + " \n", + "epoch 10000 \n", + " Train set - loss: 1.0\n", + " Test set - loss: 46.407\n", + " \n", + "epoch 11000 \n", + " Train set - loss: 0.98\n", + " Test set - loss: 50.797\n", + " \n", + "epoch 12000 \n", + " Train set - loss: 0.983\n", + " Test set - loss: 53.173\n", + " \n", + "epoch 13000 \n", + " Train set - loss: 0.925\n", + " Test set - loss: 54.291\n", + " \n", + "epoch 14000 \n", + " Train set - loss: 0.926\n", + " Test set - loss: 54.929\n", + " \n", + "epoch 15000 \n", + " Train set - loss: 0.986\n", + " Test set - loss: 58.36\n", + " \n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "epoch 16000 \n", + " Train set - loss: 0.944\n", + " Test set - loss: 57.972\n", + " \n", + "epoch 17000 \n", + " Train set - loss: 0.963\n", + " Test set - loss: 58.177\n", + " \n", + "epoch 18000 \n", + " Train set - loss: 0.967\n", + " Test set - loss: 57.693\n", + " \n", + "epoch 19000 \n", + " Train set - loss: 0.97\n", + " Test set - loss: 62.002\n", + " \n", + "epoch 0 \n", + " Train set - loss: 6.793\n", + " Test set - loss: 6.798\n", + " \n", + "epoch 1000 \n", + " Train set - loss: 1.046\n", + " Test set - loss: 24.413\n", + " \n", + "epoch 2000 \n", + " Train set - loss: 0.981\n", + " Test set - loss: 28.192\n", + " \n", + "epoch 3000 \n", + " Train set - loss: 0.966\n", + " Test set - loss: 29.734\n", + " \n", + "epoch 4000 \n", + " Train set - loss: 0.989\n", + " Test set - loss: 34.306\n", + " \n", + "epoch 5000 \n", + " Train set - loss: 0.967\n", + " Test set - loss: 34.852\n", + " \n", + "epoch 6000 \n", + " Train set - loss: 0.902\n", + " Test set - loss: 37.421\n", + " \n", + "epoch 7000 \n", + " Train set - loss: 0.94\n", + " Test set - loss: 37.481\n", + " \n", + "epoch 8000 \n", + " Train set - loss: 0.951\n", + " Test 
set - loss: 40.332\n", + " \n", + "epoch 9000 \n", + " Train set - loss: 0.945\n", + " Test set - loss: 48.709\n", + " \n", + "epoch 10000 \n", + " Train set - loss: 0.967\n", + " Test set - loss: 50.611\n", + " \n", + "epoch 11000 \n", + " Train set - loss: 0.99\n", + " Test set - loss: 49.536\n", + " \n", + "epoch 12000 \n", + " Train set - loss: 0.991\n", + " Test set - loss: 53.281\n", + " \n", + "epoch 13000 \n", + " Train set - loss: 0.911\n", + " Test set - loss: 53.05\n", + " \n", + "epoch 14000 \n", + " Train set - loss: 0.952\n", + " Test set - loss: 56.761\n", + " \n", + "epoch 15000 \n", + " Train set - loss: 0.97\n", + " Test set - loss: 57.142\n", + " \n", + "epoch 16000 \n", + " Train set - loss: 0.921\n", + " Test set - loss: 57.22\n", + " \n", + "epoch 17000 \n", + " Train set - loss: 0.937\n", + " Test set - loss: 59.433\n", + " \n", + "epoch 18000 \n", + " Train set - loss: 0.964\n", + " Test set - loss: 58.954\n", + " \n", + "epoch 19000 \n", + " Train set - loss: 0.91\n", + " Test set - loss: 57.752\n", + " \n", + "epoch 0 \n", + " Train set - loss: 6.797\n", + " Test set - loss: 6.793\n", + " \n", + "epoch 1000 \n", + " Train set - loss: 1.052\n", + " Test set - loss: 25.378\n", + " \n", + "epoch 2000 \n", + " Train set - loss: 0.967\n", + " Test set - loss: 30.641\n", + " \n", + "epoch 3000 \n", + " Train set - loss: 0.97\n", + " Test set - loss: 32.983\n", + " \n", + "epoch 4000 \n", + " Train set - loss: 0.931\n", + " Test set - loss: 35.008\n", + " \n", + "epoch 5000 \n", + " Train set - loss: 0.95\n", + " Test set - loss: 38.592\n", + " \n", + "epoch 6000 \n", + " Train set - loss: 0.961\n", + " Test set - loss: 41.785\n", + " \n", + "epoch 7000 \n", + " Train set - loss: 0.93\n", + " Test set - loss: 46.456\n", + " \n", + "epoch 8000 \n", + " Train set - loss: 0.977\n", + " Test set - loss: 46.483\n", + " \n", + "epoch 9000 \n", + " Train set - loss: 0.955\n", + " Test set - loss: 48.554\n", + " \n", + "epoch 10000 \n", + " Train set - 
loss: 0.941\n", + " Test set - loss: 53.479\n", + " \n", + "epoch 11000 \n", + " Train set - loss: 1.003\n", + " Test set - loss: 51.243\n", + " \n", + "epoch 12000 \n", + " Train set - loss: 0.987\n", + " Test set - loss: 55.073\n", + " \n", + "epoch 13000 \n", + " Train set - loss: 0.995\n", + " Test set - loss: 56.564\n", + " \n", + "epoch 14000 \n", + " Train set - loss: 0.953\n", + " Test set - loss: 55.438\n", + " \n", + "epoch 15000 \n", + " Train set - loss: 0.911\n", + " Test set - loss: 58.512\n", + " \n", + "epoch 16000 \n", + " Train set - loss: 0.922\n", + " Test set - loss: 57.445\n", + " \n", + "epoch 17000 \n", + " Train set - loss: 0.949\n", + " Test set - loss: 60.568\n", + " \n", + "epoch 18000 \n", + " Train set - loss: 0.984\n", + " Test set - loss: 60.303\n", + " \n", + "epoch 19000 \n", + " Train set - loss: 0.962\n", + " Test set - loss: 63.902\n", + " \n", + "100%|██████████| 10/10 [3:22:15<00:00, 1213.59s/trial, best loss: -0.0823433019254404]\n", + "epoch 0\n", + " Train set - loss: 6.842\n", + " Test set - loss: 6.834\n", + " \n", + "epoch 1000\n", + " Train set - loss: 1.101\n", + " Test set - loss: 25.026\n", + " \n", + "epoch 2000\n", + " Train set - loss: 0.971\n", + " Test set - loss: 28.552\n", + " \n", + "epoch 3000\n", + " Train set - loss: 0.989\n", + " Test set - loss: 32.089\n", + " \n", + "epoch 4000\n", + " Train set - loss: 0.99\n", + " Test set - loss: 33.257\n", + " \n", + "epoch 5000\n", + " Train set - loss: 0.985\n", + " Test set - loss: 36.744\n", + " \n", + "epoch 6000\n", + " Train set - loss: 0.971\n", + " Test set - loss: 38.915\n", + " \n", + "epoch 7000\n", + " Train set - loss: 0.977\n", + " Test set - loss: 40.527\n", + " \n", + "epoch 8000\n", + " Train set - loss: 1.013\n", + " Test set - loss: 42.967\n", + " \n", + "epoch 9000\n", + " Train set - loss: 0.981\n", + " Test set - loss: 44.936\n", + " \n", + "epoch 10000\n", + " Train set - loss: 0.975\n", + " Test set - loss: 52.466\n", + " \n", + "epoch 
11000\n", + " Train set - loss: 0.949\n", + " Test set - loss: 50.95\n", + " \n", + "epoch 12000\n", + " Train set - loss: 0.933\n", + " Test set - loss: 51.5\n", + " \n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "epoch 13000\n", + " Train set - loss: 1.023\n", + " Test set - loss: 54.636\n", + " \n", + "epoch 14000\n", + " Train set - loss: 0.987\n", + " Test set - loss: 59.892\n", + " \n", + "epoch 15000\n", + " Train set - loss: 0.996\n", + " Test set - loss: 57.323\n", + " \n", + "epoch 16000\n", + " Train set - loss: 0.989\n", + " Test set - loss: 61.067\n", + " \n", + "epoch 17000\n", + " Train set - loss: 0.969\n", + " Test set - loss: 64.222\n", + " \n", + "epoch 18000\n", + " Train set - loss: 0.925\n", + " Test set - loss: 62.306\n", + " \n", + "epoch 19000\n", + " Train set - loss: 1.006\n", + " Test set - loss: 63.963\n", + " \n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RecommenderHR@1HR@3HR@5HR@10NDCG@1NDCG@3NDCG@5NDCG@10
0NNRecommender0.0052650.0151370.0204010.0322470.0052650.0109760.0131430.01686
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Best parameters:\n", + "{'n_neg_per_pos': 5.0}\n" ] } ], @@ -1581,19 +2439,50 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 434, "id": "given-homework", "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "epoch 0\n", - " Train set - loss: 6.842\n", - " Test set - loss: 6.843\n", - " \n" - ] + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RecommenderHR@1HR@3HR@5HR@10NDCG@1NDCG@3NDCG@5NDCG@10
0NNRecommender0.0250080.0352090.0664690.1168150.0250080.03110.0436970.059459
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" } ], "source": [ @@ -1844,14 +2733,16 @@ }, { "cell_type": "markdown", - "id": "1b89411a", + "id": "8caf15c1", "metadata": {}, "source": [ "What did not work:\n", - "- I tried to use softmax, it wasn't good idea\n", - "- Firstly, I copy and paste without thinking some code from tutorial for binary linear regresion. BCELoss is not a good idea for mutli-classification.\n", + "- I tried to use softmax, it wasn't a good idea\n", + "- At first, I thoughtlessly copied and pasted some code from a tutorial for binary linear regression. BCELoss is not a good idea for multi-class classification.\n", "- More layers don't mean better results.\n", "- More epochs don't always mean better results.\n", + "- PReLU was a lot slower than ReLU and it did not give me better results.\n", + "- For some reason, the n_neg_per_pos value I got from tuning wasn't the best one. With n_neg_per_pos larger by one I got better results. \n", "\n", "What did work well:\n", "- Dropout layer increased results significantly (from HR@10 0.03 to 0.116).\n", @@ -1861,8 +2752,7 @@ "How to further improve model:\n", "- Add more data or more features\n", "- Work on network layout\n", - " \n", - "\n" + "- Try using a \"One vs All\" layout. " ] } ],