{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "alike-morgan",
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from IPython.display import Markdown, display, HTML\n",
    "from collections import defaultdict\n",
    "\n",
    "# Fix the dying kernel problem (only a problem in some installations - you can remove it, if it works without it)\n",
    "import os\n",
    "os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "blessed-knitting",
   "metadata": {},
   "source": [
    "# Load the dataset for recommenders"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "victorian-bottom",
   "metadata": {},
   "outputs": [],
   "source": [
    "data_path = os.path.join(\"data\", \"hotel_data\")\n",
    "\n",
    "interactions_df = pd.read_csv(os.path.join(data_path, \"hotel_data_interactions_df.csv\"), index_col=0)\n",
    "\n",
    "base_item_features = ['term', 'length_of_stay_bucket', 'rate_plan', 'room_segment', 'n_people_bucket', 'weekend_stay']\n",
    "\n",
    "column_values_dict = {\n",
    "    'term': ['WinterVacation', 'Easter', 'OffSeason', 'HighSeason', 'LowSeason', 'MayLongWeekend', 'NewYear', 'Christmas'],\n",
    "    'length_of_stay_bucket': ['[0-1]', '[2-3]', '[4-7]', '[8-inf]'],\n",
    "    'rate_plan': ['Standard', 'Nonref'],\n",
    "    'room_segment': ['[0-160]', '[160-260]', '[260-360]', '[360-500]', '[500-900]'],\n",
    "    'n_people_bucket': ['[1-1]', '[2-2]', '[3-4]', '[5-inf]'],\n",
    "    'weekend_stay': ['True', 'False']\n",
    "}\n",
    "\n",
    "# Cast weekend_stay to text first so its values can match the string\n",
    "# categories 'True' / 'False' declared above.\n",
    "interactions_df['weekend_stay'] = interactions_df['weekend_stay'].astype('str')\n",
    "\n",
    "# Convert every item feature to a categorical with a fixed category list.\n",
    "# Values outside the list become NaN.\n",
    "# Assign with df[col] = ... rather than df.loc[:, col] = ...: since pandas 2.0\n",
    "# the .loc form writes into the existing column in place and keeps its old\n",
    "# dtype, so the columns would not actually become categorical.\n",
    "for feature in base_item_features:\n",
    "    interactions_df[feature] = pd.Categorical(\n",
    "        interactions_df[feature], categories=column_values_dict[feature])\n",
    "\n",
    "display(HTML(interactions_df.head(15).to_html()))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "realistic-third",
   "metadata": {},
   "source": [
    "# Define user features based on reservations\n",
    "\n",
    "The content-based recommenders will be forecasting the probability of interaction between user and item based on user features vector and item features vector:\n",
    "\n",
    "