89 lines
3.3 KiB
Python
89 lines
3.3 KiB
Python
# Load libraries ---------------------------------------------
|
|
|
|
from collections import defaultdict
|
|
import numpy as np
|
|
|
|
# ------------------------------------------------------------
|
|
|
|
|
|
class DatasetSpecification(object):
|
|
|
|
def __init__(self):
|
|
pass
|
|
|
|
# ################
|
|
# Original dataset functions
|
|
# ################
|
|
|
|
def get_sum_columns(self):
|
|
return ["n_people", "n_children_1", "n_children_2", "n_children_3", "accomodation_price", "meal_price",
|
|
"service_price", "paid", "n_rooms"]
|
|
|
|
def get_mean_columns(self):
|
|
return ['discount']
|
|
|
|
def get_mode_columns(self):
|
|
return ["room_id", "room_group_id", "date_from", "date_to", "booking_date", "rate_plan",
|
|
"length_of_stay", "book_to_arrival", "weekend_stay"]
|
|
|
|
def get_first_columns(self):
|
|
return ["user_id", "client_id", "client_name", "email", "phone", "is_company"]
|
|
|
|
def get_id_columns(self):
|
|
return ["client_id", "client_name", "email", "phone"]
|
|
|
|
# ################
|
|
# Output dataset functions
|
|
# ################
|
|
|
|
def get_people_df_id_columns(self):
|
|
return ['user_id']
|
|
|
|
def get_people_df_feature_columns(self):
|
|
return []
|
|
|
|
def get_items_df_id_columns(self):
|
|
return ['item_id']
|
|
|
|
def get_items_df_feature_columns(self):
|
|
return ['term', 'length_of_stay_bucket', 'rate_plan', 'room_segment', 'n_people_bucket', 'weekend_stay']
|
|
|
|
def get_purchases_df_id_columns(self):
|
|
return ['user_id', 'item_id']
|
|
|
|
def get_purchases_df_feature_columns(self):
|
|
return []
|
|
|
|
# ################
|
|
# Mapping functions
|
|
# ################
|
|
|
|
def get_nights_buckets(self):
|
|
return [[0, 1], [2, 3], [4, 7], [8, np.inf]]
|
|
|
|
def get_npeople_buckets(self):
|
|
return [[1, 1], [2, 2], [3, 4], [5, np.inf]]
|
|
|
|
def get_room_segment_buckets(self):
|
|
return [[0, 160], [160, 260], [260, 360], [360, 500], [500, 900], [900, np.inf]]
|
|
|
|
def get_book_to_arrival_buckets(self):
|
|
return [[0, 0], [1, 2], [3, 4], [5, 7], [8, 14], [15, 30], [31, 60], [61, 90], [91, 180], [181, np.inf]]
|
|
|
|
def get_arrival_terms(self):
|
|
arrival_terms = {"Easter": [{"start": {"m": np.nan, "d": np.nan}, "end": {"m": np.nan, "d": np.nan}}],
|
|
# Treated with priority
|
|
"Christmas": [{"start": {"m": 12, "d": 22}, "end": {"m": 12, "d": 27}}],
|
|
"NewYear": [{"start": {"m": 12, "d": 28}, "end": {"m": 1, "d": 4}}],
|
|
"WinterVacation": [{"start": {"m": 1, "d": 5}, "end": {"m": 2, "d": 29}}],
|
|
"OffSeason": [
|
|
{"start": {"m": 3, "d": 1}, "end": {"m": 4, "d": 27}},
|
|
{"start": {"m": 5, "d": 6}, "end": {"m": 6, "d": 20}},
|
|
{"start": {"m": 9, "d": 26}, "end": {"m": 12, "d": 21}}],
|
|
"MayLongWeekend": [{"start": {"m": 4, "d": 28}, "end": {"m": 5, "d": 5}}],
|
|
"LowSeason": [
|
|
{"start": {"m": 6, "d": 21}, "end": {"m": 7, "d": 10}},
|
|
{"start": {"m": 8, "d": 23}, "end": {"m": 9, "d": 25}}],
|
|
"HighSeason": [{"start": {"m": 7, "d": 11}, "end": {"m": 8, "d": 22}}]}
|
|
return arrival_terms
|