From df9d873768c69e7efe763b21f5ffe8af96f572c1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Szuszert?=
Date: Thu, 12 May 2022 16:00:04 +0200
Subject: [PATCH] decision tree

---
 dishes.json |   2 +-
 tiles.py    | 150 +++-------------------------------------------------
 2 files changed, 8 insertions(+), 144 deletions(-)

diff --git a/dishes.json b/dishes.json
index a339ae3..c9e03c3 100644
--- a/dishes.json
+++ b/dishes.json
@@ -293,7 +293,7 @@
     "vege": false,
     "size": 50,
     "allergens": "tomato",
-    "ingridients": [ "cheese", "chicken", "onion", "corn", "toamto" ],
+    "ingridients": [ "cheese", "chicken", "onion", "corn", "tomato" ],
     "drink_in": true
   },
   {
diff --git a/tiles.py b/tiles.py
index 088cbf6..ce31ae0 100644
--- a/tiles.py
+++ b/tiles.py
@@ -456,136 +456,8 @@ def evaluate_preferences(preferences):
     return data
 
 
-# decision tree ręcznie
-class GadId3Classifier:
-
-    def fit(self, input, output):
-        data = input.copy()
-        data[output.name] = output
-        self.tree = self.decision_tree(data, data, input.columns, output.name)
-
-    def predict(self, input):
-        samples = input.to_dict(orient='records')
-        predictions = []
-        for sample in samples:
-            predictions.append(self.make_prediction(sample, self.tree, 1.0))
-
-        return predictions
-
-    def entropy(self, attribute_column):
-        values, counts = np.unique(attribute_column, return_counts=True)
-        entropy_list = []
-        for i in range(len(values)):
-            probability = counts[i] / np.sum(counts)
-            entropy_list.append(-probability * np.log2(probability))
-
-        total_entropy = np.sum(entropy_list)
-        return total_entropy
-
-    def information_gain(self, data, feature_attribute_name, target_attribute_name):
-        total_entropy = self.entropy(data[target_attribute_name])
-
-        values, counts = np.unique(data[feature_attribute_name], return_counts=True)
-
-        weighted_entropy_list = []
-
-        for i in range(len(values)):
-            subset_probability = counts[i] / np.sum(counts)
-            subset_entropy = self.entropy(
-                data.where(data[feature_attribute_name] == values[i]).dropna()[target_attribute_name])
-            weighted_entropy_list.append(subset_probability * subset_entropy)
-
-        total_weighted_entropy = np.sum(weighted_entropy_list)
-
-        information_gain = total_entropy - total_weighted_entropy
-
-        return information_gain
-
-    def decision_tree(self, data, orginal_data, feature_attribute_names, target_attribute_name, parent_node_class=None):
-        unique_classes = np.unique(data[target_attribute_name])
-        if len(unique_classes) <= 1:
-            return unique_classes[0]
-        elif len(data) == 0:
-            majority_class_index = np.argmax(np.unique(original_data[target_attribute_name], return_counts=True)[1])
-            return np.unique(original_data[target_attribute_name])[majority_class_index]
-        elif len(feature_attribute_names) == 0:
-            return parent_node_class
-        else:
-            majority_class_index = np.argmax(np.unique(data[target_attribute_name], return_counts=True)[1])
-            parent_node_class = unique_classes[majority_class_index]
-            ig_values = [self.information_gain(data, feature, target_attribute_name) for feature in
-                         feature_attribute_names]
-            best_feature_index = np.argmax(ig_values)
-            best_feature = feature_attribute_names[best_feature_index]
-
-            tree = {best_feature: {}}
-
-            feature_attribute_names = [i for i in feature_attribute_names if i != best_feature]
-
-            parent_attribute_values = np.unique(data[best_feature])
-            for value in parent_attribute_values:
-                sub_data = data.where(data[best_feature] == value).dropna()
-
-                subtree = self.decision_tree(sub_data, orginal_data, feature_attribute_names, target_attribute_name,
-                                             parent_node_class)
-
-                tree[best_feature][value] = subtree
-
-            return tree
-
-    def make_prediction(self, sample, tree, default=1):
-        for attribute in list(sample.keys()):
-            if attribute in list(tree.keys()):
-                try:
-                    result = tree[attribute][sample[attribute]]
-                except:
-                    return default
-
-                result = tree[attribute][sample[attribute]]
-
-                if isinstance(result, dict):
-                    return self.make_prediction(sample, result)
-                else:
-                    return result
-
-def train_id3(prefernce):
-    df = pd.read_excel("data.xlsx")
-    d = {'low': 30, 'high': 50}
-    df['level of hunger'] = df['level of hunger'].map(d)
-
-    d = {'none': 0, 'tomato': 1, 'feta': 2, 'olives': 3}
-    df['allergy'] = df['allergy'].map(d)
-
-    d = {'none': 0, 'salami': 1, 'mushrooms': 2, 'pineapple': 3, 'shrimps': 4, 'sausage': 5}
-    df['favorite ingridient'] = df['favorite ingridient'].map(d)
-
-    d = {'margherita': 0, 'hawajska': 1, 'funghi': 2, 'light': 3, '4 sery': 4, 'pepperoni': 5,
-         'salami': 6, 'wegetarianska': 7, 'barbecue': 8, 'miesna': 9, 'paprykowa': 10,
-         'jalapeno': 11, 'barbecue wege': 12, 'kebab': 13, 'grecka': 14, 'piekielna': 15,
-         'drwala': 16, 'grzybowa': 17, 'staropolska': 18, 'goralska': 19, 'prosciutto': 20,
-         'broccoli': 21, 'americana': 22, 'farmerska': 23, 'nachos': 24, 'texas': 25,
-         'kurczak': 26, 'zielona': 27, 'mix': 28}
-
-    df['pizza'] = df['pizza'].map(d)
-
-    features = ['budget', 'spiciness', 'vege', 'level of hunger', 'allergy', 'favorite ingridient', 'drink in']
-    X = df[features]
-    y = df['pizza']
-
-    X_train, X_test, y_train, y_test = train_test_split(X, y)
-
-    model = GadId3Classifier()
-    model.fit(X_train, y_train)
-
-    pre = [prefernce]
-    df = pd.DataFrame(pre, columns=['budget','spiciness','vege','level of hunger','allergy','favorite ingridient','drink in'])
-
-    return model.predict(df)
-
-
-# decision tree z biblioteka
 def choose_pizza(prefernce):
-    df = pd.read_excel("data.xlsx")
+    df = pd.read_excel("restaurant.xlsx")
     d = {'low': 30, 'high': 50}
     df['level of hunger'] = df['level of hunger'].map(d)
 
@@ -610,7 +482,7 @@ def choose_pizza(prefernce):
     y = df['pizza']
 
     x_train, x_test, y_train, y_test = train_test_split(x, y)
-    clf = DecisionTreeClassifier(criterion='entropy')
+    clf = DecisionTreeClassifier(random_state=400)
     clf = clf.fit(x_train, y_train)
 
     return clf.predict([prefernce])
@@ -702,25 +574,17 @@ def main():
                 route = astar(map.get_arr(), (waiter.loc[1] // 32, waiter.loc[0] // 32), goal)
                 direction = [(x[1] - y[1], x[0] - y[0]) for x, y in zip(route[1:], route)]
                 break
+    print()
     print("Hello Sir, tell me yours preferences")
     print("Pass: 'budget', 'spiciness', 'vege', 'level_of_hunger', 'allergy', 'favorite_ingridient', 'drink_in'\n")
     print("Here is my list of preferences")
     ingridients = tell_preferences()
     print(ingridients)
     print()
-    evaluated_ingridients = evaluate_preferences(ingridients)
-
-    print("recznie drzewo")
-    num = train_id3(evaluated_ingridients)
-    piz = get_pizza(int(num[0]))
-    print("Name = {}, pos_in_card - {}, price = {}, spiciness = {}, vege = {}, size = {}, allergens = {}, ingridients = {}, drink_in = {}\n"
-          .format(piz.name, piz.pos_in_card, piz.price, piz.spiciness, piz.vege, piz.size,piz.allergens, piz.ingridients, piz.drink_in))
-
-    number_of_pizza = choose_pizza(evaluated_ingridients)
-    pizza = get_pizza(number_of_pizza)
-    print("drzewo z biblioteka")
-    print("Name = {}, pos_in_card - {}, price = {}, spiciness = {}, vege = {}, size = {}, allergens = {}, ingridients = {}, drink_in = {}\n"
-          .format(pizza.name,pizza.pos_in_card,pizza.price, pizza.spiciness,pizza.vege,pizza.size,pizza.allergens,pizza.ingridients,pizza.drink_in))
+    pizza = get_pizza(choose_pizza(evaluate_preferences(ingridients)))
+    print("Our proposition:")
+    print("Name = {}\nprice = {}\nspiciness = {}\nvege = {}\nsize = {}\nallergens = {}\ningridients = {}\ndrink_in = {}\n"
+          .format(pizza.name,pizza.price, pizza.spiciness,pizza.vege,pizza.size,pizza.allergens,pizza.ingridients,pizza.drink_in))
 
     if len(direction) > 0:
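
Reviewer note, outside the patch itself: the change in choose_pizza from DecisionTreeClassifier(criterion='entropy') to DecisionTreeClassifier(random_state=400) does more than pin the seed; dropping the criterion argument falls back to scikit-learn's default Gini impurity, so the library tree no longer splits on the information gain that the removed GadId3Classifier computed by hand. The snippet below is a minimal sketch on toy data (it does not read the project's restaurant.xlsx) showing that both arguments can be combined if entropy-based splits are meant to be kept.

# Sketch only, not code from this patch: toy stand-in for the encoded
# preference features used in choose_pizza (budget, spiciness, vege,
# level of hunger, allergy, favorite ingridient, drink in).
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

rng = np.random.default_rng(0)
X = rng.integers(0, 5, size=(60, 7))   # 60 fake customers, 7 encoded features
y = rng.integers(0, 29, size=60)       # fake pizza labels 0..28, as in the old mapping

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=400)

# criterion='entropy' keeps the information-gain split rule of the removed
# hand-written ID3; random_state only fixes the splitter's randomness and
# does not change the criterion.
clf = DecisionTreeClassifier(criterion='entropy', random_state=400)
clf.fit(X_train, y_train)
print(clf.predict(X_test[:1]))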