diff --git a/data.xlsx b/data.xlsx
index 810f6a7..aede1fd 100644
Binary files a/data.xlsx and b/data.xlsx differ
diff --git a/dishes.json b/dishes.json
index 56d0da2..a339ae3 100644
--- a/dishes.json
+++ b/dishes.json
@@ -190,7 +190,7 @@
         "name": "grzybowa",
         "pos_in_card": 17,
         "price": 37,
-        "spiciness": false,
+        "spiciness": true,
         "vege": true,
         "size": 50,
         "allergens": "olives",
@@ -300,7 +300,7 @@
         "name": "zielona",
         "pos_in_card": 27,
         "price": 50,
-        "spiciness": false,
+        "spiciness": true,
         "vege": true,
         "size": 30,
         "allergens": "olives",
diff --git a/tiles.py b/tiles.py
index 86e9b08..088cbf6 100644
--- a/tiles.py
+++ b/tiles.py
@@ -65,7 +65,6 @@ def generate_client():
     for i in chairs:
         for j in i:
             loc_for_client.append(j.loc)
-    loc = (random.randint(0, len(loc_for_client)))
     client_coordinates = (loc_for_client[loc])
     return client_coordinates
 
@@ -456,6 +455,135 @@ def evaluate_preferences(preferences):
 
     return data
 
+
+# decision tree implemented by hand (ID3)
+class GadId3Classifier:
+
+    def fit(self, input, output):
+        data = input.copy()
+        data[output.name] = output
+        self.tree = self.decision_tree(data, data, input.columns, output.name)
+
+    def predict(self, input):
+        samples = input.to_dict(orient='records')
+        predictions = []
+        for sample in samples:
+            predictions.append(self.make_prediction(sample, self.tree, 1.0))
+
+        return predictions
+
+    def entropy(self, attribute_column):
+        values, counts = np.unique(attribute_column, return_counts=True)
+        entropy_list = []
+        for i in range(len(values)):
+            probability = counts[i] / np.sum(counts)
+            entropy_list.append(-probability * np.log2(probability))
+
+        total_entropy = np.sum(entropy_list)
+        return total_entropy
+
+    def information_gain(self, data, feature_attribute_name, target_attribute_name):
+        total_entropy = self.entropy(data[target_attribute_name])
+
+        values, counts = np.unique(data[feature_attribute_name], return_counts=True)
+
+        weighted_entropy_list = []
+
+        for i in range(len(values)):
+            subset_probability = counts[i] / np.sum(counts)
+            subset_entropy = self.entropy(
+                data.where(data[feature_attribute_name] == values[i]).dropna()[target_attribute_name])
+            weighted_entropy_list.append(subset_probability * subset_entropy)
+
+        total_weighted_entropy = np.sum(weighted_entropy_list)
+
+        information_gain = total_entropy - total_weighted_entropy
+
+        return information_gain
+
+    def decision_tree(self, data, original_data, feature_attribute_names, target_attribute_name, parent_node_class=None):
+        unique_classes = np.unique(data[target_attribute_name])
+        if len(unique_classes) <= 1:
+            return unique_classes[0]
+        elif len(data) == 0:
+            majority_class_index = np.argmax(np.unique(original_data[target_attribute_name], return_counts=True)[1])
+            return np.unique(original_data[target_attribute_name])[majority_class_index]
+        elif len(feature_attribute_names) == 0:
+            return parent_node_class
+        else:
+            majority_class_index = np.argmax(np.unique(data[target_attribute_name], return_counts=True)[1])
+            parent_node_class = unique_classes[majority_class_index]
+            ig_values = [self.information_gain(data, feature, target_attribute_name) for feature in
+                         feature_attribute_names]
+            best_feature_index = np.argmax(ig_values)
+            best_feature = feature_attribute_names[best_feature_index]
+
+            tree = {best_feature: {}}
+
+            feature_attribute_names = [i for i in feature_attribute_names if i != best_feature]
+
+            parent_attribute_values = np.unique(data[best_feature])
+            for value in parent_attribute_values:
+                sub_data = data.where(data[best_feature] == value).dropna()
+
+                subtree = self.decision_tree(sub_data, original_data, feature_attribute_names, target_attribute_name,
+                                             parent_node_class)
+
+                tree[best_feature][value] = subtree
+
+            return tree
+
+    def make_prediction(self, sample, tree, default=1):
+        for attribute in list(sample.keys()):
+            if attribute in list(tree.keys()):
+                try:
+                    result = tree[attribute][sample[attribute]]
+                except KeyError:
+                    return default
+
+                if isinstance(result, dict):
+                    return self.make_prediction(sample, result)
+                else:
+                    return result
+
+
+def train_id3(prefernce):
+    df = pd.read_excel("data.xlsx")
+    d = {'low': 30, 'high': 50}
+    df['level of hunger'] = df['level of hunger'].map(d)
+
+    d = {'none': 0, 'tomato': 1, 'feta': 2, 'olives': 3}
+    df['allergy'] = df['allergy'].map(d)
+
+    d = {'none': 0, 'salami': 1, 'mushrooms': 2, 'pineapple': 3, 'shrimps': 4, 'sausage': 5}
+    df['favorite ingridient'] = df['favorite ingridient'].map(d)
+
+    d = {'margherita': 0, 'hawajska': 1, 'funghi': 2, 'light': 3, '4 sery': 4, 'pepperoni': 5,
+         'salami': 6, 'wegetarianska': 7, 'barbecue': 8, 'miesna': 9, 'paprykowa': 10,
+         'jalapeno': 11, 'barbecue wege': 12, 'kebab': 13, 'grecka': 14, 'piekielna': 15,
+         'drwala': 16, 'grzybowa': 17, 'staropolska': 18, 'goralska': 19, 'prosciutto': 20,
+         'broccoli': 21, 'americana': 22, 'farmerska': 23, 'nachos': 24, 'texas': 25,
+         'kurczak': 26, 'zielona': 27, 'mix': 28}
+
+    df['pizza'] = df['pizza'].map(d)
+
+    features = ['budget', 'spiciness', 'vege', 'level of hunger', 'allergy', 'favorite ingridient', 'drink in']
+    X = df[features]
+    y = df['pizza']
+
+    X_train, X_test, y_train, y_test = train_test_split(X, y)
+
+    model = GadId3Classifier()
+    model.fit(X_train, y_train)
+
+    pre = [prefernce]
+    df = pd.DataFrame(pre, columns=['budget', 'spiciness', 'vege', 'level of hunger', 'allergy', 'favorite ingridient', 'drink in'])
+
+    return model.predict(df)
+
+
+# decision tree built with a library
 def choose_pizza(prefernce):
     df = pd.read_excel("data.xlsx")
@@ -581,9 +709,16 @@ def main():
         print(ingridients)
         print()
         evaluated_ingridients = evaluate_preferences(ingridients)
+
+        print("hand-built decision tree")
+        num = train_id3(evaluated_ingridients)
+        piz = get_pizza(int(num[0]))
+        print("Name = {}, pos_in_card - {}, price = {}, spiciness = {}, vege = {}, size = {}, allergens = {}, ingridients = {}, drink_in = {}\n"
+              .format(piz.name, piz.pos_in_card, piz.price, piz.spiciness, piz.vege, piz.size, piz.allergens, piz.ingridients, piz.drink_in))
+
         number_of_pizza = choose_pizza(evaluated_ingridients)
         pizza = get_pizza(number_of_pizza)
-        print("In case we don't offer pizza with identical ingredients, we offer:")
+        print("decision tree from the library")
         print("Name = {}, pos_in_card - {}, price = {}, spiciness = {}, vege = {}, size = {}, allergens = {}, ingridients = {}, drink_in = {}\n"
             .format(pizza.name,pizza.pos_in_card,pizza.price, pizza.spiciness,pizza.vege,pizza.size,pizza.allergens,pizza.ingridients,pizza.drink_in))
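
Note: a minimal usage sketch of the hand-rolled GadId3Classifier introduced in tiles.py above. The class name, its fit/predict interface, and the feature names mirror the diff; the toy training values, the single-client row, and the assumption that tiles.py can be imported without starting the game (i.e. main() sits behind an `if __name__ == "__main__":` guard) are illustrative assumptions, not part of this change.

# Minimal sketch, assuming tiles.py imports cleanly and numpy/pandas are installed.
# The toy data below is made up; real training data comes from data.xlsx via train_id3().
import pandas as pd

from tiles import GadId3Classifier  # the classifier added in this diff

# Tiny training frame using the same encoded feature names that train_id3() builds.
train = pd.DataFrame({
    'budget': [30, 50, 30, 50],
    'spiciness': [0, 1, 0, 1],
    'vege': [1, 0, 1, 0],
    'level of hunger': [30, 50, 50, 50],
    'allergy': [0, 3, 0, 1],
    'favorite ingridient': [2, 1, 3, 5],
    'drink in': [1, 0, 0, 0],
    'pizza': [2, 6, 7, 9],  # encoded pizza labels, as in train_id3()
})

X = train.drop(columns=['pizza'])
y = train['pizza']

model = GadId3Classifier()
model.fit(X, y)  # builds the ID3 tree from the toy data

# One hypothetical client; attribute values never seen during training fall back
# to the default class passed to make_prediction().
client = pd.DataFrame([[30, 0, 1, 30, 0, 2, 1]], columns=list(X.columns))
print(model.predict(client))  # e.g. [2] for this toy data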