drzewo decyzyjne

2022-05-12 14:30:39 +02:00 · 2022-05-12 14:30:39 +02:00 · 4020cc71d9
commit 4020cc71d9
parent d7d904a4be
3 changed files with 139 additions and 4 deletions
--- a/data.xlsx
+++ b/data.xlsx
--- a/dishes.json
+++ b/dishes.json
@ -190,7 +190,7 @@
 		"name": "grzybowa",
 		"pos_in_card": 17,
 		"price": 37,
-		"spiciness": false,
+		"spiciness": true,
 		"vege": true,
 		"size": 50,
 		"allergens": "olives",
@ -300,7 +300,7 @@
 		"name": "zielona",
 		"pos_in_card": 27,
 		"price": 50,
-		"spiciness": false,
+		"spiciness": true,
 		"vege": true,
 		"size": 30,
 		"allergens": "olives",
--- a/tiles.py
+++ b/tiles.py
@ -65,7 +65,6 @@ def generate_client():
    for i in chairs:
        for j in i:
            loc_for_client.append(j.loc)
-
    loc = (random.randint(0, len(loc_for_client)))
    client_coordinates = (loc_for_client[loc])
    return client_coordinates
@ -456,6 +455,135 @@ def evaluate_preferences(preferences):

    return data

+
+# hand-written decision tree (ID3)
+class GadId3Classifier:
+    """Hand-written ID3 decision-tree classifier for pandas data.
+
+    After ``fit`` the tree is stored in ``self.tree`` as nested dicts of the
+    form ``{feature: {value: subtree_or_class}}``; a leaf is a bare class
+    label.
+    """
+
+    def fit(self, input, output):
+        """Build the tree from a feature DataFrame and a named target Series.
+
+        ``input`` is copied, so the caller's frame is not modified.
+        """
+        data = input.copy()
+        data[output.name] = output
+        self.tree = self.decision_tree(data, data, input.columns, output.name)
+
+    def predict(self, input):
+        """Return a list with one predicted class per row of ``input``.
+
+        Rows that cannot be resolved by the tree are predicted as 1.0.
+        """
+        samples = input.to_dict(orient='records')
+        return [self.make_prediction(sample, self.tree, 1.0) for sample in samples]
+
+    def entropy(self, attribute_column):
+        """Return the Shannon entropy (base 2) of the values in a column."""
+        _, counts = np.unique(attribute_column, return_counts=True)
+        probabilities = counts / np.sum(counts)
+        return np.sum(-probabilities * np.log2(probabilities))
+
+    def information_gain(self, data, feature_attribute_name, target_attribute_name):
+        """Return the entropy reduction of the target when splitting on a feature."""
+        total_entropy = self.entropy(data[target_attribute_name])
+
+        values, counts = np.unique(data[feature_attribute_name], return_counts=True)
+        total_count = np.sum(counts)
+
+        total_weighted_entropy = 0.0
+        for value, count in zip(values, counts):
+            subset = data.where(data[feature_attribute_name] == value).dropna()
+            total_weighted_entropy += (count / total_count) * self.entropy(subset[target_attribute_name])
+
+        return total_entropy - total_weighted_entropy
+
+    def _majority_class(self, target_column):
+        """Return the most frequent value of ``target_column``."""
+        classes, counts = np.unique(target_column, return_counts=True)
+        return classes[np.argmax(counts)]
+
+    def decision_tree(self, data, orginal_data, feature_attribute_names, target_attribute_name, parent_node_class=None):
+        """Recursively build the ID3 tree for ``data``.
+
+        ``orginal_data`` (parameter name kept as-is for compatibility) is the
+        full training frame; it supplies the fallback class when ``data`` is
+        empty. ``parent_node_class`` is the majority class of the parent node
+        and is returned when no features are left to split on.
+
+        Returns either a class label (leaf) or a one-key dict
+        ``{best_feature: {value: subtree}}``.
+        """
+        # The empty check must come first: np.unique of an empty column has no
+        # element 0, and dropna() below can leave a subset with no rows.
+        if len(data) == 0:
+            return self._majority_class(orginal_data[target_attribute_name])
+
+        unique_classes = np.unique(data[target_attribute_name])
+        if len(unique_classes) <= 1:
+            return unique_classes[0]
+        if len(feature_attribute_names) == 0:
+            return parent_node_class
+
+        parent_node_class = self._majority_class(data[target_attribute_name])
+        ig_values = [self.information_gain(data, feature, target_attribute_name)
+                     for feature in feature_attribute_names]
+        best_feature = feature_attribute_names[np.argmax(ig_values)]
+
+        # A feature is used at most once on any root-to-leaf path.
+        remaining_features = [name for name in feature_attribute_names if name != best_feature]
+
+        tree = {best_feature: {}}
+        for value in np.unique(data[best_feature]):
+            sub_data = data.where(data[best_feature] == value).dropna()
+            tree[best_feature][value] = self.decision_tree(
+                sub_data, orginal_data, remaining_features, target_attribute_name, parent_node_class)
+
+        return tree
+
+    def make_prediction(self, sample, tree, default=1):
+        """Walk ``tree`` with one ``sample`` dict and return the predicted class.
+
+        ``default`` is returned when the sample has a feature value the tree
+        has no branch for, or when none of the sample's keys is in the tree.
+        """
+        # The fitted tree is a bare label when all training rows share a class.
+        if not isinstance(tree, dict):
+            return tree
+
+        for attribute in sample:
+            if attribute in tree:
+                try:
+                    result = tree[attribute][sample[attribute]]
+                except (KeyError, TypeError):
+                    # Value unseen in training (or unhashable): no branch.
+                    return default
+
+                if isinstance(result, dict):
+                    # Pass the default down so nested misses honour it too.
+                    return self.make_prediction(sample, result, default)
+                return result
+
+        return default
+
+
+def train_id3(prefernce):
+    """Train the hand-written ID3 tree on data.xlsx and predict one pizza.
+
+    ``prefernce`` is one row of feature values in the order of ``features``
+    below. Returns the list produced by ``GadId3Classifier.predict`` for that
+    single row (one encoded pizza number).
+    """
+    df = pd.read_excel("data.xlsx")
+
+    # Encode the text columns as numbers; values missing from a mapping
+    # become NaN.
+    hunger_codes = {'low': 30, 'high': 50}
+    df['level of hunger'] = df['level of hunger'].map(hunger_codes)
+
+    allergy_codes = {'none': 0, 'tomato': 1, 'feta': 2, 'olives': 3}
+    df['allergy'] = df['allergy'].map(allergy_codes)
+
+    ingredient_codes = {'none': 0, 'salami': 1, 'mushrooms': 2, 'pineapple': 3, 'shrimps': 4, 'sausage': 5}
+    df['favorite ingridient'] = df['favorite ingridient'].map(ingredient_codes)
+
+    pizza_codes = {'margherita': 0, 'hawajska': 1, 'funghi': 2, 'light': 3, '4 sery': 4, 'pepperoni': 5,
+                   'salami': 6, 'wegetarianska': 7, 'barbecue': 8, 'miesna': 9, 'paprykowa': 10,
+                   'jalapeno': 11, 'barbecue wege': 12, 'kebab': 13, 'grecka': 14, 'piekielna': 15,
+                   'drwala': 16, 'grzybowa': 17, 'staropolska': 18, 'goralska': 19, 'prosciutto': 20,
+                   'broccoli': 21, 'americana': 22, 'farmerska': 23, 'nachos': 24, 'texas': 25,
+                   'kurczak': 26, 'zielona': 27, 'mix': 28}
+    df['pizza'] = df['pizza'].map(pizza_codes)
+
+    features = ['budget', 'spiciness', 'vege', 'level of hunger', 'allergy', 'favorite ingridient', 'drink in']
+
+    # Fit on every row: the held-out split was never evaluated, so splitting
+    # only discarded a random part of the data and made the result vary
+    # between calls.
+    model = GadId3Classifier()
+    model.fit(df[features], df['pizza'])
+
+    # Reuse the same column list so the query cannot drift from training.
+    query = pd.DataFrame([prefernce], columns=features)
+
+    return model.predict(query)
+
+
+
+# library-based decision tree
 def choose_pizza(prefernce):
    df = pd.read_excel("data.xlsx")

@ -581,9 +709,16 @@ def main():
            print(ingridients)
            print()
            evaluated_ingridients = evaluate_preferences(ingridients)
+
+            print("recznie drzewo")
+            num = train_id3(evaluated_ingridients)
+            piz = get_pizza(int(num[0]))
+            print("Name = {}, pos_in_card - {}, price = {}, spiciness = {}, vege = {}, size = {}, allergens = {}, ingridients = {}, drink_in = {}\n"
+                .format(piz.name, piz.pos_in_card, piz.price, piz.spiciness, piz.vege, piz.size,piz.allergens, piz.ingridients, piz.drink_in))
+
            number_of_pizza = choose_pizza(evaluated_ingridients)
            pizza = get_pizza(number_of_pizza)
-            print("In case we don't offer pizza with identical ingredients, we offer:")
+            print("drzewo z biblioteka")
            print("Name = {}, pos_in_card - {}, price = {}, spiciness = {}, vege = {}, size = {}, allergens = {}, ingridients = {}, drink_in = {}\n"
                  .format(pizza.name,pizza.pos_in_card,pizza.price, pizza.spiciness,pizza.vege,pizza.size,pizza.allergens,pizza.ingridients,pizza.drink_in))