From bf6cef912ed1cb98af2a410feabcb4d43de0588b Mon Sep 17 00:00:00 2001
From: Stanislav Lytvynenko
Date: Thu, 27 Jun 2024 02:50:38 +0200
Subject: [PATCH] added decision tree

---
 ai-wozek/.idea/misc.xml  |  2 +-
 ai-wozek/.idea/wozek.iml |  2 +-
 ai-wozek/decision_tree   | 74 ++++++++++++++++++++++++++++++++++++++++
 ai-wozek/wozek.py        | 66 ++++++++++++++++++++++++++++++++++-
 4 files changed, 141 insertions(+), 3 deletions(-)
 create mode 100644 ai-wozek/decision_tree

diff --git a/ai-wozek/.idea/misc.xml b/ai-wozek/.idea/misc.xml
index f47cba7..a9bb0db 100644
--- a/ai-wozek/.idea/misc.xml
+++ b/ai-wozek/.idea/misc.xml
@@ -3,5 +3,5 @@
-
+
\ No newline at end of file
diff --git a/ai-wozek/.idea/wozek.iml b/ai-wozek/.idea/wozek.iml
index acbca30..74d515a 100644
--- a/ai-wozek/.idea/wozek.iml
+++ b/ai-wozek/.idea/wozek.iml
@@ -4,7 +4,7 @@
-
+
\ No newline at end of file
diff --git a/ai-wozek/decision_tree b/ai-wozek/decision_tree
new file mode 100644
index 0000000..5bfb347
--- /dev/null
+++ b/ai-wozek/decision_tree
@@ -0,0 +1,74 @@
+digraph {
+	root [label=root]
+	Label_State [label=Label_State]
+	no [label=no]
+	Label_State -> no [label=""]
+	Height [label=Height]
+	no -> Height [label=""]
+	no [label=no shape=box]
+	Height -> no [label=medium]
+	no [label=no shape=box]
+	Height -> no [label=big]
+	small [label=small]
+	Height -> small [label=""]
+	Width [label=Width]
+	small -> Width [label=""]
+	small [label=small]
+	Width -> small [label=""]
+	Depth [label=Depth]
+	small -> Depth [label=""]
+	no [label=no shape=box]
+	Depth -> no [label=big]
+	yes [label=yes shape=box]
+	Depth -> yes [label=medium]
+	no [label=no shape=box]
+	Width -> no [label=big]
+	medium [label=medium]
+	Width -> medium [label=""]
+	Depth [label=Depth]
+	medium -> Depth [label=""]
+	no [label=no shape=box]
+	Depth -> no [label=big]
+	yes [label=yes shape=box]
+	Depth -> yes [label=medium]
+	yes [label=yes shape=box]
+	Depth -> yes [label=small]
+	yes [label=yes]
+	Label_State -> yes [label=""]
+	Damage [label=Damage]
+	yes -> Damage [label=""]
+	yes [label=yes shape=box]
+	Damage -> yes [label=no]
+	yes [label=yes]
+	Damage -> yes [label=""]
+	Height [label=Height]
+	yes -> Height [label=""]
+	no [label=no shape=box]
+	Height -> no [label=medium]
+	no [label=no shape=box]
+	Height -> no [label=big]
+	small [label=small]
+	Height -> small [label=""]
+	Width [label=Width]
+	small -> Width [label=""]
+	no [label=no shape=box]
+	Width -> no [label=big]
+	small [label=small]
+	Width -> small [label=""]
+	Depth [label=Depth]
+	small -> Depth [label=""]
+	yes [label=yes shape=box]
+	Depth -> yes [label=medium]
+	no [label=no shape=box]
+	Depth -> no [label=big]
+	yes [label=yes shape=box]
+	Depth -> yes [label=small]
+	medium [label=medium]
+	Width -> medium [label=""]
+	Value [label=Value]
+	medium -> Value [label=""]
+	yes [label=yes shape=box]
+	Value -> yes [label=cheap]
+	no [label=no shape=box]
+	Value -> no [label=expensive]
+}
diff --git a/ai-wozek/wozek.py b/ai-wozek/wozek.py
index afd08aa..67cdce3 100644
--- a/ai-wozek/wozek.py
+++ b/ai-wozek/wozek.py
@@ -378,13 +378,77 @@ def astar(isstate,final):
 # decision tree
 tree_data_base = pd.read_csv('paczki.csv')
+
+
 def entropy(data):
     labels = data.iloc[:, -1]  # the last column contains the class labels
-    counts = labels.value_counts() #counts how many times each label occurs
+    counts = labels.value_counts()  # counts how many times each label occurs
     probabilities = counts / len(labels)
     entropy = -sum(probabilities * np.log2(probabilities))
     return entropy
+
+def information_gain(data, attribute):
+    total_entropy = entropy(data)
+    values = data[attribute].unique()  # every value the attribute takes, e.g. high/low/medium
+    weighted_entropy = 0
+    for value in values:
+        subset = data[data[attribute] == value]  # all rows with this attribute value go into subset
+        subset_entropy = entropy(subset)
+        weighted_entropy += (len(subset) / len(data)) * subset_entropy
+    return total_entropy - weighted_entropy
+
+
+def id3(data, attributes, target_attribute):
+    unique_targets = data[target_attribute].unique()
+
+    # If all examples share the same label, return that label
+    if len(unique_targets) == 1:
+        return unique_targets[0]
+
+    # If the attribute set is empty, return the most common label
+    if len(attributes) == 0:
+        return data[target_attribute].mode()[0]
+
+    # Pick the attribute with the highest information gain
+    info_gains = [(attr, information_gain(data, attr)) for attr in attributes]
+    best_attribute = max(info_gains, key=lambda x: x[1])[0]
+
+    # Create the tree node
+    tree = {best_attribute: {}}
+
+    # Remove the chosen attribute from the attribute list
+    attributes = [attr for attr in attributes if attr != best_attribute]
+
+    # Build a branch for every value of the chosen attribute
+    for value in data[best_attribute].unique():
+        subset = data[data[best_attribute] == value]
+        subtree = id3(subset, attributes, target_attribute)
+        tree[best_attribute][value] = subtree
+
+    return tree
+
+
+# Prepare the data
+data = tree_data_base.iloc[:, :9]  # the first eight columns are attributes, the last one is the label
+attributes = list(data.columns[:-1])
+target_attribute = data.columns[-1]
+
+# Train the decision tree
+decision_tree = id3(data, attributes, target_attribute)
+
+
+# Optional preview of the learned tree
+def print_tree(tree, indent=""):
+    if isinstance(tree, dict):
+        for key, value in tree.items():
+            print(f"{indent}{key}")
+            print_tree(value, indent + " ")
+    else:
+        print(f"{indent}{tree}")
+
+
+print_tree(decision_tree)
 def information_gain(data, attribute):
     total_entropy = entropy(data)
     values = data[attribute].unique() #every value the attribute takes, e.g. high/low/medium
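
Note on the entropy/information-gain pair added above: entropy() is the Shannon entropy of the label column, and information_gain() subtracts the size-weighted entropy of each value subset from it. Below is a minimal, self-contained sketch of the same computation on a toy frame; the column names Height and Label and the four rows are made up purely for illustration, the real data comes from paczki.csv, which is not part of this patch.

import numpy as np
import pandas as pd


def entropy(data):
    # Shannon entropy of the last column (the class label)
    labels = data.iloc[:, -1]
    probabilities = labels.value_counts() / len(labels)
    return -sum(probabilities * np.log2(probabilities))


def information_gain(data, attribute):
    # entropy of the whole set minus the size-weighted entropy of each value subset
    weighted = sum(
        (len(subset) / len(data)) * entropy(subset)
        for _, subset in data.groupby(attribute)
    )
    return entropy(data) - weighted


toy = pd.DataFrame({
    "Height": ["small", "small", "big", "big"],
    "Label":  ["yes",   "yes",   "no",  "yes"],
})
print(entropy(toy))                     # -(3/4)*log2(3/4) - (1/4)*log2(1/4) ~= 0.811
print(information_gain(toy, "Height"))  # 0.811 - (2/4)*0.0 - (2/4)*1.0 ~= 0.311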
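id3() returns the tree as nested dicts of the form {attribute: {value: subtree-or-label}}; the patch only prints that structure with print_tree(). A hedged sketch of how a single row could be classified with it follows; classify(), its default fallback, and the example row are not in the patch, and the attribute names and values are taken from the committed decision_tree file.

def classify(tree, row, default="no"):
    # Walk the nested dicts until a leaf (a plain label) is reached.
    while isinstance(tree, dict):
        attribute = next(iter(tree))   # attribute tested at this node
        branches = tree[attribute]
        value = row.get(attribute)     # the row's value for that attribute
        if value not in branches:      # value never seen during training
            return default
        tree = branches[value]
    return tree


# Example call with made-up values; the keys mirror the attributes in decision_tree.
row = {"Label_State": "yes", "Damage": "no", "Height": "small",
       "Width": "medium", "Depth": "small", "Value": "cheap"}
print(classify(decision_tree, row))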
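The committed decision_tree file is Graphviz DOT source, but the code that generated it is not part of this diff. A rough sketch, assuming the graphviz Python package, of how such a file could be written from the nested-dict tree; unlike the committed file it gives every node a unique id, since in DOT repeated names such as `no` all refer to the same node.

from graphviz import Digraph


def export_tree(tree, filename="decision_tree"):
    dot = Digraph()
    counter = [0]  # gives every node a unique id such as n1, n2, ...

    def add(subtree, parent=None, edge_label=""):
        counter[0] += 1
        node_id = f"n{counter[0]}"
        if isinstance(subtree, dict):
            attribute = next(iter(subtree))
            dot.node(node_id, label=attribute)  # decision node labelled with the attribute
            if parent is not None:
                dot.edge(parent, node_id, label=edge_label)
            for value, child in subtree[attribute].items():
                add(child, node_id, str(value))
        else:
            dot.node(node_id, label=str(subtree), shape="box")  # leaf with the class label
            if parent is not None:
                dot.edge(parent, node_id, label=edge_label)

    add(tree)
    dot.save(filename)  # writes the DOT source to the given file name


export_tree(decision_tree)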