From bf6cef912ed1cb98af2a410feabcb4d43de0588b Mon Sep 17 00:00:00 2001
From: Stanislav Lytvynenko
Date: Thu, 27 Jun 2024 02:50:38 +0200
Subject: [PATCH] added decision tree

---
 ai-wozek/.idea/misc.xml  |  2 +-
 ai-wozek/.idea/wozek.iml |  2 +-
 ai-wozek/decision_tree   | 74 ++++++++++++++++++++++++++++++++++++++++
 ai-wozek/wozek.py        | 66 ++++++++++++++++++++++++++++++++++-
 4 files changed, 141 insertions(+), 3 deletions(-)
 create mode 100644 ai-wozek/decision_tree

diff --git a/ai-wozek/.idea/misc.xml b/ai-wozek/.idea/misc.xml
index f47cba7..a9bb0db 100644
--- a/ai-wozek/.idea/misc.xml
+++ b/ai-wozek/.idea/misc.xml
@@ -3,5 +3,5 @@
-
+
\ No newline at end of file
diff --git a/ai-wozek/.idea/wozek.iml b/ai-wozek/.idea/wozek.iml
index acbca30..74d515a 100644
--- a/ai-wozek/.idea/wozek.iml
+++ b/ai-wozek/.idea/wozek.iml
@@ -4,7 +4,7 @@
-
+
\ No newline at end of file
diff --git a/ai-wozek/decision_tree b/ai-wozek/decision_tree
new file mode 100644
index 0000000..5bfb347
--- /dev/null
+++ b/ai-wozek/decision_tree
@@ -0,0 +1,74 @@
+digraph {
+	root [label=root]
+	Label_State [label=Label_State]
+	no [label=no]
+	Label_State -> no [label=""]
+	Height [label=Height]
+	no -> Height [label=""]
+	no [label=no shape=box]
+	Height -> no [label=medium]
+	no [label=no shape=box]
+	Height -> no [label=big]
+	small [label=small]
+	Height -> small [label=""]
+	Width [label=Width]
+	small -> Width [label=""]
+	small [label=small]
+	Width -> small [label=""]
+	Depth [label=Depth]
+	small -> Depth [label=""]
+	no [label=no shape=box]
+	Depth -> no [label=big]
+	yes [label=yes shape=box]
+	Depth -> yes [label=medium]
+	no [label=no shape=box]
+	Width -> no [label=big]
+	medium [label=medium]
+	Width -> medium [label=""]
+	Depth [label=Depth]
+	medium -> Depth [label=""]
+	no [label=no shape=box]
+	Depth -> no [label=big]
+	yes [label=yes shape=box]
+	Depth -> yes [label=medium]
+	yes [label=yes shape=box]
+	Depth -> yes [label=small]
+	yes [label=yes]
+	Label_State -> yes [label=""]
+	Damage [label=Damage]
+	yes -> Damage [label=""]
+	yes [label=yes shape=box]
+	Damage -> yes [label=no]
+	yes [label=yes]
+	Damage -> yes [label=""]
+	Height [label=Height]
+	yes -> Height [label=""]
+	no [label=no shape=box]
+	Height -> no [label=medium]
+	no [label=no shape=box]
+	Height -> no [label=big]
+	small [label=small]
+	Height -> small [label=""]
+	Width [label=Width]
+	small -> Width [label=""]
+	no [label=no shape=box]
+	Width -> no [label=big]
+	small [label=small]
+	Width -> small [label=""]
+	Depth [label=Depth]
+	small -> Depth [label=""]
+	yes [label=yes shape=box]
+	Depth -> yes [label=medium]
+	no [label=no shape=box]
+	Depth -> no [label=big]
+	yes [label=yes shape=box]
+	Depth -> yes [label=small]
+	medium [label=medium]
+	Width -> medium [label=""]
+	Value [label=Value]
+	medium -> Value [label=""]
+	yes [label=yes shape=box]
+	Value -> yes [label=cheap]
+	no [label=no shape=box]
+	Value -> no [label=expensive]
+}
diff --git a/ai-wozek/wozek.py b/ai-wozek/wozek.py
index afd08aa..67cdce3 100644
--- a/ai-wozek/wozek.py
+++ b/ai-wozek/wozek.py
@@ -378,13 +378,77 @@ def astar(isstate,final):
 # decision tree
 tree_data_base = pd.read_csv('paczki.csv')
+
+
 def entropy(data):
     labels = data.iloc[:, -1]  # the last column contains the class labels
-    counts = labels.value_counts() #counts how many times each label occurs
+    counts = labels.value_counts()  # counts how many times each label occurs
     probabilities = counts / len(labels)
     entropy = -sum(probabilities * np.log2(probabilities))
     return entropy
+
+def information_gain(data, attribute):
+    total_entropy = entropy(data)
+    values = data[attribute].unique()  # every value the attribute takes, e.g. high/low/medium
+    weighted_entropy = 0
+    for value in values:
+        subset = data[data[attribute] == value]  # all rows with this attribute value go into subset
+        subset_entropy = entropy(subset)
+        weighted_entropy += (len(subset) / len(data)) * subset_entropy
+    return total_entropy - weighted_entropy
+
+
+def id3(data, attributes, target_attribute):
+    unique_targets = data[target_attribute].unique()
+
+    # If all examples share the same label, return that label
+    if len(unique_targets) == 1:
+        return unique_targets[0]
+
+    # If the attribute set is empty, return the most common label
+    if len(attributes) == 0:
+        return data[target_attribute].mode()[0]
+
+    # Pick the attribute with the highest information gain
+    info_gains = [(attr, information_gain(data, attr)) for attr in attributes]
+    best_attribute = max(info_gains, key=lambda x: x[1])[0]
+
+    # Create the tree node
+    tree = {best_attribute: {}}
+
+    # Remove the chosen attribute from the attribute list
+    attributes = [attr for attr in attributes if attr != best_attribute]
+
+    # Build a branch for every value of the chosen attribute
+    for value in data[best_attribute].unique():
+        subset = data[data[best_attribute] == value]
+        subtree = id3(subset, attributes, target_attribute)
+        tree[best_attribute][value] = subtree
+
+    return tree
+
+
+# Prepare the data
+data = tree_data_base.iloc[:, :9]  # the first eight columns are attributes, the last one is the label
+attributes = list(data.columns[:-1])
+target_attribute = data.columns[-1]
+
+# Train the decision tree
+decision_tree = id3(data, attributes, target_attribute)
+
+
+# Optional preview of the learned tree
+def print_tree(tree, indent=""):
+    if isinstance(tree, dict):
+        for key, value in tree.items():
+            print(f"{indent}{key}")
+            print_tree(value, indent + " ")
+    else:
+        print(f"{indent}{tree}")
+
+
+print_tree(decision_tree)
 def information_gain(data, attribute):
     total_entropy = entropy(data)
     values = data[attribute].unique() #every value the attribute takes, e.g. high/low/medium
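
Note on the entropy/information-gain pair added above: entropy() is the Shannon entropy of the label column, and information_gain() subtracts the size-weighted entropy of each value subset from it. Below is a minimal, self-contained sketch of the same computation on a toy frame; the column names Height and Label and the four rows are made up purely for illustration, the real data comes from paczki.csv, which is not part of this patch.

import numpy as np
import pandas as pd


def entropy(data):
    # Shannon entropy of the last column (the class label)
    labels = data.iloc[:, -1]
    probabilities = labels.value_counts() / len(labels)
    return -sum(probabilities * np.log2(probabilities))


def information_gain(data, attribute):
    # entropy of the whole set minus the size-weighted entropy of each value subset
    weighted = sum(
        (len(subset) / len(data)) * entropy(subset)
        for _, subset in data.groupby(attribute)
    )
    return entropy(data) - weighted


toy = pd.DataFrame({
    "Height": ["small", "small", "big", "big"],
    "Label":  ["yes",   "yes",   "no",  "yes"],
})
print(entropy(toy))                     # -(3/4)*log2(3/4) - (1/4)*log2(1/4) ~= 0.811
print(information_gain(toy, "Height"))  # 0.811 - (2/4)*0.0 - (2/4)*1.0 ~= 0.311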
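id3() returns the tree as nested dicts of the form {attribute: {value: subtree-or-label}}; the patch only prints that structure with print_tree(). A hedged sketch of how a single row could be classified with it follows; classify(), its default fallback, and the example row are not in the patch, and the attribute names and values are taken from the committed decision_tree file.

def classify(tree, row, default="no"):
    # Walk the nested dicts until a leaf (a plain label) is reached.
    while isinstance(tree, dict):
        attribute = next(iter(tree))   # attribute tested at this node
        branches = tree[attribute]
        value = row.get(attribute)     # the row's value for that attribute
        if value not in branches:      # value never seen during training
            return default
        tree = branches[value]
    return tree


# Example call with made-up values; the keys mirror the attributes in decision_tree.
row = {"Label_State": "yes", "Damage": "no", "Height": "small",
       "Width": "medium", "Depth": "small", "Value": "cheap"}
print(classify(decision_tree, row))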
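The committed decision_tree file is Graphviz DOT source, but the code that generated it is not part of this diff. A rough sketch, assuming the graphviz Python package, of how such a file could be written from the nested-dict tree; unlike the committed file it gives every node a unique id, since in DOT repeated names such as `no` all refer to the same node.

from graphviz import Digraph


def export_tree(tree, filename="decision_tree"):
    dot = Digraph()
    counter = [0]  # gives every node a unique id such as n1, n2, ...

    def add(subtree, parent=None, edge_label=""):
        counter[0] += 1
        node_id = f"n{counter[0]}"
        if isinstance(subtree, dict):
            attribute = next(iter(subtree))
            dot.node(node_id, label=attribute)  # decision node labelled with the attribute
            if parent is not None:
                dot.edge(parent, node_id, label=edge_label)
            for value, child in subtree[attribute].items():
                add(child, node_id, str(value))
        else:
            dot.node(node_id, label=str(subtree), shape="box")  # leaf with the class label
            if parent is not None:
                dot.edge(parent, node_id, label=edge_label)

    add(tree)
    dot.save(filename)  # writes the DOT source to the given file name


export_tree(decision_tree)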