dodalem drzewo decyzyjne
This commit is contained in:
parent
1fd7a90fd1
commit
bf6cef912e
@ -3,5 +3,5 @@
|
|||||||
<component name="Black">
|
<component name="Black">
|
||||||
<option name="sdkName" value="Python 3.11 (ai-wozek) (2)" />
|
<option name="sdkName" value="Python 3.11 (ai-wozek) (2)" />
|
||||||
</component>
|
</component>
|
||||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.12 (ai-wozek)" project-jdk-type="Python SDK" />
|
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.11 (ai-wozek)" project-jdk-type="Python SDK" />
|
||||||
</project>
|
</project>
|
@ -4,7 +4,7 @@
|
|||||||
<content url="file://$MODULE_DIR$">
|
<content url="file://$MODULE_DIR$">
|
||||||
<excludeFolder url="file://$MODULE_DIR$/venv" />
|
<excludeFolder url="file://$MODULE_DIR$/venv" />
|
||||||
</content>
|
</content>
|
||||||
<orderEntry type="jdk" jdkName="Python 3.12 (ai-wozek)" jdkType="Python SDK" />
|
<orderEntry type="inheritedJdk" />
|
||||||
<orderEntry type="sourceFolder" forTests="false" />
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
</component>
|
</component>
|
||||||
</module>
|
</module>
|
74
ai-wozek/decision_tree
Normal file
74
ai-wozek/decision_tree
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
digraph {
|
||||||
|
root [label=root]
|
||||||
|
Label_State [label=Label_State]
|
||||||
|
no [label=no]
|
||||||
|
Label_State -> no [label=""]
|
||||||
|
Height [label=Height]
|
||||||
|
no -> Height [label=""]
|
||||||
|
no [label=no shape=box]
|
||||||
|
Height -> no [label=medium]
|
||||||
|
no [label=no shape=box]
|
||||||
|
Height -> no [label=big]
|
||||||
|
small [label=small]
|
||||||
|
Height -> small [label=""]
|
||||||
|
Width [label=Width]
|
||||||
|
small -> Width [label=""]
|
||||||
|
small [label=small]
|
||||||
|
Width -> small [label=""]
|
||||||
|
Depth [label=Depth]
|
||||||
|
small -> Depth [label=""]
|
||||||
|
no [label=no shape=box]
|
||||||
|
Depth -> no [label=big]
|
||||||
|
yes [label=yes shape=box]
|
||||||
|
Depth -> yes [label=medium]
|
||||||
|
no [label=no shape=box]
|
||||||
|
Width -> no [label=big]
|
||||||
|
medium [label=medium]
|
||||||
|
Width -> medium [label=""]
|
||||||
|
Depth [label=Depth]
|
||||||
|
medium -> Depth [label=""]
|
||||||
|
no [label=no shape=box]
|
||||||
|
Depth -> no [label=big]
|
||||||
|
yes [label=yes shape=box]
|
||||||
|
Depth -> yes [label=medium]
|
||||||
|
yes [label=yes shape=box]
|
||||||
|
Depth -> yes [label=small]
|
||||||
|
yes [label=yes]
|
||||||
|
Label_State -> yes [label=""]
|
||||||
|
Damage [label=Damage]
|
||||||
|
yes -> Damage [label=""]
|
||||||
|
yes [label=yes shape=box]
|
||||||
|
Damage -> yes [label=no]
|
||||||
|
yes [label=yes]
|
||||||
|
Damage -> yes [label=""]
|
||||||
|
Height [label=Height]
|
||||||
|
yes -> Height [label=""]
|
||||||
|
no [label=no shape=box]
|
||||||
|
Height -> no [label=medium]
|
||||||
|
no [label=no shape=box]
|
||||||
|
Height -> no [label=big]
|
||||||
|
small [label=small]
|
||||||
|
Height -> small [label=""]
|
||||||
|
Width [label=Width]
|
||||||
|
small -> Width [label=""]
|
||||||
|
no [label=no shape=box]
|
||||||
|
Width -> no [label=big]
|
||||||
|
small [label=small]
|
||||||
|
Width -> small [label=""]
|
||||||
|
Depth [label=Depth]
|
||||||
|
small -> Depth [label=""]
|
||||||
|
yes [label=yes shape=box]
|
||||||
|
Depth -> yes [label=medium]
|
||||||
|
no [label=no shape=box]
|
||||||
|
Depth -> no [label=big]
|
||||||
|
yes [label=yes shape=box]
|
||||||
|
Depth -> yes [label=small]
|
||||||
|
medium [label=medium]
|
||||||
|
Width -> medium [label=""]
|
||||||
|
Value [label=Value]
|
||||||
|
medium -> Value [label=""]
|
||||||
|
yes [label=yes shape=box]
|
||||||
|
Value -> yes [label=cheap]
|
||||||
|
no [label=no shape=box]
|
||||||
|
Value -> no [label=expensive]
|
||||||
|
}
|
@ -378,13 +378,77 @@ def astar(isstate,final):
|
|||||||
|
|
||||||
# Decision tree
# Load the examples used to build the decision tree from the CSV file.
tree_data_base = pd.read_csv('paczki.csv')
|
||||||
|
|
||||||
|
|
||||||
def entropy(data):
    """Return the Shannon entropy, in bits, of the class labels in *data*.

    The class labels are read from the last column of *data*.

    Args:
        data: A pandas DataFrame whose last column holds the class labels.

    Returns:
        The entropy of the label distribution. It is zero when *data* has no
        rows or when every row carries the same label.
    """
    labels = data.iloc[:, -1]

    # Relative frequency of every label. value_counts() ignores missing
    # labels, and normalize=True divides by the number of counted labels, so
    # the probabilities always sum to one.
    probabilities = labels.value_counts(normalize=True)

    # Categorical columns also report categories that never occur; their
    # probability is 0 and 0 * log2(0) would poison the sum with NaN.
    probabilities = probabilities[probabilities > 0]

    return -(probabilities * np.log2(probabilities)).sum()
|
||||||
|
|
||||||
|
|
||||||
|
def information_gain(data, attribute):
    """Return the information gain obtained by splitting *data* on *attribute*.

    The gain is the entropy of the whole set minus the size-weighted entropy
    of the subsets produced by each distinct value of *attribute*.

    Args:
        data: A pandas DataFrame of examples, passed unchanged to ``entropy``.
        attribute: Name of the column to split on.

    Returns:
        The difference between the entropy of *data* and the weighted entropy
        of its partitions.
    """
    total_entropy = entropy(data)
    row_count = len(data)  # loop-invariant, computed once

    weighted_entropy = 0
    # Every distinct value of the attribute (e.g. high / low / medium)
    # defines one partition of the examples.
    for value in data[attribute].unique():
        # All rows in which the attribute takes this value.
        subset = data[data[attribute] == value]
        weighted_entropy += (len(subset) / row_count) * entropy(subset)

    return total_entropy - weighted_entropy
|
||||||
|
|
||||||
|
|
||||||
|
def id3(data, attributes, target_attribute):
    """Recursively build a decision tree from *data* using ID3.

    Args:
        data: A pandas DataFrame of examples.
        attributes: Names of the columns that may still be used for splitting.
            The sequence passed in is not modified.
        target_attribute: Name of the column holding the class label.

    Returns:
        Either a single class label (a leaf), or a nested dict of the form
        ``{attribute: {value: subtree, ...}}`` where every subtree is again a
        label or such a dict.
    """
    unique_targets = data[target_attribute].unique()

    # Every example carries the same label: return it as a leaf.
    if len(unique_targets) == 1:
        return unique_targets[0]

    # No attributes left to split on: fall back to the most frequent label.
    if len(attributes) == 0:
        return data[target_attribute].mode()[0]

    # Choose the attribute with the largest information gain; on ties the
    # earliest attribute in the list wins.
    best_attribute = max(
        attributes, key=lambda attr: information_gain(data, attr)
    )

    # The chosen attribute is not available further down this branch.
    remaining = [attr for attr in attributes if attr != best_attribute]

    # One branch per distinct value of the chosen attribute.
    branches = {}
    for value in data[best_attribute].unique():
        subset = data[data[best_attribute] == value]
        if subset.empty:
            # A missing value never equals itself, so it selects no rows;
            # recursing on an empty set could fail when looking up the mode.
            continue
        branches[value] = id3(subset, remaining, target_attribute)

    return {best_attribute: branches}
|
||||||
|
|
||||||
|
|
||||||
|
# Prepare the data.
# Keep the first nine columns; the last of them is assumed to be the class
# label and the eight before it the attributes.
data = tree_data_base.iloc[:, :9]
attributes = list(data.columns[:-1])
target_attribute = data.columns[-1]

# Train the decision tree.
decision_tree = id3(data, attributes, target_attribute)
|
||||||
|
|
||||||
|
|
||||||
|
# Optional preview of the learned tree
def print_tree(tree, indent=""):
    """Print *tree* to standard output, one node per line.

    Args:
        tree: Either a nested dict (its keys are printed and its values are
            printed recursively) or any other object, which is printed as a
            leaf.
        indent: Prefix written before every line at the current depth; each
            nesting level appends one more space.
    """
    if isinstance(tree, dict):
        for key, value in tree.items():
            print(f"{indent}{key}")
            print_tree(value, indent + " ")
    else:
        print(f"{indent}{tree}")
|
||||||
|
|
||||||
|
|
||||||
|
# Print the trained decision tree to standard output for inspection.
print_tree(decision_tree)
|
||||||
def information_gain(data, attribute):
|
def information_gain(data, attribute):
|
||||||
total_entropy = entropy(data)
|
total_entropy = entropy(data)
|
||||||
values = data[attribute].unique() #przypisujemy wszystkie opcje danego atrybutu np wyoski/niski/sredni
|
values = data[attribute].unique() #przypisujemy wszystkie opcje danego atrybutu np wyoski/niski/sredni
|
||||||
|
Loading…
Reference in New Issue
Block a user