# Projekt_AI-Automatyczny_saper/Engine/DecisionTree.py
import json

import numpy as np
import pandas as pd
from numpy import log2 as log

eps = np.finfo(float).eps  # small constant to avoid log(0) and division by zero
LEAVES = 'action'  # label column used to check leaf purity when building the tree


class DecisionTree:
    def __init__(self, doCreation):
        # doCreation=True: the tree will be built from data via buildTree();
        # doCreation=False: load a previously saved tree from DecisionTree.json.
        if doCreation:
            return
        with open('DecisionTree.json', 'r') as fp:
            self.tree = json.load(fp)

    def find_entropy(self, df):
        # Shannon entropy of the target column (assumed to be the last column).
        Class = df.keys()[-1]
        entropy = 0
        values = df[Class].unique()
        for value in values:
            fraction = df[Class].value_counts()[value] / len(df[Class])
            entropy += -fraction * np.log2(fraction)
        return entropy
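    # Worked example of the formula above: a target column split 50/50 between
    # two actions (say 'defuse' and 'detonate') has entropy
    # -0.5*log2(0.5) - 0.5*log2(0.5) = 1.0 bit; a pure column has entropy 0.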

    def find_entropy_attribute(self, df, attribute):
        # Weighted entropy of the target column after splitting on `attribute`.
        Class = df.keys()[-1]
        target_variables = df[Class].unique()  # all classes of the target column
        variables = df[attribute].unique()     # all distinct values of this attribute
        entropy2 = 0
        for variable in variables:
            entropy = 0
            for target_variable in target_variables:
                num = len(df[(df[attribute] == variable) & (df[Class] == target_variable)])
                den = len(df[df[attribute] == variable])
                fraction = num / (den + eps)
                entropy += -fraction * log(fraction + eps)
            fraction2 = den / len(df)
            entropy2 += -fraction2 * entropy
        return abs(entropy2)
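    # find_entropy_attribute() returns the conditional entropy
    #     H(S | A) = sum_v (|S_v| / |S|) * H(S_v),
    # so the information gain computed in find_winner() below is
    #     IG(A) = H(S) - H(S | A).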

    def find_winner(self, df):
        # Pick the attribute with the highest information gain.
        IG = []
        for key in df.keys()[:-1]:
            IG.append(self.find_entropy(df) - self.find_entropy_attribute(df, key))
        return df.keys()[:-1][np.argmax(IG)]

    def get_subtable(self, df, node, value):
        # Rows where `node` takes the given value, reindexed from zero.
        return df[df[node] == value].reset_index(drop=True)

    def buildTree(self, df, tree=None):
        # Recursively build the decision tree (ID3-style):
        # split on the attribute with maximum information gain.
        node = self.find_winner(df)
        # Distinct values taken by that attribute in the current subset.
        attValue = np.unique(df[node])
        # The tree is stored as nested dictionaries: {attribute: {value: subtree or action}}.
        if tree is None:
            tree = {}
            tree[node] = {}
        # Check each subset for purity; recurse on the impure ones.
        for value in attValue:
            subtable = self.get_subtable(df, node, value)
            clValue, counts = np.unique(subtable[LEAVES], return_counts=True)
            if len(counts) == 1:  # pure subset -> leaf
                tree[node][value] = clValue[0]
            else:
                tree[node][value] = self.buildTree(subtable)  # recurse on the subset
        # The file is rewritten on every recursive call; the outermost call writes
        # last, so the saved JSON ends up holding the complete tree.
        with open('C:\\Users\\kratu\\PycharmProjects\\Projekt_AI-Automatyczny_saper\\DecisionTree.json', 'w') as fp:
            json.dump(tree, fp)
        return tree
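    # The dumped JSON mirrors the in-memory structure: nested single-key dicts
    # keyed by attribute name, whose values map attribute values to either a
    # further subtree or an action string, roughly (shape only, values illustrative):
    #     {"bomb_type": {"<value>": "defuse", "<other value>": {"size": {...}}}}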

    def key(self, t, bomb):
        # Walk the nested-dict tree using the bomb's attribute getters until a
        # leaf (an action string) is reached.
        if t in ["poligon", "defuse", "detonate"]:
            print(t)
            return t
        for x in t.keys():
            if x == "defusable":
                for y in t[x].keys():
                    if y == bomb.getDefusable():
                        return self.key(t[x][y], bomb)
            if x == "bomb_type":
                for y in t[x].keys():
                    if y == bomb.getBombType():
                        return self.key(t[x][y], bomb)
            if x == "size":
                for y in t[x].keys():
                    if y == bomb.getSize():
                        return self.key(t[x][y], bomb)
            if x == "detonation_duration":
                for y in t[x].keys():
                    if y == bomb.getDetonationDuration():
                        return self.key(t[x][y], bomb)
            if x == "detonation_area":
                for y in t[x].keys():
                    if y == bomb.getDetonationArea():
                        return self.key(t[x][y], bomb)


if __name__ == "__main__":
    # Build the tree from the exported training data and persist it to JSON.
    data = pd.read_csv("C:\\Users\\kratu\\PycharmProjects\\Projekt_AI-Automatyczny_saper\\out.csv")
    DecisionTree(True).buildTree(data)
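    # Illustrative follow-up (a sketch; `bomb` is assumed to be an object that
    # exposes the getters used by key() above, e.g. the project's Bomb class):
    #     classifier = DecisionTree(False)  # reads DecisionTree.json
    #     action = classifier.key(classifier.tree, bomb)
    # Note that __init__ opens 'DecisionTree.json' relative to the working
    # directory, while buildTree() writes to the absolute path above.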