# Decision tree (ID3) for the automatic sapper agent.
import numpy as np
|
|
import pandas as pd
|
|
import json
|
|
|
|
# Tiny positive constant used to avoid log2(0) and division by zero.
eps = np.finfo(float).eps

from numpy import log2 as log

# Name of the target/label column in the training data.
LEAVES = 'action'


class DecisionTree:
    """ID3-style decision tree over categorical attributes.

    Builds the tree from a pandas DataFrame whose LAST column is the
    target label, or loads a previously built tree from
    'DecisionTree.json'. The tree is a nested dict:
    {attribute: {value: subtree_or_leaf_label}}.
    """

    def __init__(self, doCreation):
        # doCreation truthy: caller will build the tree via buildTree();
        # nothing to do here (self.tree stays unset until then).
        if doCreation:
            return
        # Otherwise restore the tree saved by a previous buildTree() run.
        with open('DecisionTree.json', 'r') as fp:
            self.tree = json.load(fp)

    def find_entropy(self, df):
        """Return the Shannon entropy (in bits) of df's last column.

        The last column is taken as the target variable, which keeps the
        code generic over datasets.
        """
        Class = df.keys()[-1]
        entropy = 0
        for value in df[Class].unique():
            # Each unique label contributes -p * log2(p).
            fraction = df[Class].value_counts()[value] / len(df[Class])
            entropy += -fraction * np.log2(fraction)
        return entropy

    def find_entropy_attribute(self, df, attribute):
        """Return the weighted entropy of the target after splitting df
        on `attribute` (the conditional entropy H(target | attribute))."""
        Class = df.keys()[-1]
        target_variables = df[Class].unique()  # e.g. all 'Yes'/'No' labels
        variables = df[attribute].unique()     # distinct attribute values
        entropy2 = 0
        for variable in variables:
            # FIX: build one combined boolean mask with `&` instead of the
            # original chained indexing df[a][m1][m2], which is deprecated
            # in pandas and triggers UserWarning.
            value_mask = df[attribute] == variable
            den = value_mask.sum()
            entropy = 0
            for target_variable in target_variables:
                num = (value_mask & (df[Class] == target_variable)).sum()
                # eps guards against 0/0 and log2(0).
                fraction = num / (den + eps)
                entropy += -fraction * log(fraction + eps)
            # Weight the subset entropy by the subset's share of the data.
            fraction2 = den / len(df)
            entropy2 += -fraction2 * entropy
        # abs() strips the tiny negative residue introduced by eps.
        return abs(entropy2)

    def find_winner(self, df):
        """Return the attribute (non-target column) with the highest
        information gain on df."""
        IG = []
        # Hoisted: the dataset entropy does not depend on the attribute,
        # so compute it once instead of once per column.
        base_entropy = self.find_entropy(df)
        for key in df.keys()[:-1]:
            IG.append(base_entropy - self.find_entropy_attribute(df, key))
        return df.keys()[:-1][np.argmax(IG)]

    def get_subtable(self, df, node, value):
        """Return the rows of df where column `node` equals `value`,
        with a fresh 0-based index."""
        return df[df[node] == value].reset_index(drop=True)

    def buildTree(self, df, tree=None):
        """Recursively build the decision tree for df and persist it.

        Returns the nested-dict tree. Each call (including recursive
        ones) rewrites 'DecisionTree.json'; the outermost call writes
        last, so the file ends up holding the complete tree.
        """
        # Attribute with the maximum information gain becomes the node.
        node = self.find_winner(df)

        # Distinct values of that attribute, e.g. Low/Med/High for Salary.
        attValue = np.unique(df[node])

        if tree is None:
            tree = {}
            tree[node] = {}

        # Recurse into each attribute value; stop where the subset is pure.
        for value in attValue:
            subtable = self.get_subtable(df, node, value)
            clValue, counts = np.unique(subtable[LEAVES], return_counts=True)

            if len(counts) == 1:  # pure subset -> leaf with the label
                tree[node][value] = clValue[0]
            else:
                tree[node][value] = self.buildTree(subtable)

        # FIX: write to the same relative path that __init__ reads from
        # (previously a hardcoded absolute user path, so the saved tree
        # could never be loaded back).
        with open('DecisionTree.json', 'w') as fp:
            json.dump(tree, fp)
        return tree

    def key(self, t, bomb):
        """Walk the tree for `bomb` and print the action leaf reached.

        `t` is either a leaf label or a nested {attribute: {value: ...}}
        dict as produced by buildTree().
        """
        if t in ["poligon", "defuse", "detonate"]:
            print(t)
            return
        # Dispatch table replaces five copy-pasted if-blocks; maps each
        # tree attribute to the bomb accessor that yields its value
        # (accessors are assumed side-effect-free reads).
        getters = {
            "defusable": bomb.getDefusable,
            "bomb_type": bomb.getBombType,
            "size": bomb.getSize,
            "detonation_duration": bomb.getDetonationDuration,
            "detonation_area": bomb.getDetonationArea,
        }
        for x in t.keys():
            getter = getters.get(x)
            if getter is None:
                continue
            target = getter()
            for y in t[x].keys():
                if y == target:
                    self.key(t[x][y], bomb)
|
|
|
|
if __name__ == "__main__":
    # Train the decision tree from the recorded gameplay samples.
    # NOTE(review): machine-specific absolute path — consider making it
    # configurable.
    training_data = pd.read_csv(
        "C:\\Users\\kratu\\PycharmProjects\\Projekt_AI-Automatyczny_saper\\out.csv"
    )
    builder = DecisionTree(True)
    builder.buildTree(training_data)
|
|
|
|
|
|
|
|
|
|
|
|
|