121 lines
3.9 KiB
Python
121 lines
3.9 KiB
Python
import pprint

import numpy as np
import pandas as pd
from numpy import log2 as log

# Machine epsilon for float64: the gap between 1.0 and the next representable float.
eps = np.finfo(float).eps
|
|
|
|
class DecisionTree:
    """Information-gain (ID3-style) decision tree over categorical columns.

    The training table is a pandas DataFrame whose LAST column is the class
    label; every other column is treated as a categorical attribute.  The
    learned tree is a nested dict of the form
    ``{attribute: {attribute_value: subtree_or_label}}`` and is stored in
    ``self.tree``.
    """

    # Small constant guarding against division by zero and log2(0).
    _EPS = np.finfo(float).eps

    # Tree attribute name -> name of the accessor on the ``bomb`` object
    # passed to ``key``.
    _BOMB_GETTERS = {
        "defusable": "getDefusable",
        "bomb_type": "getBombType",
        "size": "getSize",
        "detonation_duration": "getDetonationDuration",
        "detonation_area": "getDetonationArea",
    }

    def __init__(self, csv_path="out.csv"):
        """Read the training table from *csv_path*, build the tree, print it.

        Args:
            csv_path: Path of the CSV file to train on.  Defaults to
                ``"out.csv"``.
        """
        df = pd.read_csv(csv_path)
        self.tree = self.buildTree(df)
        pprint.pprint(self.tree)

    def find_entropy(self, df):
        """Return the entropy (in bits) of the class column of *df*.

        The class column is the last column of *df*.  An empty table has
        entropy 0.
        """
        labels = df[df.keys()[-1]]
        total = len(labels)
        if total == 0:
            return 0.0
        # Count every class once instead of once per distinct value.
        fractions = labels.value_counts() / total
        return float(sum(-p * np.log2(p) for p in fractions))

    def find_entropy_attribute(self, df, attribute):
        """Return the entropy of the class column after splitting on *attribute*.

        For every distinct value of *attribute* the entropy of the class
        column within that subset is computed and weighted by the subset's
        share of the rows; the weighted sum is returned.

        Args:
            df: Training table; its last column is the class label.
            attribute: Name of the column to split on.
        """
        target = df.keys()[-1]
        class_values = df[target].unique()
        eps = self._EPS
        total = len(df)

        weighted = 0.0
        for variable in df[attribute].unique():
            in_group = df[attribute] == variable
            den = int(in_group.sum())
            entropy = 0.0
            for class_value in class_values:
                num = int((in_group & (df[target] == class_value)).sum())
                # eps keeps both the division and the logarithm defined when
                # a class does not occur in this subset.
                fraction = num / (den + eps)
                entropy += -fraction * np.log2(fraction + eps)
            weighted += (den / total) * entropy
        # The eps terms can push the result marginally below zero.
        return abs(weighted)

    def find_winner(self, df):
        """Return the name of the attribute with the highest information gain.

        Ties are resolved in favour of the left-most column.

        Raises:
            ValueError: If *df* has no attribute columns (only the class
                column, or no columns at all).
        """
        attributes = df.keys()[:-1]
        if len(attributes) == 0:
            raise ValueError("cannot pick a split: the table has no attribute columns")
        base_entropy = self.find_entropy(df)  # same for every attribute
        gains = [
            base_entropy - self.find_entropy_attribute(df, attribute)
            for attribute in attributes
        ]
        return attributes[int(np.argmax(gains))]

    def get_subtable(self, df, node, value):
        """Return the rows of *df* where column *node* equals *value*.

        The returned table has a fresh 0-based index.
        """
        return df[df[node] == value].reset_index(drop=True)

    def buildTree(self, df, tree=None):
        """Recursively build the decision tree for *df*.

        Args:
            df: Training table; its last column is the class label.
            tree: Optional dict to add the root node to.  A new dict is
                created when omitted.

        Returns:
            A dict ``{attribute: {value: subtree_or_label}}``.  A branch ends
            in a label when its subset is pure, or - when the subset is
            impure but no attributes are left to split on - in the most
            frequent label of that subset.
        """
        target = df.keys()[-1]

        # Attribute with maximum information gain becomes this node.
        node = self.find_winner(df)

        if tree is None:
            tree = {}
        tree[node] = {}

        for value in np.unique(df[node]):
            subtable = self.get_subtable(df, node, value)
            labels, counts = np.unique(subtable[target], return_counts=True)

            if len(counts) == 1:  # pure subset -> leaf
                tree[node][value] = labels[0]
                continue

            # Every row of the subtable shares the same value of `node`, so
            # that column can never split it further.  Dropping it guarantees
            # the recursion terminates even on contradictory rows.
            remaining = subtable.drop(columns=node)
            if remaining.shape[1] == 1:
                # Only the class column is left: fall back to majority vote.
                tree[node][value] = labels[int(np.argmax(counts))]
            else:
                tree[node][value] = self.buildTree(remaining)
        return tree

    def key(self, t, bomb):
        """Walk the (sub)tree *t* for *bomb*, print the decision and return it.

        Args:
            t: A tree as produced by ``buildTree`` or a leaf label.
            bomb: Object exposing the accessor methods named in
                ``_BOMB_GETTERS`` for the attributes that occur in the tree.

        Returns:
            The leaf label reached, or ``None`` when no branch matches the
            bomb's attribute values (nothing is printed in that case).
        """
        if not isinstance(t, dict):
            # Anything that is not a node is a decision.
            print(t)
            return t

        for attribute, branches in t.items():
            getter_name = self._BOMB_GETTERS.get(attribute)
            if getter_name is None:
                continue  # attribute the bomb has no accessor for
            actual = getattr(bomb, getter_name)()
            for branch_value, subtree in branches.items():
                if branch_value == actual:
                    return self.key(subtree, bomb)
        return None
|
|
|
|
|
|
|
|
|
|
|
|
|