134 lines
14 KiB
Python
134 lines
14 KiB
Python
|
import pprint
|
||
|
|
||
|
import numpy as np
|
||
|
import pandas as pd
|
||
|
|
||
|
# Machine epsilon of the Python float type: the gap between 1.0 and the next
# representable float. Serves as a tiny positive offset in numeric code.
eps = np.finfo(float).eps
|
||
|
from numpy import log2 as log
|
||
|
|
||
|
class DecisionTree:
    """ID3-style decision tree that picks an action for a bomb.

    The fitted tree is stored in ``self.tree`` as nested dictionaries of the
    form ``{attribute: {value: subtree_or_label}}``.  Training frames must
    keep the target (label) column last; every other column is treated as a
    categorical attribute.
    """

    # Attribute domains of the built-in training set.  The key order is the
    # column order of the frame, and the value order fixes the row order
    # (first attribute varies slowest, last attribute varies fastest).
    _FEATURE_DOMAINS = {
        'bomb_type': ('Atomic Bomb', 'Decoy', 'Claymore', 'Chemical Bomb', 'Land Mine'),
        'detonation_duration': ('long', 'short', 'none', 'immediate'),
        'size': ('small', 'medium', 'large'),
        'detonation_area': ('large', 'small'),
        'defusable': ('yes', 'no'),
    }

    # Action labels of the built-in training set.  Each bomb type maps to one
    # string per detonation duration (long, short, none, immediate); each
    # string holds the 12 labels for size x detonation_area x defusable in
    # the row order described above.
    _DEFAULT_ACTIONS = {
        'Atomic Bomb': (
            'defuse,poligon,detonate,detonate,defuse,poligon,detonate,detonate,defuse,poligon,detonate,detonate',
        ) * 4,
        'Decoy': (
            'poligon,poligon,poligon,poligon,poligon,poligon,poligon,poligon,poligon,poligon,poligon,poligon',
        ) * 4,
        'Claymore': (
            'defuse,poligon,defuse,detonate,defuse,poligon,defuse,poligon,detonate,poligon,defuse,poligon',
            'defuse,poligon,defuse,detonate,poligon,poligon,detonate,poligon,defuse,poligon,defuse,detonate',
            'defuse,poligon,defuse,detonate,defuse,poligon,defuse,poligon,defuse,poligon,defuse,detonate',
            'poligon,poligon,detonate,detonate,defuse,poligon,poligon,detonate,poligon,detonate,detonate,poligon',
        ),
        'Chemical Bomb': (
            'defuse,poligon,defuse,poligon,defuse,poligon,defuse,poligon,defuse,poligon,defuse,detonate',
            'defuse,detonate,defuse,detonate,defuse,detonate,defuse,detonate,defuse,detonate,defuse,detonate',
            'defuse,poligon,defuse,poligon,defuse,poligon,defuse,poligon,defuse,poligon,defuse,poligon',
            'defuse,poligon,defuse,poligon,defuse,poligon,defuse,detonate,defuse,poligon,defuse,detonate',
        ),
        'Land Mine': (
            'defuse,detonate,defuse,detonate,defuse,detonate,defuse,detonate,defuse,poligon,defuse,poligon',
            'defuse,poligon,defuse,poligon,defuse,poligon,defuse,poligon,defuse,poligon,defuse,poligon',
            'poligon,poligon,poligon,poligon,poligon,poligon,poligon,poligon,poligon,poligon,poligon,poligon',
            'defuse,detonate,defuse,detonate,defuse,detonate,defuse,detonate,defuse,detonate,defuse,detonate',
        ),
    }

    # Tree attribute name -> name of the bomb method that returns its value.
    _BOMB_GETTERS = {
        'bomb_type': 'getBombType',
        'detonation_duration': 'getDetonationDuration',
        'size': 'getSize',
        'detonation_area': 'getDetonationArea',
        'defusable': 'getDefusable',
    }

    def __init__(self, df=None):
        """Build the tree, store it in ``self.tree`` and pretty-print it.

        Args:
            df: Optional training DataFrame whose last column is the target.
                When omitted, the built-in bomb dataset is used.
        """
        if df is None:
            df = self._default_dataset()
        self.tree = self.buildTree(df)
        pprint.pprint(self.tree)

    @classmethod
    def _default_dataset(cls):
        """Return the built-in training frame (feature columns + 'action')."""
        from itertools import product

        # The features form the full cartesian product of their domains, so
        # they are generated instead of being spelled out row by row.
        rows = list(product(*cls._FEATURE_DOMAINS.values()))
        df = pd.DataFrame(rows, columns=list(cls._FEATURE_DOMAINS))

        actions = []
        for bomb_type in cls._FEATURE_DOMAINS['bomb_type']:
            for labels in cls._DEFAULT_ACTIONS[bomb_type]:
                actions.extend(labels.split(','))
        # pandas raises ValueError here if the label count and the number of
        # feature rows ever get out of sync.
        df['action'] = actions
        return df

    @staticmethod
    def _label_entropy(labels):
        """Return the Shannon entropy (base 2) of a Series of class labels."""
        probabilities = labels.value_counts(normalize=True)
        # Zero-probability entries (possible for categorical dtypes) are
        # skipped because log2(0) is undefined and they contribute nothing.
        return float(sum(-p * np.log2(p) for p in probabilities if p > 0))

    def find_entropy(self, df):
        """Return the entropy of the target (last) column of *df*."""
        target = df.keys()[-1]
        return self._label_entropy(df[target])

    def find_entropy_attribute(self, df, attribute):
        """Return the target entropy remaining after splitting on *attribute*.

        This is the average of the per-value target entropies, weighted by
        the share of rows that carry each attribute value.
        """
        target = df.keys()[-1]
        total = len(df)
        if total == 0:
            return 0.0
        return float(sum(
            len(labels) / total * self._label_entropy(labels)
            for _, labels in df.groupby(attribute, sort=False)[target]
        ))

    def find_winner(self, df):
        """Return the attribute column with the highest information gain.

        Ties are resolved in favour of the left-most column.

        Raises:
            ValueError: if *df* has no attribute column besides the target.
        """
        attributes = df.keys()[:-1]
        if len(attributes) == 0:
            raise ValueError('df must contain at least one attribute column besides the target')
        base_entropy = self.find_entropy(df)  # identical for every candidate
        gains = [base_entropy - self.find_entropy_attribute(df, name) for name in attributes]
        return attributes[int(np.argmax(gains))]

    def get_subtable(self, df, node, value):
        """Return the rows of *df* where column *node* equals *value*, reindexed from 0."""
        return df[df[node] == value].reset_index(drop=True)

    def buildTree(self, df, tree=None):
        """Recursively build the decision tree for *df*.

        Args:
            df: Training frame; the last column is the target.
            tree: Optional dictionary to add the new node to.

        Returns:
            A dictionary ``{attribute: {value: subtree_or_label}}``.
        """
        target = df.keys()[-1]

        # Split on the attribute with maximum information gain.
        node = self.find_winner(df)
        if tree is None:
            tree = {}
        branches = tree.setdefault(node, {})

        for value in np.unique(df[node]):
            subtable = self.get_subtable(df, node, value)
            labels, counts = np.unique(subtable[target], return_counts=True)

            if len(labels) == 1:  # pure subset -> leaf
                branches[value] = labels[0]
                continue

            # The split attribute is constant inside the subset, so it is
            # dropped; this guarantees that the recursion terminates.
            remaining = subtable.drop(columns=node)
            if remaining.shape[1] == 1:
                # No attribute left to split on although the labels still
                # disagree: fall back to the most frequent label.
                branches[value] = labels[int(np.argmax(counts))]
            else:
                branches[value] = self.buildTree(remaining)
        return tree

    def key(self, t, bomb):
        """Walk tree *t* for *bomb*, print the decision and return it.

        Args:
            t: A (sub)tree as produced by ``buildTree`` or a leaf label.
            bomb: Object exposing ``getBombType()``, ``getDetonationDuration()``,
                ``getSize()``, ``getDetonationArea()`` and ``getDefusable()``.

        Returns:
            The leaf label that was reached, or ``None`` when the tree has no
            branch for the bomb's attribute value.
        """
        if not isinstance(t, dict):  # leaf: the action label itself
            print(t)
            return t

        decision = None
        for attribute, branches in t.items():
            getter = self._BOMB_GETTERS.get(attribute)
            if getter is None:
                continue  # attribute the bomb interface does not expose
            value = getattr(bomb, getter)()
            if value in branches:
                outcome = self.key(branches[value], bomb)
                if decision is None:
                    decision = outcome
        return decision
|
||
|
|
||
|
|
||
|
|
||
|
|