Fixed tree

This commit is contained in:
Serhii Hromov 2020-05-18 10:20:10 +00:00
parent f76f0c2639
commit 7e92796a19

70
main.py
View File

@ -30,13 +30,23 @@ EAT_TIME = 15
#### Menu #### Menu
menu = Context.fromstring(''' |meat|salad|meal|drink|cold|hot | menu = Context.fromstring(''' |meat|salad|meal|drink|cold|hot |
Pork | X | | X | | | X | Pork | X | | | | | X |
Espresso | | | | X | | X | Espresso | | | | X | | X |
Green Tea | | | | X | X | | Green Tea | | | | X | X | |
Greek Salad| | X | X | | X | | Greek Salad| | X | | | X | |
Pizza | | | X | | | X |''') Pizza | | | X | | | X |''')
training_data = [
['meat','hot','Pork'],
['salad','cold','Greek Salad'],
['drink','hot','Espresso'],
['drink','cold','Green Tea'],
['meal','hot','Pizza'],
]
tree_format = ["dish", "temperature", "label"]
#menu.lattice.graphviz() #menu.lattice.graphviz()
#Digraph.render('Lattice.gv', view=True) #Digraph.render('Lattice.gv', view=True)
@ -46,7 +56,6 @@ menu = Context.fromstring(''' |meat|salad|meal|drink|cold|hot |
#print(func_output) #print(func_output)
'''
def uniq_val_from_data(rows, col): def uniq_val_from_data(rows, col):
return set([row[col] for row in rows]) return set([row[col] for row in rows])
@ -65,16 +74,14 @@ def isnumer(value):
return isinstance(value, int) or isinstance(value, float) return isinstance(value, int) or isinstance(value, float)
header = ...
class Question(): class Question():
def __init__(self, column, value): def __init__(self, col, value):
self.column = column self.col = col
self.value = value self.value = value
def compare(self, example): def compare(self, example):
val = example[self.column] val = example[self.col]
if isnumer(val): if isnumer(val):
return val >= self.value return val >= self.value
else: else:
@ -83,14 +90,14 @@ class Question():
def __repr__(self): def __repr__(self):
condition = "==" condition = "=="
if isnumer(self.value): if isnumer(self.value):
condition = ">=" condition = ">="
return "Is %s %s %s?" % (header[self.column], condition, str(self.value)) return "Is %s %s %s?" % (tree_format[self.col], condition, str(self.value))
def partition(rows, quest): def partition(rows, quest):
t_rows, f_rows = [], [] t_rows, f_rows = [], []
for rows in rows: for row in rows:
if quest.compare(row) if quest.compare(row):
t_rows.append(row) t_rows.append(row)
else: else:
f_rows.append(row) f_rows.append(row)
@ -101,12 +108,12 @@ def gini(rows):
counts = class_counts(rows) counts = class_counts(rows)
impurity = 1 impurity = 1
for lbl in counts: for lbl in counts:
prob_of_lbl = counts[lbl] / float(lem(rows)) prob_of_lbl = counts[lbl] / float(len(rows))
impurity -= prob_of_lbl**2 impurity -= prob_of_lbl**2
return impurity return impurity
def info_gain(l,r, current_uncertainty): def info_gain(l, r, current_uncertainty):
p = float(len(l)) / (len(l) + len(r)) p = float(len(l)) / (len(l) + len(r))
return current_uncertainty - p*gini(l) - (1-p)*gini(r) return current_uncertainty - p*gini(l) - (1-p)*gini(r)
@ -115,29 +122,29 @@ def find_best_q(rows):
best_gain = 0 best_gain = 0
best_quest = None best_quest = None
current_uncertainty = gini(rows) current_uncertainty = gini(rows)
n_features = len(rows[0]) - 1 n_feat = len(rows[0]) - 1
for col in range(n_feat): for col in range(n_feat):
values = set([row[col] for row in rows]) vals = set([row[col] for row in rows])
for cal in values: for val in vals:
quest = Question(col, val) quest = Question(col, val)
t_rows, f_rows = partition(rows, quest) t_rows, f_rows = partition(rows, quest)
if len(t_rows) == 0 or len(f_rows) == 0Ж if len(t_rows) == 0 or len(f_rows) == 0:
continue continue
fain = info_gain(t_rows, f_rows, current_uncertainty) gain = info_gain(t_rows, f_rows, current_uncertainty)
if gain >= best gain: if gain >= best_gain:
best_gain, best_quest = gain, quest best_gain, best_quest = gain, quest
return best_gain, best_quest return best_gain, best_quest
class Leaf: class Leaf:
def __init__(self,rows): def __init__(self, rows):
self.predicts = class_counts(rows) self.predicts = class_counts(rows)
@ -148,7 +155,7 @@ class Decision_Node():
self.f_branch = f_branch self.f_branch = f_branch
def build_tree(): def build_tree(rows):
gain, quest = find_best_q(rows) gain, quest = find_best_q(rows)
if gain == 0: if gain == 0:
@ -162,22 +169,22 @@ def build_tree():
return Decision_Node(quest, t_branch, f_branch) return Decision_Node(quest, t_branch, f_branch)
def print_tree(node): def print_tree(node, spc=""):
if isinstance(node, leaf): if isinstance(node, Leaf):
print("" + "Predict", node.predictions) print(" " + "Predict", node.predicts)
return return
print("" + str(node.quest)) print("" + str(node.quest))
print("" + '--> True:') print("" + '--> True:')
print_tree(node.t_branch, ""+ " ") print_tree(node.t_branch, spc + " ")
print("" + '--> False:') print("" + '--> False:')
print_tree(node.f_branch,"" + " ") print_tree(node.f_branch, spc + " ")
def classify(row, node): def classify(row, node):
if isinstance(node, leaf): if isinstance(node, Leaf):
return node.predictions return node.predictions
if node.quest.compare(row): if node.quest.compare(row):
@ -194,7 +201,12 @@ def print_leaf(counts):
return probs return probs
'''
#print(menu.extension(['meal',]))
tree = build_tree(training_data)
print_tree(tree)
### ###
class Node: class Node: