2020-06-14 16:25:20 +02:00
|
|
|
# Training examples for the irrigation decision tree, one list per example.
# Columns, in order (Polish identifiers, translated):
#   kiedyNawadniano          - when the field was last watered
#   coIleDniTrzebaNawadniac  - every how many days watering is needed
#   czyMaPadac               - whether rain is expected
#   kiedyPadalo              - when it last rained
#   nawodnienie              - irrigation label (last column, the class)
# NOTE(review): 't'/'n' in the third column presumably mean tak/nie (yes/no);
# the meaning of the 's'/'n' labels is not visible here - confirm.
# NOTE(review): the features [4, 6, 't', 3] occur twice with different labels
# ('n' and 's'), so no split can separate those two examples.
training_data = [
    [2, 3, 't', 1, 'n'],
    [1, 3, 't', 1, 's'],
    [5, 2, 'n', 1, 's'],
    [3, 5, 'n', 1, 'n'],
    [3, 1, 't', 2, 's'],
    [2, 4, 'n', 2, 'n'],
    [4, 6, 't', 3, 'n'],
    [6, 5, 't', 3, 'n'],
    [1, 2, 't', 4, 's'],
    [7, 3, 'n', 5, 's'],
    [4, 4, 'n', 5, 'n'],
    [5, 6, 't', 5, 's'],
    [2, 7, 't', 1, 'n'],
    [5, 6, 't', 7, 's'],
    [5, 3, 'n', 7, 's'],
    [3, 2, 'n', 7, 'n'],
    [3, 5, 't', 4, 's'],
    [3, 4, 'n', 4, 'n'],
    [4, 3, 't', 6, 'n'],
    [6, 3, 't', 6, 'n'],
    [1, 4, 't', 6, 's'],
    [7, 5, 'n', 3, 's'],
    [2, 5, 'n', 3, 'n'],
    [4, 6, 't', 3, 's'],
    [4, 8, 'n', 4, 's'],
]

# Column names, indexed like a training row; used when printing questions.
header = [
    "kiedyNawadniano",
    "coIleDni",
    "czyMaPadac",
    "kiedyPadalo",
    "nawodnienie",
]
|
|
|
|
|
|
|
|
def class_counts(rows):
    """Count how many rows carry each label.

    The label of a row is its last element.

    Args:
        rows: iterable of indexable rows.

    Returns:
        A plain ``dict`` mapping label -> number of rows with that label,
        with keys in order of first appearance.
    """
    tally = {}
    for example in rows:
        key = example[-1]
        tally[key] = tally.get(key, 0) + 1
    return tally
|
|
|
|
|
|
|
|
|
|
|
|
def is_numeric(value):
    """Return True when ``value`` is an ``int`` or ``float`` instance."""
    return isinstance(value, (int, float))
|
|
|
|
|
|
|
|
|
|
|
|
class Question:
    """A test on one column of a row, used to split a dataset in two.

    Attributes are the column index and the reference value the column is
    compared against.
    """

    def __init__(self, column, value):
        """Store the column index and the value to compare against."""
        self.column = column
        self.value = value

    def match(self, example):
        """Return whether ``example`` satisfies this question.

        A numeric feature value matches when it is >= the stored value;
        any other feature value matches only when it equals the stored value.
        """
        feature = example[self.column]
        return feature >= self.value if is_numeric(feature) else feature == self.value

    def __repr__(self):
        """Render the question as text, using the column name from ``header``."""
        # The operator shown depends on the type of the stored value.
        condition = ">=" if is_numeric(self.value) else "=="
        return "Czy %s %s %s?" % (header[self.column], condition, str(self.value))
|
|
|
|
|
|
|
|
def partition(rows, question):
    """Split ``rows`` by whether each row matches ``question``.

    Args:
        rows: iterable of rows.
        question: object exposing ``match(row)``.

    Returns:
        A ``(true_rows, false_rows)`` tuple of lists; each list keeps the
        rows in their original relative order.
    """
    matched, unmatched = [], []
    for example in rows:
        # Route the row to whichever bucket the question selects.
        bucket = matched if question.match(example) else unmatched
        bucket.append(example)
    return matched, unmatched
|
|
|
|
|
|
|
|
|
|
|
|
def gini(rows):
    """Return the Gini impurity of ``rows``.

    Computed as 1 minus the sum of squared label frequencies, where the
    label counts come from ``class_counts``. With no rows the result is 1.
    """
    total = float(len(rows))
    impurity = 1
    # Subtract one squared frequency at a time, in label order, so the
    # floating-point result does not depend on a different summation order.
    for occurrences in class_counts(rows).values():
        share = occurrences / total
        impurity -= share ** 2
    return impurity
|
|
|
|
|
|
|
|
|
|
|
|
def info_gain(left, right, current_uncertainty):
    """Return how much a split reduces impurity.

    The result is ``current_uncertainty`` minus the Gini impurity of the two
    child sets, each weighted by its share of the combined rows.

    Raises:
        ZeroDivisionError: if both ``left`` and ``right`` are empty.
    """
    left_size = len(left)
    p = float(left_size) / (left_size + len(right))
    weighted_left = p * gini(left)
    weighted_right = (1 - p) * gini(right)
    return current_uncertainty - weighted_left - weighted_right
|
|
|
|
|
|
|
|
|
|
|
|
def find_best_split(rows):
    """Search every (column, value) pair for the question with the best gain.

    The last element of each row is treated as the label and is not used
    as a feature. Candidate questions that leave one side empty are skipped.

    Args:
        rows: non-empty list of rows.

    Returns:
        ``(best_gain, best_question)``. ``best_question`` is ``None`` when
        no candidate produced two non-empty partitions.
    """
    best_gain, best_question = 0, None
    current_uncertainty = gini(rows)
    feature_count = len(rows[0]) - 1

    for col in range(feature_count):
        # Each distinct value seen in this column is a candidate threshold.
        for val in {row[col] for row in rows}:
            candidate = Question(col, val)
            matched, unmatched = partition(rows, candidate)
            if not matched or not unmatched:
                continue

            gain = info_gain(matched, unmatched, current_uncertainty)
            # ">=" means a later candidate with an equal gain replaces an
            # earlier one.
            if gain >= best_gain:
                best_gain, best_question = gain, candidate

    return best_gain, best_question
|
|
|
|
|
|
|
|
|
|
|
|
class Leaf:
    """Terminal tree node holding the label counts of the rows that reached it."""

    def __init__(self, rows):
        """Record, per label, how many of ``rows`` carry that label."""
        label_tally = class_counts(rows)
        self.predictions = label_tally
|
|
|
|
|
|
|
|
class Decision_Node:
    """Internal tree node: a question plus the subtree for each answer."""

    def __init__(self, question, true_branch, false_branch):
        """Store the question and the branches taken when it does / does not match."""
        self.question = question
        self.true_branch, self.false_branch = true_branch, false_branch
|
|
|
|
|
|
|
|
def build_tree(rows):
    """Recursively build a decision tree for ``rows``.

    Returns a ``Leaf`` when the best available split has zero gain,
    otherwise a ``Decision_Node`` whose branches are built from the rows
    that match / do not match the chosen question.
    """
    gain, question = find_best_split(rows)
    if gain == 0:
        # Nothing left to gain from splitting: stop here.
        return Leaf(rows)

    matched, unmatched = partition(rows, question)
    # The true branch is built before the false branch.
    return Decision_Node(question, build_tree(matched), build_tree(unmatched))
|
|
|
|
|
|
|
|
|
|
|
|
def print_tree(node, spacing=""):
    """Print the tree rooted at ``node``, indenting each level by ``spacing``.

    A leaf prints its label counts; a decision node prints its question
    followed by the true subtree and then the false subtree.
    """
    if isinstance(node, Leaf):
        print(spacing + "Predict", node.predictions)
        return

    print(spacing + str(node.question))

    deeper = spacing + " "
    print(spacing + '--> True:')
    print_tree(node.true_branch, deeper)

    print(spacing + '--> False:')
    print_tree(node.false_branch, deeper)
|
|
|
|
|
|
|
|
|
|
|
|
def finalAnswer(row, node):
    """Classify ``row`` by walking the decision tree rooted at ``node``.

    At each decision node the row follows the true branch when the node's
    question matches it, otherwise the false branch, until a leaf is reached.

    The prediction is the label with the highest count in the leaf. The
    previous implementation took character 2 of ``str(predictions)``, which
    only worked for one-character string labels and returned the first-seen
    label of a mixed leaf even when another label had more rows. For
    one-character labels in pure or tied leaves the result is unchanged.

    Args:
        row: the example to classify; indexed by the questions in the tree.
        node: a ``Leaf`` or ``Decision_Node``.

    Returns:
        The predicted label, exactly as stored in the leaf's counts.

    Raises:
        ValueError: if the reached leaf holds no label counts.
    """
    # Walk down iteratively; each step picks the branch the question selects.
    while not isinstance(node, Leaf):
        if node.question.match(row):
            node = node.true_branch
        else:
            node = node.false_branch

    predictions = node.predictions
    # max() keeps the first maximal key, so ties go to the first-seen label.
    return max(predictions, key=predictions.get)
|
|
|
|
|
|
|
|
|
|
|
|
# Build the decision tree from the bundled training data when this module runs.
my_tree = build_tree(training_data)

# Example usage (left disabled): classify one row and print the answer.
# test = [3, 2, 'n', 7, 'n']
# wynik = finalAnswer(test, my_tree)
# print(wynik)
|
|
|
|
|