diff --git a/dane.txt b/dane.txt new file mode 100644 index 0000000..cb1c363 --- /dev/null +++ b/dane.txt @@ -0,0 +1,625 @@ +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z 
n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +j n s n +z s s z +z n c k +j n s n +z s s z +z n 
c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n +z s s z +z n c k +j n s n diff --git a/decyzje.txt b/decyzje.txt new file mode 100644 index 0000000..abd4232 --- /dev/null +++ b/decyzje.txt @@ -0,0 +1,625 @@ +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. 
+B +Z +. +B +Z +. +B +Z +. +B +Z +. +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. +B +Z +. 
"""Decision-tree classifier driven by Gini impurity.

A binary tree is grown from ``training_data``: every inner node asks one
``Question`` about a single column, every leaf stores the label counts of the
training rows that reached it.

Run as a script, the module prints the tree, classifies every
whitespace-separated row of ``dane.txt`` and writes one decision symbol per
row to ``decyzje.txt``.
"""

training_data = [
    # Columns: zyznosc (fertility), nawodnienie (irrigation), cien (shade),
    # kwasowosc (acidity), followed by the class label.
    ['z', 'n', 's', 'z', 1],
    ['z', 'n', 's', 'n', 1],
    ['j', 'n', 's', 'z', 1],
    ['z', 's', 's', 'n', 1],
    ['j', 'n', 'c', 'n', 1],
    ['z', 'n', 's', 'k', 1],
    ['z', 'n', 'c', 'k', 2],
    ['z', 's', 's', 'k', 2],
    ['z', 's', 'c', 'k', 2],
    ['j', 'n', 's', 'k', 2],
    ['z', 's', 'c', 'z', 3],
    ['j', 'n', 's', 'n', 3],
]

# Column names, used only when rendering a question as text.
header = ["zyznosc", "nawodnienie", "cien", "kwasowosc", "wybor"]

# Symbol written to the decisions file for each class label.
DECISION_SYMBOLS = {1: "B", 2: "Z", 3: "."}


def class_counts(rows):
    """Return ``{label: occurrences}`` where the label is each row's last item."""
    counts = {}
    for row in rows:
        label = row[-1]
        counts[label] = counts.get(label, 0) + 1
    return counts


def is_numeric(value):
    """Return True when *value* is an ``int`` or a ``float``."""
    return isinstance(value, (int, float))


class Question:
    """A yes/no test on one column of a row.

    Numeric cells are compared with ``>=`` against the stored value, every
    other cell with ``==``.
    """

    def __init__(self, column, value):
        self.column = column
        self.value = value

    def match(self, example):
        """Return True when *example* satisfies this question."""
        val = example[self.column]
        # Only use the ordering comparison when both sides are numbers, so a
        # numeric cell tested against a string value is simply "not equal"
        # instead of raising TypeError.
        if is_numeric(val) and is_numeric(self.value):
            return val >= self.value
        return val == self.value

    def __repr__(self):
        condition = ">=" if is_numeric(self.value) else "=="
        return "Czy %s %s %s?" % (
            header[self.column], condition, str(self.value))


def partition(rows, question):
    """Split *rows* into ``(matching, non_matching)`` lists for *question*."""
    true_rows, false_rows = [], []
    for row in rows:
        if question.match(row):
            true_rows.append(row)
        else:
            false_rows.append(row)
    return true_rows, false_rows


def gini(rows):
    """Return the Gini impurity of the labels in *rows*.

    The result is 0 for a single-label set; an empty set yields 1.
    """
    total = float(len(rows))
    impurity = 1
    for count in class_counts(rows).values():
        impurity -= (count / total) ** 2
    return impurity


def info_gain(left, right, current_uncertainty):
    """Return how much splitting into *left*/*right* lowers the impurity.

    The children's impurities are weighted by their share of the rows and
    subtracted from *current_uncertainty*.
    """
    p = float(len(left)) / (len(left) + len(right))
    return current_uncertainty - p * gini(left) - (1 - p) * gini(right)


def find_best_split(rows):
    """Return ``(gain, question)`` for the most informative split of *rows*.

    Returns ``(0, None)`` when *rows* is empty or no question separates the
    rows into two non-empty groups.
    """
    best_gain = 0
    best_question = None
    if not rows:
        return best_gain, best_question

    current_uncertainty = gini(rows)
    n_features = len(rows[0]) - 1  # the last column is the label
    for col in range(n_features):
        # Unique values in first-seen order. Iterating a set of strings would
        # make the tie-break below depend on per-process hash randomization,
        # so the tree could differ from run to run.
        values = list(dict.fromkeys(row[col] for row in rows))
        for val in values:
            question = Question(col, val)
            true_rows, false_rows = partition(rows, question)
            if not true_rows or not false_rows:
                continue  # the question does not divide the rows
            gain = info_gain(true_rows, false_rows, current_uncertainty)
            # ">=" keeps the original tie-break: the last best candidate wins.
            if gain >= best_gain:
                best_gain, best_question = gain, question
    return best_gain, best_question


class Leaf:
    """Terminal node holding the label counts of the rows that reached it."""

    def __init__(self, rows):
        self.predictions = class_counts(rows)


class Decision_Node:
    """Inner node: a question plus the subtrees for its two answers."""

    def __init__(self,
                 question,
                 true_branch,
                 false_branch):
        self.question = question
        self.true_branch = true_branch
        self.false_branch = false_branch


def build_tree(rows):
    """Recursively grow a tree from *rows* and return its root node.

    Recursion stops with a ``Leaf`` as soon as no split yields any gain.
    """
    gain, question = find_best_split(rows)
    if gain == 0 or question is None:
        return Leaf(rows)
    true_rows, false_rows = partition(rows, question)
    true_branch = build_tree(true_rows)
    false_branch = build_tree(false_rows)
    return Decision_Node(question, true_branch, false_branch)


def print_tree(node, spacing=""):
    """Print the tree below *node*, indenting each level by one space."""
    if isinstance(node, Leaf):
        print(spacing + "Predict", node.predictions)
        return
    print(spacing + str(node.question))
    print(spacing + '--> True:')
    print_tree(node.true_branch, spacing + " ")
    print(spacing + '--> False:')
    print_tree(node.false_branch, spacing + " ")


my_tree = build_tree(training_data)


def classify(row, node):
    """Walk the tree from *node* and return the label counts of the leaf hit."""
    if isinstance(node, Leaf):
        return node.predictions
    if node.question.match(row):
        return classify(row, node.true_branch)
    return classify(row, node.false_branch)


def print_leaf(counts):
    """Return ``{label: "NN%"}`` with each label's share, truncated to an int."""
    total = sum(counts.values()) * 1.0
    probs = {}
    for lbl in counts:
        probs[lbl] = str(int(counts[lbl] / total * 100)) + "%"
    return probs


def decision_symbol(counts):
    """Return the output symbol for the most frequent label in *counts*.

    Choosing the majority label means an impure leaf still produces a
    decision. Returns None when *counts* is empty or the winning label has no
    entry in ``DECISION_SYMBOLS``.
    """
    if not counts:
        return None
    label = max(counts, key=counts.get)
    return DECISION_SYMBOLS.get(label)


def load_rows(path):
    """Read *path* and return its non-blank lines split on whitespace."""
    with open(path, "r") as source:
        # Blank lines carry no features and would fail on column lookup.
        return [fields for fields in (line.split() for line in source)
                if fields]


def write_decisions(rows, tree, path):
    """Classify *rows* with *tree* and write one symbol per line to *path*.

    The file is opened once and truncated; rows whose prediction has no
    symbol are skipped.
    """
    with open(path, "w") as out:
        for row in rows:
            symbol = decision_symbol(classify(row, tree))
            if symbol is not None:
                out.write(symbol + "\n")


def main():
    """Print the tree, then classify ``dane.txt`` into ``decyzje.txt``."""
    print_tree(my_tree)
    # Read the input first so a missing file fails before the output file
    # is truncated.
    testing_data = load_rows('dane.txt')
    write_decisions(testing_data, my_tree, "decyzje.txt")


if __name__ == "__main__":
    main()