wstępne drzewko

2020-05-15 13:54:18 +02:00 · 2020-05-15 13:54:18 +02:00 · 893793c0bf
commit 893793c0bf
parent 677cd02e6f
1 changed files with 197 additions and 3 deletions
--- a/Kamila.py
+++ b/Kamila.py
@@ -1,9 +1,203 @@
-class main():
+import pandas as pd
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.model_selection import train_test_split
+from sklearn import metrics
+import numpy
+
+
+header = ["ready", "hydration", "weeds", "planted"]
+
+
+def check(field):
+    """Translate a field code into one training row for the decision tree.
+
+    Codes 0-8 each map to a four-element row: three 0/1 flags followed by a
+    'Y'/'N' label (the columns line up with the module-level ``header``).
+    A fresh list is returned on every call.
+
+    Any other value prints "wrong field number" and the function falls
+    through, so the caller receives None.
+    """
+    rows_by_code = (
+        (0, 0, 0, 'N'),  # 0
+        (0, 0, 1, 'N'),  # 1
+        (0, 0, 0, 'Y'),  # 2
+        (0, 0, 1, 'Y'),  # 3
+        (0, 1, 0, 'N'),  # 4
+        (0, 1, 1, 'N'),  # 5
+        (0, 1, 0, 'Y'),  # 6
+        (0, 1, 1, 'Y'),  # 7
+        (1, 0, 0, 'N'),  # 8
+    )
+    # Scan with == in ascending order rather than hashing the input, so any
+    # value that compares equal to a code is accepted.
+    for code, attributes in enumerate(rows_by_code):
+        if field == code:
+            return list(attributes)
+    print("wrong field number")
+
+
+def un_values(rows, col):
+    """Return the set of distinct values found in column ``col`` of ``rows``."""
+    return {row[col] for row in rows}
+
+
+def class_counts(rows):
+    """Count how many rows carry each label.
+
+    The label is the last element of a row.  Returns a plain dict mapping
+    label -> number of occurrences, with keys in first-seen order.
+    """
+    tally = {}
+    for row in rows:
+        label = row[-1]
+        tally[label] = tally.get(label, 0) + 1
+    return tally
+
+
+def is_numeric(value):
+    """Return True when ``value`` is an int or a float (bool counts as int)."""
+    return isinstance(value, (int, float))
+
+
+class Question:
+    """A yes/no test applied to one column of a row.
+
+    ``column`` is the index of the feature to inspect and ``value`` is the
+    value it is compared against.
+    """
+
+    def __init__(self, column, value):
+        self.column = column
+        self.value = value
+
+    def match(self, example):
+        """Return whether ``example`` satisfies this question.
+
+        A numeric feature value (int/float) is tested with ``==``; anything
+        else is tested with ``!=``.
+        """
+        candidate = example[self.column]
+        if not is_numeric(candidate):
+            return candidate != self.value
+        return candidate == self.value
+
+    def __repr__(self):
+        # The operator shown depends on the type of the stored value, the
+        # same numeric / non-numeric distinction match() makes on the row.
+        condition = "==" if is_numeric(self.value) else "!="
+        return "Is %s %s %s?" % (
+            header[self.column], condition, str(self.value)
+        )
+
+
+def partition(rows, question):
+    """Split ``rows`` by whether they satisfy ``question``.
+
+    Returns ``(true_rows, false_rows)``; both lists keep the input order.
+    ``question.match`` is called exactly once per row.
+    """
+    matched, unmatched = [], []
+    for row in rows:
+        bucket = matched if question.match(row) else unmatched
+        bucket.append(row)
+    return matched, unmatched
+
+
+def gini(rows):
+    """Return the Gini impurity of the labels in ``rows``.
+
+    Starts from 1 and subtracts the squared relative frequency of every
+    label in turn, so a set with a single label gives 0.  An empty ``rows``
+    yields 1 because there is nothing to subtract.
+    """
+    total = float(len(rows))
+    impurity = 1
+    for count in class_counts(rows).values():
+        impurity -= (count / total) ** 2
+    return impurity
+
+
+def info_gain(left, right, current_uncertainty):
+    """Return how much splitting into ``left`` / ``right`` reduces impurity.
+
+    The result is ``current_uncertainty`` minus the Gini impurity of the two
+    partitions, each weighted by its share of the rows.
+    """
+    left_weight = float(len(left)) / (len(left) + len(right))
+    weighted_left = left_weight * gini(left)
+    weighted_right = (1 - left_weight) * gini(right)
+    return current_uncertainty - weighted_left - weighted_right
+
+
+def find_best_split(rows):
+    """Search every (column, value) pair for the most informative question.
+
+    The last element of each row is treated as the label, so only the
+    preceding columns are candidates.  Returns ``(best_gain, best_question)``;
+    the question stays None when no candidate separates the rows into two
+    non-empty groups.
+    """
+    top_gain, top_question = 0, None
+    baseline = gini(rows)
+    feature_count = len(rows[0]) - 1
+
+    for col in range(feature_count):
+        for val in {row[col] for row in rows}:
+            candidate = Question(col, val)
+            matched, unmatched = partition(rows, candidate)
+            # A split that leaves one side empty separates nothing.
+            if not matched or not unmatched:
+                continue
+            gain = info_gain(matched, unmatched, baseline)
+            # >= lets a later candidate with equal gain replace an earlier one.
+            if gain >= top_gain:
+                top_gain, top_question = gain, candidate
+
+    return top_gain, top_question
+
+
+class Leaf:
+    """Terminal tree node holding the label counts of the rows it was given."""
+
+    def __init__(self, rows):
+        # Label -> count mapping as produced by class_counts().
+        self.predictions = class_counts(rows)
+
+
+class DecisionNode:
+    """Internal tree node: a question plus the subtree for each answer."""
+
+    def __init__(self, question, true_branch, false_branch):
+        self.question = question
+        self.true_branch = true_branch    # followed when the question matches
+        self.false_branch = false_branch  # followed otherwise
+
+
+def build_tree(rows):
+    """Recursively build a decision tree for ``rows``.
+
+    Returns a Leaf when the best available split has zero gain, otherwise a
+    DecisionNode whose branches are built from the two partitions.
+    """
+    gain, question = find_best_split(rows)
+    if gain == 0:
+        # No split improves on the current rows, so stop here.
+        return Leaf(rows)
+
+    matched, unmatched = partition(rows, question)
+    return DecisionNode(question, build_tree(matched), build_tree(unmatched))
+
+
+def print_tree(node, spacing=""):
+    """Print the tree rooted at ``node``, adding one space of indent per level."""
+    if isinstance(node, Leaf):
+        print(spacing + "Predict", node.predictions)
+        return
+
+    print(spacing + str(node.question))
+
+    child_indent = spacing + " "
+    for caption, child in (
+        ('--> True: ', node.true_branch),
+        ('--> False: ', node.false_branch),
+    ):
+        print(spacing + caption)
+        print_tree(child, child_indent)
+
+
+def classify(row, node):
+    """Walk ``row`` down the tree from ``node`` and return the leaf's counts."""
+    current = node
+    while not isinstance(current, Leaf):
+        if current.question.match(row):
+            current = current.true_branch
+        else:
+            current = current.false_branch
+    return current.predictions
+
+
+def print_leaf(counts):
+    """Convert label counts into percentage strings.
+
+    Each count is divided by the total, scaled to 100 and truncated to an
+    int, e.g. ``{'Y': 1, 'N': 3}`` -> ``{'Y': '25%', 'N': '75%'}``.  Despite
+    the name, nothing is printed; the mapping is returned.
+    """
+    total = sum(counts.values()) * 1.0
+    return {
+        label: str(int(count / total * 100)) + "%"
+        for label, count in counts.items()
+    }
+
+
+class Main():
     def __init__(self,traktor,field,ui,path):
+        # Keep the four objects passed in by the caller as instance attributes.
         self.traktor = traktor
         self.field = field
         self.ui = ui
         self.path = path

-    def main(self):
-        pass
+    @staticmethod
+    def tree(field):
+        """Build a decision tree from a grid of field codes and print it.
+
+        ``field`` is indexed by row; rows 0-9 are read and every cell in them
+        is passed through check() to obtain one training row.  The grid is
+        presumably 10x10 with codes 0-8 (the shape of the sample grid this
+        method used to carry) -- confirm against the caller.
+
+        Declared static because the signature never took ``self``: this
+        keeps ``Main.tree(grid)`` working and makes ``instance.tree(grid)``
+        work as well.  Nothing is returned; the tree is printed.
+        """
+        training_rows = []
+
+        for i in range(10):
+            # Iterate the cell values themselves.  Using a value as an index
+            # into its own row reads the wrong cell or raises IndexError.
+            for code in field[i]:
+                training_rows.append(check(code))
+
+        x = build_tree(training_rows)
+        print_tree(x)