dt prototype

2020-05-17 21:15:46 +00:00 · 2020-05-17 21:15:46 +00:00 · f76f0c2639
commit f76f0c2639
parent 99dfa56d8d
1 changed files with 150 additions and 0 deletions
--- a/main.py
+++ b/main.py
@ -46,6 +46,156 @@ menu = Context.fromstring(''' |meat|salad|meal|drink|cold|hot |
 #print(func_output)
 '''
 def uniq_val_from_data(rows, col):
     """Return the set of distinct values found at index ``col`` across ``rows``."""
     return {record[col] for record in rows}
 def class_counts(rows):
     """Count how many rows carry each label, where the label is a row's last element.

     Returns a dict mapping label -> number of occurrences.
     """
     tally = {}
     for record in rows:
         key = record[-1]
         tally[key] = tally.get(key, 0) + 1
     return tally
 def isnumer(value):
     """Return True when ``value`` is an ``int`` or a ``float`` instance."""
     return isinstance(value, (int, float))
 # Column labels; Question.__repr__ looks them up by column index.
 # NOTE(review): `...` looks like a placeholder - it is not subscriptable, so it
 # presumably has to be replaced with a real sequence of names. TODO confirm.
 header = ...
 class Question():
     """A test applied to one column of a row, used to split a dataset.

     ``column`` is the index of the value to inspect and ``value`` is what
     that value is compared against.
     """
     def __init__(self, column, value):
         self.column = column
         self.value = value
     def compare(self, example):
         """Return whether ``example`` satisfies this question.

         Numeric values are tested with ``>=``; anything else with ``==``.
         """
         val = example[self.column]
         if isnumer(val):
             return val >= self.value
         return val == self.value
     def __repr__(self):
         # Show the same operator that compare() would apply for this kind of value.
         condition = ">=" if isnumer(self.value) else "=="
         return "Is %s %s %s?" % (header[self.column], condition, str(self.value))
 def partition(rows, quest):
     """Split ``rows`` by the outcome of ``quest.compare(row)``.

     Returns a ``(t_rows, f_rows)`` tuple: rows for which the comparison is
     truthy, then rows for which it is falsy. Input order is preserved in
     both lists.
     """
     t_rows, f_rows = [], []
     for row in rows:
         if quest.compare(row):
             t_rows.append(row)
         else:
             f_rows.append(row)
     return t_rows, f_rows
 def gini(rows):
     """Return the Gini impurity of ``rows``: 1 minus the sum of squared label shares.

     Labels are counted with ``class_counts``. An empty ``rows`` yields 1,
     because there are no labels to subtract.
     """
     counts = class_counts(rows)
     total = float(len(rows))
     impurity = 1
     for lbl in counts:
         prob_of_lbl = counts[lbl] / total
         impurity -= prob_of_lbl**2
     return impurity
 def info_gain(l,r, current_uncertainty):
     """Return the parent's uncertainty minus the size-weighted Gini impurity of ``l`` and ``r``."""
     left_size = float(len(l))
     p = left_size / (len(l) + len(r))
     left_term = p*gini(l)
     right_term = (1-p)*gini(r)
     return current_uncertainty - left_term - right_term
 def find_best_q(rows):
     """Search every (column, value) pair for the question with the highest information gain.

     The last element of each row is treated as the label and is not used
     as a feature. Returns ``(best_gain, best_quest)``; when no question
     separates the rows (or ``rows`` is empty) the gain is 0.
     """
     best_gain = 0
     best_quest = None
     if not rows:
         # Nothing to split; also avoids indexing rows[0] below.
         return best_gain, best_quest
     current_uncertainty = gini(rows)
     n_features = len(rows[0]) - 1
     for col in range(n_features):
         values = {row[col] for row in rows}
         for val in values:
             quest = Question(col, val)
             t_rows, f_rows = partition(rows, quest)
             # A question that leaves one side empty does not divide the data.
             if not t_rows or not f_rows:
                 continue
             gain = info_gain(t_rows, f_rows, current_uncertainty)
             if gain >= best_gain:
                 best_gain, best_quest = gain, quest
     return best_gain, best_quest
 class Leaf:
     """Terminal tree node holding the label counts of the rows that reached it.

     The counts dict (from ``class_counts``) is exposed under both
     ``predicts`` and ``predictions`` so that code using either attribute
     name works; both names refer to the same dict object.
     """
     def __init__(self,rows):
         self.predicts = class_counts(rows)
         self.predictions = self.predicts
 class Decision_Node():
     """Internal tree node: a question together with its two child branches."""
     def __init__(self, quest, t_branch, f_branch):
         self.quest, self.t_branch, self.f_branch = quest, t_branch, f_branch
 def build_tree(rows):
     """Recursively build a decision tree for ``rows`` and return its root node.

     Returns a ``Leaf`` when the best available question yields zero
     information gain; otherwise a ``Decision_Node`` whose branches are built
     from the rows that pass and fail that question.
     """
     gain, quest = find_best_q(rows)
     if gain == 0:
         return Leaf(rows)
     t_rows, f_rows = partition(rows, quest)
     t_branch = build_tree(t_rows)
     f_branch = build_tree(f_rows)
     return Decision_Node(quest, t_branch, f_branch)
 def print_tree(node, spacing=""):
     """Print the tree rooted at ``node``, indenting each level by two more spaces.

     ``spacing`` is the prefix for the current level; callers normally leave
     it at its default and the recursion extends it.
     """
     if isinstance(node, Leaf):
         # Leaf keeps its label counts in `predicts`.
         print(spacing + "Predict", node.predicts)
         return
     print(spacing + str(node.quest))
     print(spacing + '--> True:')
     print_tree(node.t_branch, spacing + "  ")
     print(spacing + '--> False:')
     print_tree(node.f_branch, spacing + "  ")
 def classify(row, node):
     """Walk the tree from ``node`` following each question's answer for ``row``.

     Returns the label-count dict stored on the ``Leaf`` that is reached.
     """
     if isinstance(node, Leaf):
         # Leaf keeps its label counts in `predicts`.
         return node.predicts
     branch = node.t_branch if node.quest.compare(row) else node.f_branch
     return classify(row, branch)
 def print_leaf(counts):
     """Turn a label -> count mapping into label -> percentage strings such as ``"50%"``.

     Percentages are truncated to whole numbers with ``int``.
     """
     total = 1.0 * sum(counts.values())
     return {lbl: str(int(cnt / total * 100)) + "%" for lbl, cnt in counts.items()}
 '''
 ###
 class Node:
    def __init__(self, state, parent, action):