# Projekt_AI-Automatyczny_saper/Engine/DecisionTree.py
import json

import numpy as np
import pandas as pd
from numpy import log2 as log

eps = np.finfo(float).eps  # small constant to avoid log(0) and division by zero
LEAVES = 'action'  # label column used to check leaf purity when building the tree


class DecisionTree:
    def __init__(self, doCreation):
        # doCreation=True: the tree will be built from data via buildTree();
        # doCreation=False: load a previously saved tree from DecisionTree.json.
        if doCreation:
            return
        with open('DecisionTree.json', 'r') as fp:
            self.tree = json.load(fp)

    def find_entropy(self, df):
        # Shannon entropy of the target column (assumed to be the last column).
        Class = df.keys()[-1]
        entropy = 0
        values = df[Class].unique()
        for value in values:
            fraction = df[Class].value_counts()[value] / len(df[Class])
            entropy += -fraction * np.log2(fraction)
        return entropy
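    # Worked example of the formula above: a target column split 50/50 between
    # two actions (say 'defuse' and 'detonate') has entropy
    # -0.5*log2(0.5) - 0.5*log2(0.5) = 1.0 bit; a pure column has entropy 0.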

    def find_entropy_attribute(self, df, attribute):
        # Weighted entropy of the target column after splitting on `attribute`.
        Class = df.keys()[-1]
        target_variables = df[Class].unique()  # all classes of the target column
        variables = df[attribute].unique()     # all distinct values of this attribute
        entropy2 = 0
        for variable in variables:
            entropy = 0
            for target_variable in target_variables:
                num = len(df[(df[attribute] == variable) & (df[Class] == target_variable)])
                den = len(df[df[attribute] == variable])
                fraction = num / (den + eps)
                entropy += -fraction * log(fraction + eps)
            fraction2 = den / len(df)
            entropy2 += -fraction2 * entropy
        return abs(entropy2)
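    # find_entropy_attribute() returns the conditional entropy
    #     H(S | A) = sum_v (|S_v| / |S|) * H(S_v),
    # so the information gain computed in find_winner() below is
    #     IG(A) = H(S) - H(S | A).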

    def find_winner(self, df):
        # Pick the attribute with the highest information gain.
        IG = []
        for key in df.keys()[:-1]:
            IG.append(self.find_entropy(df) - self.find_entropy_attribute(df, key))
        return df.keys()[:-1][np.argmax(IG)]

    def get_subtable(self, df, node, value):
        # Rows where `node` takes the given value, reindexed from zero.
        return df[df[node] == value].reset_index(drop=True)

    def buildTree(self, df, tree=None):
        # Recursively build the decision tree (ID3-style):
        # split on the attribute with maximum information gain.
        node = self.find_winner(df)
        # Distinct values taken by that attribute in the current subset.
        attValue = np.unique(df[node])
        # The tree is stored as nested dictionaries: {attribute: {value: subtree or action}}.
        if tree is None:
            tree = {}
            tree[node] = {}
        # Check each subset for purity; recurse on the impure ones.
        for value in attValue:
            subtable = self.get_subtable(df, node, value)
            clValue, counts = np.unique(subtable[LEAVES], return_counts=True)
            if len(counts) == 1:  # pure subset -> leaf
                tree[node][value] = clValue[0]
            else:
                tree[node][value] = self.buildTree(subtable)  # recurse on the subset
        # The file is rewritten on every recursive call; the outermost call writes
        # last, so the saved JSON ends up holding the complete tree.
        with open('C:\\Users\\kratu\\PycharmProjects\\Projekt_AI-Automatyczny_saper\\DecisionTree.json', 'w') as fp:
            json.dump(tree, fp)
        return tree
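    # The dumped JSON mirrors the in-memory structure: nested single-key dicts
    # keyed by attribute name, whose values map attribute values to either a
    # further subtree or an action string, roughly (shape only, values illustrative):
    #     {"bomb_type": {"<value>": "defuse", "<other value>": {"size": {...}}}}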

    def key(self, t, bomb):
        # Walk the nested-dict tree using the bomb's attribute getters until a
        # leaf (an action string) is reached.
        if t in ["poligon", "defuse", "detonate"]:
            print(t)
            return t
        for x in t.keys():
            if x == "defusable":
                for y in t[x].keys():
                    if y == bomb.getDefusable():
                        return self.key(t[x][y], bomb)
            if x == "bomb_type":
                for y in t[x].keys():
                    if y == bomb.getBombType():
                        return self.key(t[x][y], bomb)
            if x == "size":
                for y in t[x].keys():
                    if y == bomb.getSize():
                        return self.key(t[x][y], bomb)
            if x == "detonation_duration":
                for y in t[x].keys():
                    if y == bomb.getDetonationDuration():
                        return self.key(t[x][y], bomb)
            if x == "detonation_area":
                for y in t[x].keys():
                    if y == bomb.getDetonationArea():
                        return self.key(t[x][y], bomb)


if __name__ == "__main__":
    # Build the tree from the exported training data and persist it to JSON.
    data = pd.read_csv("C:\\Users\\kratu\\PycharmProjects\\Projekt_AI-Automatyczny_saper\\out.csv")
    DecisionTree(True).buildTree(data)
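    # Illustrative follow-up (a sketch; `bomb` is assumed to be an object that
    # exposes the getters used by key() above, e.g. the project's Bomb class):
    #     classifier = DecisionTree(False)  # reads DecisionTree.json
    #     action = classifier.key(classifier.tree, bomb)
    # Note that __init__ opens 'DecisionTree.json' relative to the working
    # directory, while buildTree() writes to the absolute path above.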