# Projekt_AI-Automatyczny_saper/Engine/DecisionTree.py

import pprint

import numpy as np
import pandas as pd

eps = np.finfo(float).eps
from numpy import log2 as log

class DecisionTree:
    """Information-gain (ID3-style) decision tree built from a CSV table.

    The last column of the training table is the target (class) column;
    every other column is a candidate attribute to split on.  The learned
    tree is a nested ``dict`` shaped like
    ``{attribute: {value: subtree_or_leaf}}`` where a leaf is a value taken
    from the target column.
    """

    # Attribute name used as a tree node -> name of the bomb accessor whose
    # result is compared against that node's branch labels in ``key``.
    _BOMB_GETTERS = {
        "defusable": "getDefusable",
        "bomb_type": "getBombType",
        "size": "getSize",
        "detonation_duration": "getDetonationDuration",
        "detonation_area": "getDetonationArea",
    }

    def __init__(self, csv_path="out.csv"):
        """Load the training table, build the tree and pretty-print it.

        Args:
            csv_path: Path of the CSV file to learn from.  Defaults to
                ``"out.csv"`` in the current working directory.
        """
        df = pd.read_csv(csv_path)
        self.tree = self.buildTree(df)
        pprint.pprint(self.tree)

    @staticmethod
    def _label_entropy(labels):
        """Return the Shannon entropy (base 2) of a Series of class labels."""
        probabilities = labels.value_counts(normalize=True).to_numpy(dtype=float)
        # Zero-probability classes contribute nothing and would make log2 blow up.
        probabilities = probabilities[probabilities > 0]
        return float(-np.sum(probabilities * np.log2(probabilities)))

    def find_entropy(self, df):
        """Return the entropy of the target (last) column of ``df``."""
        target = df.columns[-1]
        return self._label_entropy(df[target])

    def find_entropy_attribute(self, df, attribute):
        """Return the entropy of the target column conditioned on ``attribute``.

        This is the average of the target entropy inside each group of rows
        sharing one value of ``attribute``, weighted by the group's share of
        all rows.  Rows whose ``attribute`` is missing match no group and
        therefore contribute nothing.
        """
        target = df.columns[-1]
        total_rows = len(df)
        if total_rows == 0:
            return 0.0

        conditional_entropy = 0.0
        for value in df[attribute].dropna().unique():
            labels = df.loc[df[attribute] == value, target]
            conditional_entropy += len(labels) / total_rows * self._label_entropy(labels)
        return conditional_entropy

    def find_winner(self, df):
        """Return the attribute of ``df`` with the highest information gain.

        Attributes with more than one distinct value are preferred: a
        constant attribute cannot split the data, and picking it (which can
        happen through floating-point noise when all gains are ~0) would
        make ``buildTree`` recurse without making progress.

        Raises:
            ValueError: If ``df`` has no attribute columns.
        """
        attributes = df.columns[:-1]
        if len(attributes) == 0:
            raise ValueError("cannot pick a split attribute: table has no attribute columns")

        # The dataset entropy does not depend on the attribute; compute it once.
        base_entropy = self.find_entropy(df)
        gains = [
            base_entropy - self.find_entropy_attribute(df, attribute)
            for attribute in attributes
        ]

        can_split = [df[attribute].nunique() > 1 for attribute in attributes]
        if any(can_split):
            gains = [
                gain if usable else -np.inf
                for gain, usable in zip(gains, can_split)
            ]
        return attributes[int(np.argmax(gains))]

    def get_subtable(self, df, node, value):
        """Return the rows of ``df`` where column ``node`` equals ``value``.

        The result gets a fresh 0-based index.
        """
        return df[df[node] == value].reset_index(drop=True)

    def buildTree(self, df, tree=None):
        """Recursively build the decision tree for ``df``.

        Args:
            df: Training table; the last column is the target.
            tree: Optional dict to add the new node to.  A new dict is
                created when omitted.

        Returns:
            The nested ``{attribute: {value: subtree_or_leaf}}`` dict.
            A branch becomes a leaf when its rows all share one class, or
            when the rows disagree but no remaining attribute can separate
            them; in the latter case the most frequent class is used.
        """
        target = df.columns[-1]

        # Attribute with maximum information gain becomes this node.
        node = self.find_winner(df)

        if tree is None:
            tree = {}
        branches = tree.setdefault(node, {})

        # Rows with a missing value for `node` match no branch, so they are
        # left out of the branch labels.
        for value in np.unique(df[node].dropna()):
            subtable = self.get_subtable(df, node, value)
            classes, counts = np.unique(subtable[target], return_counts=True)

            if len(counts) == 1:  # pure subset
                branches[value] = classes[0]
                continue

            splittable = any(
                subtable[column].nunique() > 1 for column in subtable.columns[:-1]
            )
            if splittable:
                branches[value] = self.buildTree(subtable)
            else:
                # Identical attribute values with conflicting labels: recursing
                # would never terminate, so settle on the majority class.
                branches[value] = classes[int(np.argmax(counts))]
        return tree

    def key(self, t, bomb):
        """Walk tree ``t`` for ``bomb`` and print the decision that is reached.

        Args:
            t: A (sub)tree dict as produced by ``buildTree``, or a leaf.
            bomb: Object exposing ``getDefusable``, ``getBombType``,
                ``getSize``, ``getDetonationDuration`` and
                ``getDetonationArea``; only the accessors for attributes
                actually present on the walked path are called.

        Returns:
            The leaf value that was printed, or ``None`` when no branch
            matches the bomb's attribute values.
        """
        if not isinstance(t, dict):
            print(t)
            return t

        decision = None
        for attribute, branches in t.items():
            getter_name = self._BOMB_GETTERS.get(attribute)
            if getter_name is None:
                continue  # attribute this walker does not know how to read
            bomb_value = getattr(bomb, getter_name)()
            for label, subtree in branches.items():
                if label == bomb_value:
                    outcome = self.key(subtree, bomb)
                    if decision is None:
                        decision = outcome
        return decision