Prześlij pliki do 'src'

Decision Tree
2020-05-19 16:17:33 +00:00 · 2020-05-19 16:17:33 +00:00 · 8396b810bb
commit 8396b810bb
parent dc8e83e6b2
1 changed files with 125 additions and 0 deletions
--- a/src/decisionTree.py
+++ b/src/decisionTree.py
@@ -0,0 +1,125 @@
+import numpy as np
+import pandas as pd
+import pprint
+
+from src.graphics import *
+from .waiter import Waiter
+
+# Machine epsilon for float; added to denominators and log2 arguments in
+# FindAttributesEntropy so that neither a division by zero nor log2(0) occurs.
+eps = np.finfo(float).eps
+# Module-level containers shared by all DecisionTree instances:
+# tasksList receives [name, distance] entries (see TasksList) and
+# tasksQueue receives [name, distance, priority] entries (see Queue).
+tasksList, tasksQueue = [], []
+
+class DecisionTree:
+    def __init__(self):
+        """Create the Waiter (built on a fresh Graphics object) read by TasksList."""
+        self.waiter = Waiter(Graphics())
+
+    def BuildDf(self):
+        """Build the hard-coded training table for the decision tree.
+
+        Every cell is kept as a string, exactly as produced by ``str.split``.
+
+        Returns:
+            A DataFrame with the columns ``actionName``, ``distance`` and
+            ``priority`` (the target column), in that order.
+        """
+        columnOrder = ['actionName', 'distance', 'priority']
+        actions = 'order,order,order,order,order,order,order,order,order,order,order,order,order,order,order,order,order,order,order,order,order,order,order,order,order,order,order,goToBar,goToBar,goToBar,goToBar,goToBar,goToBar,goToBar,goToBar,goToBar,goToBar,goToBar,goToBar,goToBar,goToBar,goToBar,goToBar,goToBar,goToBar,goToBar,goToBar,goToBar,goToBar,goToBar,goToBar,goToBar,goToBar,goToBar,eat,eat,eat,eat,eat,eat,eat,eat,eat,eat,eat,eat,eat,eat,eat,eat,eat,eat,eat,eat,eat,eat,eat,eat,eat,eat,eat,check,check,check,check,check,check,check,check,check,check,check,check,check,check,check,check,check,check,check,check,check,check,check,check,check,check,check'.split(',')
+        distances = '1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27'.split(',')
+        priorities = '1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,2,2,2,2,2,2,2,3,3,3,3,3,3,3,4,4,4,4,4,4,4'.split(',')
+
+        return pd.DataFrame(
+            dict(zip(columnOrder, (actions, distances, priorities))),
+            columns=columnOrder,
+        )
+
+    def FindPriorityEntropy(self, df):
+        """Compute the base-2 entropy of the ``priority`` column of the whole set.
+
+        Args:
+            df: DataFrame with a ``priority`` column.
+
+        Returns:
+            The sum of ``-p * log2(p)`` over the distinct priority values, where
+            ``p`` is the share of rows holding that value (0 for an empty frame).
+        """
+        counts = df.priority.value_counts()
+        total = len(df.priority)
+        # Iterate in order of first appearance so the summation order is fixed.
+        shares = (counts[label] / total for label in df.priority.unique())
+        return sum(-share * np.log2(share) for share in shares)
+
+    def FindAttributesEntropy(self, df, attribute):
+        """Compute the weighted entropy of ``priority`` after splitting on ``attribute``.
+
+        For every distinct value of the attribute, the entropy of ``priority``
+        among the matching rows is weighted by the share of rows in that group.
+
+        Args:
+            df: DataFrame with a ``priority`` column and the ``attribute`` column.
+            attribute: Name of the column to split on.
+
+        Returns:
+            The absolute value of the weighted entropy sum.
+        """
+        totalRows = len(df)
+        priorityLabels = df.priority.unique()
+
+        weighted = 0
+        for variable in df[attribute].unique():
+            inGroup = df[attribute] == variable
+            den = int(inGroup.sum())
+            groupSum = 0
+            for label in priorityLabels:
+                num = int((inGroup & (df.priority == label)).sum())
+                # eps keeps the division and the logarithm defined when a count is 0.
+                ratio = num / (den + eps)
+                groupSum += ratio * np.log2(ratio + eps)
+            weighted += -(den / totalRows) * groupSum
+        return abs(weighted)
+
+    def FindWinner(self, df):
+        """Find the attribute with the highest information gain.
+
+        Every column except the last one is treated as a candidate attribute;
+        the gain is the priority entropy minus the attribute's weighted entropy.
+
+        Args:
+            df: DataFrame whose last column is not a candidate (BuildDf puts
+                ``priority`` there).
+
+        Returns:
+            The name of the candidate column with the largest gain (the first
+            one on ties, as chosen by ``np.argmax``).
+        """
+        candidates = df.keys()[:-1]
+        gains = [
+            self.FindPriorityEntropy(df) - self.FindAttributesEntropy(df, column)
+            for column in candidates
+        ]
+        bestIndex = np.argmax(gains)
+        return candidates[bestIndex]
+
+    def GetSubtable(self, df, node, value):
+        """Return the rows of ``df`` whose ``node`` column equals ``value``.
+
+        The result gets a fresh 0-based index; the old index is dropped.
+        """
+        matching = df[node] == value
+        subtable = df.loc[matching]
+        return subtable.reset_index(drop=True)
+
+    def BuildTree(self, df, tree=None):
+        """Build the decision tree for ``df`` as nested dictionaries.
+
+        The result has the shape ``{attribute: {value: leaf_or_subtree}}``, where
+        a leaf is a priority label and a subtree is another such dictionary.
+
+        Args:
+            df: Training table; ``priority`` is the target column and must be
+                the last column (see FindWinner).
+            tree: Optional dictionary to fill; a new one is created when omitted.
+
+        Returns:
+            The (possibly newly created) tree dictionary.
+        """
+        node = self.FindWinner(df)
+        attValues = np.unique(df[node])
+
+        if tree is None:
+            tree = {}
+        # Also covers a caller-supplied tree that has no entry for this node yet.
+        tree.setdefault(node, {})
+
+        for value in attValues:
+            subtable = self.GetSubtable(df, node, value)
+            clValue, counts = np.unique(subtable['priority'], return_counts=True)
+
+            if len(counts) == 1:
+                # Pure subset: store the single priority as a leaf.
+                tree[node][value] = clValue[0]
+            elif len(subtable) == len(df):
+                # The split did not shrink the table, so recursing would repeat
+                # this exact call forever; fall back to the most frequent priority.
+                tree[node][value] = clValue[np.argmax(counts)]
+            else:
+                tree[node][value] = self.BuildTree(subtable)
+
+        return tree
+
+
+    def TasksList(self, name, coordinate):
+        """Add a task to the module-level ``tasksList``.
+
+        The stored distance is the Manhattan distance (sum of the absolute
+        differences of elements 0 and 1) between ``self.waiter.Node()`` and
+        ``coordinate``.
+
+        Args:
+            name: Task name, stored as the first element of the entry.
+            coordinate: Indexable pair compared with the waiter's node.
+        """
+        position = self.waiter.Node()
+        firstAxis = abs(position[0] - coordinate[0])
+        secondAxis = abs(position[1] - coordinate[1])
+        tasksList.append([name, firstAxis + secondAxis])
+
+    def Queue(self, tasksList):
+        """Queue the given tasks by the priority predicted by the decision tree.
+
+        Each task is classified by walking the tree built from ``BuildDf()``;
+        the resulting ``[name, distance, priority]`` entry is appended to the
+        module-level ``tasksQueue``, which is then sorted by priority and printed.
+
+        Args:
+            tasksList: Iterable of entries whose element 0 is the action name
+                and element 1 the distance (as produced by ``TasksList``).
+
+        Raises:
+            KeyError: If a task's action name or distance does not occur on the
+                matching path of the tree (i.e. it is absent from the training
+                table).
+        """
+        df = self.BuildDf()
+        tree = self.BuildTree(df)
+
+        for task in tasksList:
+            name, distance = task[0], task[1]
+            # The training table stores every cell as a string, so the tree keys
+            # are strings too; convert before looking anything up.
+            features = {'actionName': str(name), 'distance': str(distance)}
+
+            # Descend until a leaf (a priority label, not a dict) is reached.
+            # Each tree level is a one-key dict: {attribute: {value: child}}.
+            branch = tree
+            while isinstance(branch, dict):
+                attribute = next(iter(branch))
+                branch = branch[attribute][features[attribute]]
+
+            tasksQueue.append([name, distance, branch])
+
+        tasksQueue.sort(key=lambda x: x[2])
+        print(tasksQueue)
+
+    def print(self):
+        """Print the module-level ``tasksList``, then queue it and print the queue.
+
+        The queue itself is built and printed by ``Queue``.
+        """
+        print(tasksList)
+        self.Queue(tasksList)
+