From 893793c0bf5f06eefd40a1e8386f6290139a0db1 Mon Sep 17 00:00:00 2001 From: xkamikoo <58092037+xkamikoo@users.noreply.github.com> Date: Fri, 15 May 2020 13:54:18 +0200 Subject: [PATCH 1/5] =?UTF-8?q?wst=C4=99pne=20drzewko?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Kamila.py | 200 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 197 insertions(+), 3 deletions(-) diff --git a/Kamila.py b/Kamila.py index 5cad369..7589ff1 100644 --- a/Kamila.py +++ b/Kamila.py @@ -1,9 +1,203 @@ -class main(): +import pandas as pd +from sklearn.tree import DecisionTreeClassifier +from sklearn.model_selection import train_test_split +from sklearn import metrics +import numpy + + +header = ["ready", "hydration", "weeds", "planted"] + + +def check(field): + if field == 0: + return [0, 0, 0, 'N'] + elif field == 1: + return [0, 0, 1, 'N'] + elif field == 2: + return [0, 0, 0, 'Y'] + elif field == 3: + return [0, 0, 1, 'Y'] + elif field == 4: + return [0, 1, 0, 'N'] + elif field == 5: + return [0, 1, 1, 'N'] + elif field == 6: + return [0, 1, 0, 'Y'] + elif field == 7: + return [0, 1, 1, 'Y'] + elif field == 8: + return [1, 0, 0, 'N'] + else: + print("wrong field number") + + +def un_values(rows, col): + return set([row[col] for row in rows]) + + +def class_counts(rows): + counts = {} + for row in rows: + label = row[-1] + if label not in counts: + counts[label] = 0 + counts[label] += 1 + return counts + + +def is_numeric(value): + return isinstance(value, int) or isinstance(value, float) + + +class Question(): + def __init__(self, column, value): + self.column = column + self.value = value + + def match(self, example): + val = example[self.column] + if is_numeric(val): + return val == self.value + else: + return val != self.value + + def __repr__(self): + condition = "!=" + if is_numeric(self.value): + condition = "==" + return "Is %s %s %s?" %( + header[self.column], condition, str(self.value) + ) + + +def partition(rows, question): + true_rows, false_rows = [], [] + for row in rows: + if question.match(row): + true_rows.append(row) + else: + false_rows.append(row) + return true_rows, false_rows + + +def gini(rows): + counts = class_counts(rows) + impurity = 1 + for lbl in counts: + prob_of_lbl = counts[lbl]/float(len(rows)) + impurity -= prob_of_lbl**2 + return impurity + + +def info_gain(left, right, current_uncertainty): + p = float(len(left))/(len(left) + len(right)) + return current_uncertainty - p*gini(left) - (1-p) * gini(right) + + +def find_best_split(rows): + best_gain = 0 + best_question = None + current_uncertainty = gini(rows) + n_features = len(rows[0]) - 1 + + for col in range(n_features): + + values = set([row[col] for row in rows]) + + for val in values: + question = Question(col, val) + true_rows, false_rows = partition(rows, question) + if len(true_rows) == 0 or len(false_rows) == 0: + continue + gain = info_gain(true_rows,false_rows,current_uncertainty) + if gain >= best_gain: + best_gain, best_question = gain, question + + return best_gain, best_question + + +class Leaf: + def __init__(self, rows): + self.predictions = class_counts(rows) + + +class DecisionNode: + def __init__(self, question, true_branch, false_branch): + self.question = question + self.true_branch = true_branch + self.false_branch = false_branch + + +def build_tree(rows): + gain, question = find_best_split(rows) + if gain == 0: + return Leaf(rows) + true_rows, false_rows = partition(rows, question) + + true_branch = build_tree(true_rows) + false_branch = build_tree(false_rows) + + return DecisionNode(question, true_branch, false_branch) + + +def print_tree(node, spacing=""): + if isinstance(node, Leaf): + print(spacing + "Predict", node.predictions) + return + + print(spacing + str(node.question)) + + print(spacing + '--> True: ') + print_tree(node.true_branch, spacing + " ") + + print(spacing + '--> False: ') + print_tree(node.false_branch, spacing + " ") + + +def classify(row, node): + if isinstance(node, Leaf): + return node.predictions + if node.question.match(row): + return classify(row, node.true_branch) + else: + return classify(row,node.false_branch) + + +def print_leaf(counts): + total = sum(counts.values()) * 1.0 + probs = {} + for lbl in counts.keys(): + probs[lbl] = str(int(counts[lbl]/total * 100)) + "%" + return probs + + +class Main(): def __init__(self,traktor,field,ui,path): self.traktor = traktor self.field = field self.ui = ui self.path = path - def main(self): - pass \ No newline at end of file + def tree(field): + array = ([[8, 8, 8, 8, 8, 8, 8, 8, 8, 8], + [7, 7, 7, 7, 7, 7, 7, 7, 7, 7], + [6, 6, 6, 6, 6, 6, 6, 6, 6, 6], + [5, 5, 5, 5, 5, 5, 5, 5, 5, 5], + [4, 4, 4, 4, 4, 4, 4, 4, 4, 4], + [3, 3, 3, 3, 3, 3, 3, 3, 3, 3], + [2, 2, 2, 2, 2, 2, 2, 2, 2, 2], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]) + + testing_data = [] + + for i in range(10): + verse = field[i] + for j in verse: + coord = (i, j) + current_field = check(verse[j]) + testing_data.append(current_field) + + x = build_tree(testing_data) + print_tree(x) From 18d05c29f02e9c6b86b823ac9c2de1d72a13ea59 Mon Sep 17 00:00:00 2001 From: xkamikoo <58092037+xkamikoo@users.noreply.github.com> Date: Fri, 15 May 2020 14:03:52 +0200 Subject: [PATCH 2/5] =?UTF-8?q?wst=C4=99pne=20drzewko?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Kamila.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Kamila.py b/Kamila.py index 7589ff1..5267d00 100644 --- a/Kamila.py +++ b/Kamila.py @@ -171,7 +171,7 @@ def print_leaf(counts): return probs -class Main(): +class main(): def __init__(self,traktor,field,ui,path): self.traktor = traktor self.field = field From 3666bd3079592cb5a4165ff7861a0d1d5624e0b7 Mon Sep 17 00:00:00 2001 From: xkamikoo <58092037+xkamikoo@users.noreply.github.com> Date: Sun, 17 May 2020 20:24:15 +0200 Subject: [PATCH 3/5] upgrade --- Kamila.py | 109 +++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 88 insertions(+), 21 deletions(-) diff --git a/Kamila.py b/Kamila.py index 5267d00..c4f22f4 100644 --- a/Kamila.py +++ b/Kamila.py @@ -4,33 +4,57 @@ from sklearn.model_selection import train_test_split from sklearn import metrics import numpy - -header = ["ready", "hydration", "weeds", "planted"] +header = ["ready", "hydration", "weeds", "empty", "TODO"] +work = ["Zebrac","Podlac","Odchwascic","Zasadzic"] +#0 - 3 +#1 - 0 +#2 - 1 +#3 - 2 +def check_p(field): + if field == 0: + return [0, 0, 0, 0, "Zasadzic"] + elif field == 1: + return [0, 0, 1, 0, "Odchwascic"] + elif field == 2: + return [0, 0, 0, 1, "Podlac"] + elif field == 3: + return [0, 0, 1, 1, "Odchwascic"] + elif field == 4: + return [0, 1, 0, 0, "Zasadzic"] + elif field == 5: + return [0, 1, 1, 0, "Odchwascic"] + elif field == 6: + return [0, 1, 0, 1, "Ignoruj"] + elif field == 7: + return [0, 1, 1, 1, "Odchwascic"] + elif field == 8: + return [1, 0, 0, 1, "Zebrac"] + else: + print("wrong field number") def check(field): if field == 0: - return [0, 0, 0, 'N'] + return [[0, 0, 0, 1, "Zasadzic"],[0,0,0,1,"Podlac"]] elif field == 1: - return [0, 0, 1, 'N'] + return [[0, 0, 1, 1, "Odchwascic"], [0,0,1,1,"Podlac"], [0,0,1,1,"Zasadzic"]] elif field == 2: - return [0, 0, 0, 'Y'] + return [[0, 0, 0, 0, "Podlac"]] elif field == 3: - return [0, 0, 1, 'Y'] + return [[0, 0, 1, 0, "Odchwascic"],[0,0,1,0,"Podlac"]] elif field == 4: - return [0, 1, 0, 'N'] + return [[0, 1, 0, 1, "Zasadzic"]] elif field == 5: - return [0, 1, 1, 'N'] + return [[0, 1, 1, 1, "Odchwascic"],[0,1,1,1,"Zasadzic"]] elif field == 6: - return [0, 1, 0, 'Y'] + return [] elif field == 7: - return [0, 1, 1, 'Y'] + return [[0, 1, 1, 0, "Odchwascic"]] elif field == 8: - return [1, 0, 0, 'N'] + return [[1, 0, 0, 0, "Zebrac"],[1, 0, 0, 0, "Potem podlac"],[1, 0, 0, 0, "Potem zasadzic"]] else: print("wrong field number") - def un_values(rows, col): return set([row[col] for row in rows]) @@ -177,8 +201,8 @@ class main(): self.field = field self.ui = ui self.path = path - - def tree(field): + self.best_action = 0 + def main(self): array = ([[8, 8, 8, 8, 8, 8, 8, 8, 8, 8], [7, 7, 7, 7, 7, 7, 7, 7, 7, 7], [6, 6, 6, 6, 6, 6, 6, 6, 6, 6], @@ -189,15 +213,58 @@ class main(): [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]) + while (self.best_action != -1): + self.find_best_action() + self.do_best_action() + print("Koniec roboty") + + def find_best_action(self): testing_data = [] - + matrix = self.field.get_matrix() + matrix_todo = [] + #print(self.field) for i in range(10): - verse = field[i] - for j in verse: + matrix_todo.append([]) + verse = matrix[i] + for j in range(len(verse)): coord = (i, j) - current_field = check(verse[j]) - testing_data.append(current_field) + current_field = check(verse[j]) #czynnosci ktore trzeba jeszcze zrobic na kazdym polu + matrix_todo[i].append([]) + for action in current_field: + matrix_todo[i][j].append(action[-1]) + testing_data.extend(current_field) + #testing_data.append(current_field) + if len(testing_data) > 0: + x = build_tree(testing_data) + print_tree(x) + if isinstance(x, Leaf): + self.best_action = self.find_remaining_action(matrix_todo) + return + self.best_action = x.question.column + print(header[x.question.column]) + print(x.question.value) + else: + self.best_action = self.find_remaining_action(matrix_todo) + return + #for row in testing_data: + # print("Actual: %s. Predicted %s" % + # (row[-1], print_leaf(classify(row, x)))) + #for row in matrix_todo: + # print(row) - x = build_tree(testing_data) - print_tree(x) + def do_best_action(self): + self.traktor.set_mode((self.best_action+3) % 4) + while self.path.pathfinding(self.traktor,self.field,self.ui) != 0: + pass +# 0 - 3 +# 1 - 0 +# 2 - 1 +# 3 - 2 + def find_remaining_action(self, matrix_todo): + for row in matrix_todo: + for field in row: + for action in field: + print(action) + return work.index(action) + return -1 \ No newline at end of file From 89ac7e6da6db3b82421ebfbfb9348ad6d101e5af Mon Sep 17 00:00:00 2001 From: xkamikoo <58092037+xkamikoo@users.noreply.github.com> Date: Mon, 18 May 2020 17:13:09 +0200 Subject: [PATCH 4/5] Gotowy podprojekt --- Kamila.py | 120 ++++++++++++++++++------------------------------------ 1 file changed, 39 insertions(+), 81 deletions(-) diff --git a/Kamila.py b/Kamila.py index c4f22f4..6b7e2ac 100644 --- a/Kamila.py +++ b/Kamila.py @@ -4,61 +4,34 @@ from sklearn.model_selection import train_test_split from sklearn import metrics import numpy -header = ["ready", "hydration", "weeds", "empty", "TODO"] -work = ["Zebrac","Podlac","Odchwascic","Zasadzic"] -#0 - 3 -#1 - 0 -#2 - 1 -#3 - 2 -def check_p(field): - if field == 0: - return [0, 0, 0, 0, "Zasadzic"] - elif field == 1: - return [0, 0, 1, 0, "Odchwascic"] - elif field == 2: - return [0, 0, 0, 1, "Podlac"] - elif field == 3: - return [0, 0, 1, 1, "Odchwascic"] - elif field == 4: - return [0, 1, 0, 0, "Zasadzic"] - elif field == 5: - return [0, 1, 1, 0, "Odchwascic"] - elif field == 6: - return [0, 1, 0, 1, "Ignoruj"] - elif field == 7: - return [0, 1, 1, 1, "Odchwascic"] - elif field == 8: - return [1, 0, 0, 1, "Zebrac"] - else: - print("wrong field number") +header = ["hydration", "weeds", "empty", "ready", "TODO"] +work = ["Podlac", "Odchwascic", "Zasadzic", "Zebrac"] def check(field): if field == 0: - return [[0, 0, 0, 1, "Zasadzic"],[0,0,0,1,"Podlac"]] + return [[0, 0, 1, 0, "Zasadzic"], [0, 0, 1, 0, "Podlac"]] elif field == 1: - return [[0, 0, 1, 1, "Odchwascic"], [0,0,1,1,"Podlac"], [0,0,1,1,"Zasadzic"]] + return [[0, 1, 1, 0, "Odchwascic"], [0, 1, 1, 0, "Podlac"], [0, 1, 1, 0, "Zasadzic"]] elif field == 2: return [[0, 0, 0, 0, "Podlac"]] elif field == 3: - return [[0, 0, 1, 0, "Odchwascic"],[0,0,1,0,"Podlac"]] + return [[0, 1, 0, 0, "Odchwascic"], [0, 1, 0, 0, "Podlac"]] elif field == 4: - return [[0, 1, 0, 1, "Zasadzic"]] + return [[1, 0, 1, 0, "Zasadzic"]] elif field == 5: - return [[0, 1, 1, 1, "Odchwascic"],[0,1,1,1,"Zasadzic"]] + return [[1, 1, 1, 0, "Odchwascic"], [1, 1, 1, 0, "Zasadzic"]] elif field == 6: return [] elif field == 7: - return [[0, 1, 1, 0, "Odchwascic"]] + return [[1, 1, 0, 0, "Odchwascic"]] elif field == 8: - return [[1, 0, 0, 0, "Zebrac"],[1, 0, 0, 0, "Potem podlac"],[1, 0, 0, 0, "Potem zasadzic"]] + return [[0, 0, 0, 1, "Zebrac"], [0, 0, 0, 1, "Potem podlac"], [0, 0, 0, 1, "Potem zasadzic"]] else: print("wrong field number") -def un_values(rows, col): - return set([row[col] for row in rows]) - +# liczenie ilości prac do wykonania def class_counts(rows): counts = {} for row in rows: @@ -69,10 +42,12 @@ def class_counts(rows): return counts +# sprawdzenie czy wartość jest liczbą def is_numeric(value): return isinstance(value, int) or isinstance(value, float) +# klasa tworząca zapytanie do podziału danych class Question(): def __init__(self, column, value): self.column = column @@ -82,18 +57,17 @@ class Question(): val = example[self.column] if is_numeric(val): return val == self.value - else: - return val != self.value + # wyświetlenie pytania def __repr__(self): - condition = "!=" if is_numeric(self.value): condition = "==" - return "Is %s %s %s?" %( + return "Is %s %s %s?" % ( header[self.column], condition, str(self.value) ) +# podział danych na spełnione i niespełnione wiersze def partition(rows, question): true_rows, false_rows = [], [] for row in rows: @@ -104,20 +78,22 @@ def partition(rows, question): return true_rows, false_rows +# funkcja implementująca indeks gini def gini(rows): counts = class_counts(rows) impurity = 1 for lbl in counts: - prob_of_lbl = counts[lbl]/float(len(rows)) - impurity -= prob_of_lbl**2 + prob_of_lbl = counts[lbl] / float(len(rows)) + impurity -= prob_of_lbl ** 2 return impurity def info_gain(left, right, current_uncertainty): - p = float(len(left))/(len(left) + len(right)) - return current_uncertainty - p*gini(left) - (1-p) * gini(right) + p = float(len(left)) / (len(left) + len(right)) + return current_uncertainty - p * gini(left) - (1 - p) * gini(right) +# znalezienie najlepszego "miejsca" na podział danych def find_best_split(rows): best_gain = 0 best_question = None @@ -133,7 +109,7 @@ def find_best_split(rows): true_rows, false_rows = partition(rows, question) if len(true_rows) == 0 or len(false_rows) == 0: continue - gain = info_gain(true_rows,false_rows,current_uncertainty) + gain = info_gain(true_rows, false_rows, current_uncertainty) if gain >= best_gain: best_gain, best_question = gain, question @@ -152,6 +128,7 @@ class DecisionNode: self.false_branch = false_branch +# funkcja budująca drzewo def build_tree(rows): gain, question = find_best_split(rows) if gain == 0: @@ -164,6 +141,7 @@ def build_tree(rows): return DecisionNode(question, true_branch, false_branch) +# funcka wypisująca drzewo def print_tree(node, spacing=""): if isinstance(node, Leaf): print(spacing + "Predict", node.predictions) @@ -178,31 +156,16 @@ def print_tree(node, spacing=""): print_tree(node.false_branch, spacing + " ") -def classify(row, node): - if isinstance(node, Leaf): - return node.predictions - if node.question.match(row): - return classify(row, node.true_branch) - else: - return classify(row,node.false_branch) - - -def print_leaf(counts): - total = sum(counts.values()) * 1.0 - probs = {} - for lbl in counts.keys(): - probs[lbl] = str(int(counts[lbl]/total * 100)) + "%" - return probs - - class main(): - def __init__(self,traktor,field,ui,path): + def __init__(self, traktor, field, ui, path): self.traktor = traktor self.field = field self.ui = ui self.path = path self.best_action = 0 + def main(self): + # dane testowe array = ([[8, 8, 8, 8, 8, 8, 8, 8, 8, 8], [7, 7, 7, 7, 7, 7, 7, 7, 7, 7], [6, 6, 6, 6, 6, 6, 6, 6, 6, 6], @@ -213,28 +176,30 @@ class main(): [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]) - while (self.best_action != -1): + + while (True): self.find_best_action() + if self.best_action == -1: + break self.do_best_action() print("Koniec roboty") - def find_best_action(self): testing_data = [] matrix = self.field.get_matrix() matrix_todo = [] - #print(self.field) + # print(self.field) for i in range(10): matrix_todo.append([]) verse = matrix[i] for j in range(len(verse)): coord = (i, j) - current_field = check(verse[j]) #czynnosci ktore trzeba jeszcze zrobic na kazdym polu + current_field = check(verse[j]) # czynnosci ktore trzeba jeszcze zrobic na kazdym polu matrix_todo[i].append([]) for action in current_field: matrix_todo[i][j].append(action[-1]) testing_data.extend(current_field) - #testing_data.append(current_field) + # testing_data.append(current_field) if len(testing_data) > 0: x = build_tree(testing_data) print_tree(x) @@ -247,24 +212,17 @@ class main(): else: self.best_action = self.find_remaining_action(matrix_todo) return - #for row in testing_data: - # print("Actual: %s. Predicted %s" % - # (row[-1], print_leaf(classify(row, x)))) - #for row in matrix_todo: - # print(row) def do_best_action(self): - self.traktor.set_mode((self.best_action+3) % 4) - while self.path.pathfinding(self.traktor,self.field,self.ui) != 0: + self.traktor.set_mode(self.best_action) + while self.path.pathfinding(self.traktor, self.field, self.ui) != 0: pass -# 0 - 3 -# 1 - 0 -# 2 - 1 -# 3 - 2 + + def find_remaining_action(self, matrix_todo): for row in matrix_todo: for field in row: for action in field: print(action) return work.index(action) - return -1 \ No newline at end of file + return -1 From 46c497ae5191701bc6188e75daa9e4b1576e164a Mon Sep 17 00:00:00 2001 From: xkamikoo <58092037+xkamikoo@users.noreply.github.com> Date: Mon, 18 May 2020 20:12:11 +0200 Subject: [PATCH 5/5] raport - decision tree --- decisiontree.md | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 decisiontree.md diff --git a/decisiontree.md b/decisiontree.md new file mode 100644 index 0000000..1835440 --- /dev/null +++ b/decisiontree.md @@ -0,0 +1,11 @@ +# Sztuczna Inteligencja - Raport + +**Członkowie zespołu:** Marcin Kwapisz, Kamila Matysiak, Piotr Rychlicki, Justyna Zarzycka + +**Temat podprojektu:** Wybór trybu pracy traktora za pomocą drzewa decyzyjnego + +**Autor podprojektu:** Kamila Matysiak + + +### Drzewo Decyzyjne +