ID3

2020-05-26 21:16:30 +00:00 · 2020-05-26 21:16:30 +00:00 · 7930f1fab7
commit 7930f1fab7
parent ab08c46c63
4 changed files with 446 additions and 0 deletions
--- a/Podprojekt_s444426/ID3.py
+++ b/Podprojekt_s444426/ID3.py
@ -0,0 +1,157 @@
+import pandas as pd
+import numpy as np
+from pprint import pprint
+import dataset
+
+# Wrap the raw row lists from the dataset module in DataFrames;
+# dataset.header supplies the column names for both frames.
+training_data = pd.DataFrame(data=dataset.training_data, columns=dataset.header)
+testing_data = pd.DataFrame(data=dataset.testing_data, columns=dataset.header)
+
+
+def entropy(target_col):
+    """
+    Compute the base-2 entropy of the values found in the given column.
+    """
+    _, occurrences = np.unique(target_col, return_counts=True)
+    total = np.sum(occurrences)
+
+    # One term per distinct value: -p * log2(p), with p = share of that value
+    terms = [(-n / total) * np.log2(n / total) for n in occurrences]
+    return np.sum(terms)
+
+
+def info_gain(data, split_attribute_name, target_name="label"):
+    """
+    Compute the information gain obtained by splitting `data` on one attribute.
+
+    parameters:
+        data                    DataFrame holding the attribute and target columns
+        split_attribute_name    name of the column to split on
+        target_name             name of the column being predicted
+
+    Returns the entropy of the target column minus the weighted average of the
+    target entropies of the subsets induced by each value of the attribute.
+    """
+
+    # Entropy of the whole set
+    total_entropy = entropy(data[target_name])
+
+    # Distinct values of the attribute and how many rows carry each of them
+    vals, counts = np.unique(data[split_attribute_name], return_counts=True)
+    total = np.sum(counts)
+
+    # Weighted average of the subset entropies. Rows are selected with a
+    # boolean mask: the former where(...).dropna() also discarded rows that had
+    # a missing value in an unrelated column, so the subsets no longer matched
+    # the counts used as weights.
+    weighted_entropy = np.sum(
+        [(count / total) * entropy(data.loc[data[split_attribute_name] == val, target_name])
+         for val, count in zip(vals, counts)])
+
+    return total_entropy - weighted_entropy
+
+
+def ID3(data, original_data, features, target_attribute_name="label", parent_node_class=None):
+    """
+    Build an ID3 decision tree as nested dictionaries.
+
+    parameters:
+        data                    data set for which the tree is being built
+        original_data           data set of the caller; its most frequent target
+                                value is returned when `data` is empty
+        features                names of the attributes still available for splitting
+        target_attribute_name   name of the attribute to predict
+        parent_node_class       most frequent target value of the parent node,
+                                returned when no attributes are left
+
+    Returns either a leaf (a single target value) or a dictionary of the form
+    {attribute: {attribute value: subtree or leaf}}.
+    """
+
+    # An empty set must be handled first: the purity check below indexes
+    # np.unique(...)[0], which raises IndexError when there are no rows
+    # (this made the empty-data branch unreachable before).
+    if len(data) == 0:
+        classes, counts = np.unique(original_data[target_attribute_name], return_counts=True)
+        return classes[np.argmax(counts)]
+
+    classes, counts = np.unique(data[target_attribute_name], return_counts=True)
+
+    # All rows share one target value: return it as a leaf
+    if len(classes) <= 1:
+        return classes[0]
+
+    # Nothing left to split on: fall back to the parent's majority value
+    if len(features) == 0:
+        return parent_node_class
+
+    # Majority target value of this node, handed down to the children
+    parent_node_class = classes[np.argmax(counts)]
+
+    # Information gain of every candidate attribute
+    item_values = [info_gain(data, feature, target_attribute_name) for feature in features]
+
+    # The best attribute is the one with the largest information gain
+    best_feature = features[np.argmax(item_values)]
+
+    tree = {best_feature: {}}
+
+    # The chosen attribute is not available to the subtrees
+    remaining_features = [name for name in features if name != best_feature]
+
+    # Grow one subtree per value of the chosen attribute
+    for value in np.unique(data[best_feature]):
+        # Boolean mask instead of where(...).dropna(), which would also drop
+        # rows that merely have a missing value in another column
+        sub_data = data[data[best_feature] == value]
+        tree[best_feature][value] = ID3(
+            sub_data, data, remaining_features, target_attribute_name, parent_node_class)
+
+    return tree
+
+
+def predict(query, tree, default='beetroot'):
+    """
+    Walk the decision tree to predict the value of the "label" attribute.
+
+    parameters:
+        query     dictionary mapping attribute names to the observed values
+        tree      nested dictionary {attribute: {value: subtree or leaf}},
+                  or a bare leaf value
+        default   value returned when the query cannot be matched against the tree
+
+    When the input does not match any branch (e.g. the soil pH is classified as
+    'strongly acidic' while the training data holds no record with that value),
+    `default` is returned. The original author chose beetroot as the vegetable
+    with the lowest requirements.
+    """
+
+    node = tree
+    # Descend until a leaf is reached; the caller's default is honoured at
+    # every level (the recursive version silently fell back to 'beetroot').
+    while isinstance(node, dict):
+        # First query attribute that this node splits on
+        feature = next((key for key in query if key in node), None)
+        if feature is None:
+            # The query does not carry the attribute this node needs
+            return default
+        try:
+            node = node[feature][query[feature]]
+        except (KeyError, TypeError):
+            # Value not seen for this attribute (or not usable as a key)
+            return default
+
+    return node
+
+
+def test(data, tree):
+    """
+    Print the percentage of rows in `data` whose "label" the tree predicts correctly.
+
+    parameters:
+        data    DataFrame; its last column is left out of the queries and its
+                "label" column holds the expected values
+        tree    decision tree in the nested-dictionary form built by ID3
+    """
+    # Drop the last column (the target values) so only the attributes are queried
+    queries = data.iloc[:, :-1].to_dict(orient="records")
+
+    # Predicted target value for every row
+    predictions = [predict(query, tree, 'beetroot') for query in queries]
+
+    # Compare row by row (positionally), so a frame with a non-default index
+    # is scored instead of failing when the two Series are aligned by label
+    hits = sum(guess == actual for guess, actual in zip(predictions, data["label"]))
+
+    # An empty frame has no defined accuracy: report nan instead of dividing by zero
+    accuracy = (hits / len(data)) * 100 if len(data) else float('nan')
+    print('Precyzja przewidywań: ', accuracy, '%')
+
+
+def predict_data(data):
+    """
+    Predict a label for every row of `data`, in the format used by the tractor.
+
+    `data` is a list of rows laid out like dataset.header (the last entry of
+    each row is ignored). Every row is converted to a dictionary so that
+    `predict` can be called on it with the module-level `tree`.
+
+    Returns a list with one single-element list per row: [[label], [label], ...].
+    """
+
+    queries = pd.DataFrame(data=data, columns=dataset.header)
+
+    # One {attribute: value} dictionary per row, without the target column
+    # (named `records` so the builtin `dict` is no longer shadowed)
+    records = queries.iloc[:, :-1].to_dict(orient="records")
+
+    # Build the result directly instead of growing a DataFrame row by row
+    return [[predict(record, tree, 'beetroot')] for record in records]
+
+
+# Build the decision tree when the module is imported; predict_data() reads
+# this module-level `tree`. All columns except the last one serve as attributes.
+# The two disabled calls below print the tree and report its accuracy.
+
+tree = ID3(training_data, training_data, training_data.columns[:-1])
+#pprint(tree)
+#test(testing_data, tree)
--- a/Podprojekt_s444426/dataset.py
+++ b/Podprojekt_s444426/dataset.py
@ -0,0 +1,113 @@
+from soil_upgrade import Soil
+from carrot_upgrade import Carrot
+from beetroot_upgrade import Beetroot
+from cabbage_upgrade import Cabbage
+
+# Column names shared by every data row below; the last one is 'label'.
+header = ['previous', 'soil pH', 'dry level', 'label']
+
+# Labelled examples, one row per example, laid out as in `header`:
+# [previous, soil pH, dry level, label].
+training_data = [
+    ['carrot', 'alkaline', 'dry', 'beetroot'],
+    ['carrot', 'slightly acidic', 'dry', 'beetroot'],
+    ['cabbage', 'alkaline', 'dry', 'beetroot'],
+    ['none', 'alkaline', 'dry', 'beetroot'],
+    ['carrot', 'slightly acidic', 'medium wet', 'beetroot'],
+    ['none', 'slightly acidic', 'dry', 'beetroot'],
+    ['pumpkin', 'neutral', 'dry', 'beetroot'],
+    ['beetroot', 'neutral', 'dry', 'beetroot'],
+    ['cabbage', 'alkaline', 'medium wet', 'beetroot'],
+    ['none', 'slightly acidic', 'medium wet', 'beetroot'],
+    ['cabbage', 'acidic', 'dry', 'carrot'],
+    ['none', 'acidic', 'medium wet', 'carrot'],
+    ['carrot', 'neutral', 'dry', 'carrot'],
+    ['beetroot', 'slightly acidic', 'dry', 'carrot'],
+    ['pumpkin', 'acidic', 'medium wet', 'carrot'],
+    ['beetroot', 'acidic', 'medium wet', 'carrot'],
+    ['carrot', 'neutral', 'dry', 'carrot'],
+    ['pumpkin', 'slightly acidic', 'medium wet', 'carrot'],
+    ['beetroot', 'neutral', 'wet', 'pumpkin'],
+    ['none', 'neutral', 'wet', 'pumpkin'],
+    ['carrot', 'slightly acidic', 'wet', 'pumpkin'],
+    ['pumpkin', 'neutral', 'wet', 'pumpkin'],
+    ['cabbage', 'slightly acidic', 'medium wet', 'pumpkin'],
+    ['carrot', 'neutral', 'wet', 'pumpkin'],
+    ['cabbage', 'neutral', 'wet', 'pumpkin'],
+    ['none', 'slightly acidic', 'wet', 'pumpkin'],
+    ['beetroot', 'slightly acidic', 'medium wet', 'pumpkin'],
+    ['carrot', 'neutral', 'medium wet', 'cabbage'],
+    ['pumpkin', 'alkaline', 'wet', 'cabbage'],
+    ['none', 'alkaline', 'medium wet', 'cabbage'],
+    ['beetroot', 'neutral', 'medium wet', 'cabbage'],
+    ['cabbage', 'slightly acidic', 'wet', 'cabbage'],
+    ['none', 'neutral', 'medium wet', 'cabbage'],
+    ['cabbage', 'neutral', 'medium wet', 'cabbage'],
+    ['carrot', 'alkaline', 'wet', 'cabbage'],
+    ['none', 'alkaline', 'wet', 'cabbage'],
+    ['pumpkin', 'neutral', 'medium wet', 'cabbage'],
+    ['carrot', 'neutral', 'soaking wet', 'none'],
+    ['beetroot', 'alkaline', 'very dry', 'none'],
+    ['none', 'alkaline', 'soaking wet', 'none'],
+    ['cabbage', 'acidic', 'medium wet', 'none'],
+    ['pumpkin', 'acidic', 'soaking wet', 'none'],
+    ['cabbage', 'slightly acidic', 'soaking wet', 'none'],
+    ['none', 'slightly acidic', 'soaking wet', 'none'],
+    ['carrot', 'neutral', 'very dry', 'none'],
+    ['carrot', 'acidic', 'medium wet', 'none'],
+    ['pumpkin', 'neutral', 'soaking wet', 'none']
+]
+
+# Rows used to evaluate the tree, in the same layout as training_data.
+# NOTE(review): every row below also appears in training_data, so accuracy
+# measured on this list does not reflect performance on unseen data.
+testing_data = [
+
+    ['beetroot', 'neutral', 'dry', 'beetroot'],
+    ['cabbage', 'alkaline', 'medium wet', 'beetroot'],
+    ['none', 'slightly acidic', 'medium wet', 'beetroot'],
+    ['cabbage', 'acidic', 'dry', 'carrot'],
+    ['none', 'acidic', 'medium wet', 'carrot'],
+    ['carrot', 'neutral', 'dry', 'carrot'],
+    ['beetroot', 'neutral', 'wet', 'pumpkin'],
+    ['none', 'neutral', 'wet', 'pumpkin'],
+    ['carrot', 'slightly acidic', 'wet', 'pumpkin'],
+    ['carrot', 'neutral', 'medium wet', 'cabbage'],
+    ['pumpkin', 'alkaline', 'wet', 'cabbage'],
+    ['none', 'alkaline', 'medium wet', 'cabbage'],
+    ['carrot', 'neutral', 'soaking wet', 'none'],
+    ['beetroot', 'alkaline', 'very dry', 'none'],
+    ['none', 'alkaline', 'soaking wet', 'none'],
+
+]
+
+
+def create_data_soil():
+    """
+    Build the fixed list of sample Soil objects.
+
+    Each specification is (pH, dry level, plant class or None, argument passed
+    to the plant constructor). Plots without a plant class stay empty.
+    """
+    specs = [
+        (6.5, 0.5, Cabbage, 100),
+        (4.6, 0.7, Carrot, 100),
+        (5.6, 0.6, None, None),
+        (6.1, 0.5, Beetroot, 95),
+        (6.5, 0.4, Cabbage, 90),
+        (6.0, 0.5, None, None),
+        (7.1, 0.5, Cabbage, 80),
+    ]
+
+    all_soil = []
+    for pH, dry, plant_cls, plant_arg in specs:
+        soil = Soil(pH, dry)
+        if plant_cls is not None:
+            # The plant is created after its soil, as in the original sequence
+            soil.add_plant(plant_cls(plant_arg))
+        all_soil.append(soil)
+
+    return all_soil
--- a/Podprojekt_s444426/plant_ID3.py
+++ b/Podprojekt_s444426/plant_ID3.py
@ -0,0 +1,98 @@
+from ID3 import predict_data
+from dataset import create_data_soil
+from plant_upgrade import Plant
+import pandas as pd
+
+
+def decide_to_plant(soil):
+    """
+    Decide what should be planted in the given soil; used by the tractor.
+
+    The first step is to check whether a plant already grows in the soil. Only
+    when the plot is free, or its plant reports that it can be collected, is a
+    suitable plant looked up in the decision tree.
+
+    Returns [['none']] when the plot holds a plant that is not ready to be
+    collected, otherwise the result of predict_data for this soil.
+    """
+
+    if soil.have_plant():
+        current = soil.get_plant()
+        # collect() is compared against the string 'True', not a boolean
+        if current.collect() != 'True':
+            return [['none']]
+        # Read the soil description before the plant leaves the soil
+        info = get_info(soil)
+        current.leave_soil()
+    else:
+        info = get_info(soil)
+
+    # The plant is ready to be collected or the soil is free.
+    # Planting the predicted crop via grow_a_plant is currently disabled.
+    return predict_data([info])
+
+
+def grow_a_plant(soil, plant_name):
+    """Create a Plant from `plant_name`, add it to `soil` and print the soil."""
+    soil.add_plant(Plant(plant_name))
+    print(soil)
+
+
+def get_info(soil):
+    """
+    Describe the soil as a row: [previous plant name, pH category,
+    dry level category, ''].
+
+    The plant name is 'none' when the soil holds no plant; the last entry is
+    an empty placeholder.
+    """
+    previous = soil.get_plant().get_name() if soil.have_plant() else 'none'
+    pH_category = categorize_pH(soil.get_pH())
+    dryness_category = categorize_dry_level(soil.get_dry_level())
+
+    return [previous, pH_category, dryness_category, '']
+
+
+def categorize_pH(pH):
+    """
+    Map a numeric pH to its category name.
+
+    Upper bounds are inclusive: up to 4.5 'strongly acidic', up to 5.5 'acidic',
+    up to 6.5 'slightly acidic', up to 7.2 'neutral', above 7.2 'alkaline'.
+    A value that satisfies none of the comparisons (e.g. NaN) yields None.
+    """
+    upper_bounds = (
+        (4.5, 'strongly acidic'),
+        (5.5, 'acidic'),
+        (6.5, 'slightly acidic'),
+        (7.2, 'neutral'),
+    )
+    for bound, category in upper_bounds:
+        if pH <= bound:
+            return category
+
+    if pH > 7.2:
+        return 'alkaline'
+
+
+def categorize_dry_level(dry_level):
+    """
+    Map a numeric dry level to its category name.
+
+    Upper bounds are inclusive: up to 0.1 'soaking wet', up to 0.4 'wet',
+    up to 0.6 'medium wet', up to 0.8 'dry', above 0.8 'very dry'.
+    A value that satisfies none of the comparisons (e.g. NaN) yields None.
+    """
+    bands = ((0.1, 'soaking wet'), (0.4, 'wet'), (0.6, 'medium wet'), (0.8, 'dry'))
+
+    # First band whose upper bound is not exceeded, if any
+    category = next((name for limit, name in bands if dry_level <= limit), None)
+    if category is None and dry_level > 0.8:
+        category = 'very dry'
+    return category
+
+
+"""
+Testowanie działania dla argumentów obiektu Soil
+"""
+
+# Demo run: predict a crop for every sample soil and print a summary table.
+# NOTE(review): this executes whenever the module is imported, because it is
+# not wrapped in an `if __name__ == "__main__":` guard.
+all_soil = create_data_soil()
+result = []
+
+for soil in all_soil:
+    predicted = decide_to_plant(soil)
+    for p in predicted:
+        # The plant is looked up after decide_to_plant(), which may already have
+        # called leave_soil() on it; whether have_plant() still reports the
+        # plant depends on Plant.leave_soil — TODO confirm.
+        if soil.have_plant():
+            plant = soil.get_plant().get_name()
+            collect = soil.get_plant().get_collect()
+        else:
+            plant = 'none'
+            collect = '-'
+        # p is a single-element list, so p[0] is the predicted label
+        result.append([soil.get_pH(), soil.get_dry_level(), plant, collect, p[0]])
+
+result = pd.DataFrame(data=result, columns=['pH', 'dry level', 'plant', 'ripe', 'prediction'])
+print(result)
--- a/Podprojekt_s444426/soil_upgrade.py
+++ b/Podprojekt_s444426/soil_upgrade.py
@ -0,0 +1,78 @@
+from datetime import datetime
+
+class Soil:
+    """
+    A plot of soil that holds at most one plant.
+
+    Internal state:
+        _plant      the plant growing in the soil, or -1 when there is none
+        _dry        dryness level of the soil
+        _pH         pH of the soil
+        _starttime  reference time from which further drying is measured
+        _id, _x, _y identifier and coordinates; not assigned yet (None)
+    """
+
+    def __init__(self, pH, dry):
+        super().__init__()
+        # Identifier and coordinates are not passed in yet. They are defined
+        # as None so get_id() / get_coordinates() do not raise AttributeError.
+        self._id = None
+        self._x = None
+        self._y = None
+        self._plant = -1    # a new plot has no plant
+        # NOTE(review): is_dry() compares this value against 30 and 70, i.e.
+        # treats it as a percentage — confirm that callers pass the same scale.
+        self._dry = dry
+        self._pH = pH
+        self._starttime = datetime.now()
+
+    def __str__(self):
+        if self.have_plant():
+            return f'pH={self._pH}, dry level={self._dry}, plant={self.get_plant().get_name()}'
+        else:
+            return f'soil: pH={self._pH}, dry level={self._dry}, plant=none'
+
+    # coordinates of the plot
+    def get_coordinates(self):
+        return self._x, self._y
+
+    # identifier of the soil
+    def get_id(self):
+        return self._id
+
+    # plant a plant; ignored when the soil already holds one
+    def add_plant(self, plant):
+        if not self.have_plant():
+            plant.add_soil(self)
+            self._plant = plant
+
+    # tells whether a plant grows in the soil
+    def have_plant(self):
+        return self._plant != -1
+
+    # returns the plant growing in the soil (-1 when there is none)
+    def get_plant(self):
+        return self._plant
+
+    def get_pH(self):
+        return self._pH
+
+    def get_dry_level(self):
+        return self._dry
+
+    # updates the dryness and classifies it as 'False' (below 30),
+    # 'Medium' (30 up to, but excluding, 70) or 'True' (70 and above)
+    def is_dry(self):
+        self.__drying()
+        if self._dry < 30:
+            return 'False'
+        elif 30 <= self._dry < 70:
+            return 'Medium'
+        else:
+            return 'True'
+
+    # dries the soil: adds one dryness unit per full minute elapsed since the
+    # reference time; called whenever the dryness is checked
+    def __drying(self):
+        from datetime import timedelta
+
+        minute = timedelta(minutes=1)
+        elapsed = datetime.now() - self._starttime
+        # timedelta // timedelta yields a whole number of minutes. The former
+        # `timedelta // 60` produced a timedelta, which cannot be added to the
+        # numeric dryness, and the method then called a non-existent __is_dry.
+        whole_minutes = elapsed // minute
+        if whole_minutes > 0:
+            self._dry += whole_minutes
+            # Advance only by the minutes accounted for, so the leftover
+            # seconds still count towards the next check
+            self._starttime += whole_minutes * minute
+
+    # removes the plant from the soil and returns it (None when the soil is empty)
+    def plant_remove(self):
+        if not self.have_plant():
+            return None
+        removed = self.get_plant()
+        removed.leave_soil()
+        self._plant = -1
+        return removed