ID3
This commit is contained in:
parent
ab08c46c63
commit
7930f1fab7
157
Podprojekt_s444426/ID3.py
Normal file
157
Podprojekt_s444426/ID3.py
Normal file
@ -0,0 +1,157 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
from pprint import pprint
|
||||||
|
import dataset
|
||||||
|
|
||||||
|
# Training examples as a DataFrame; column names come from dataset.header.
training_data = pd.DataFrame(data=dataset.training_data, columns=dataset.header)
|
||||||
|
# Evaluation examples as a DataFrame; column names come from dataset.header.
testing_data = pd.DataFrame(data=dataset.testing_data, columns=dataset.header)
|
||||||
|
|
||||||
|
|
||||||
|
def entropy(target_col):
    """Return the Shannon entropy (base 2) of the values in ``target_col``.

    Each distinct value contributes ``-p * log2(p)``, where ``p`` is its
    relative frequency in the column. An empty column yields ``0.0``.
    """
    _, frequencies = np.unique(target_col, return_counts=True)
    total = np.sum(frequencies)
    terms = [(-count / total) * np.log2(count / total) for count in frequencies]
    return np.sum(terms)
|
||||||
|
|
||||||
|
|
||||||
|
def info_gain(data, split_attribute_name, target_name="label"):
    """Return the information gain of splitting ``data`` on one attribute.

    Parameters:
        data: DataFrame holding both the attribute and the target column.
        split_attribute_name: name of the column to split on.
        target_name: name of the column being predicted.

    Returns:
        The entropy of the target column minus the weighted average entropy
        of the target within each subset produced by the split.
    """
    # Entropy of the whole set.
    total_entropy = entropy(data[target_name])

    # Distinct attribute values and how many rows carry each of them.
    split_column = data[split_attribute_name]
    vals, counts = np.unique(split_column, return_counts=True)
    total_count = np.sum(counts)

    # Weighted average of the entropy of every subset. Rows are selected with
    # a boolean mask rather than ``where(...).dropna()``: the latter would also
    # discard matching rows that merely have a NaN in some unrelated column,
    # making the subsets inconsistent with the weights computed above.
    weighted_entropy = np.sum([
        (count / total_count) * entropy(data.loc[split_column == val, target_name])
        for val, count in zip(vals, counts)
    ])

    return total_entropy - weighted_entropy
|
||||||
|
|
||||||
|
|
||||||
|
def _most_common_value(column):
    """Return the most frequent value in ``column`` (first in sorted order on ties)."""
    values, counts = np.unique(column, return_counts=True)
    return values[np.argmax(counts)]


def ID3(data, original_data, features, target_attribute_name="label", parent_node_class=None):
    """Build a decision tree with the ID3 algorithm.

    Parameters:
        data: DataFrame for which the (sub)tree is being built.
        original_data: the parent data set; its majority class is returned
            when ``data`` is empty.
        features: names of the attributes still available for splitting.
        target_attribute_name: name of the column to predict.
        parent_node_class: majority class of the parent node; returned when
            no features are left to split on.

    Returns:
        Either a leaf (a single target value) or a nested dict of the form
        ``{attribute: {attribute_value: subtree_or_leaf, ...}}``.
    """
    # The empty case has to be tested first: taking element [0] of the unique
    # target values of an empty set would raise IndexError.
    if len(data) == 0:
        return _most_common_value(original_data[target_attribute_name])

    # All rows share one target value -> leaf with that value.
    target_values = np.unique(data[target_attribute_name])
    if len(target_values) <= 1:
        return target_values[0]

    # Nothing left to split on -> fall back to the parent's majority class.
    if len(features) == 0:
        return parent_node_class

    # Majority class of this node, handed down to the children.
    parent_node_class = _most_common_value(data[target_attribute_name])

    # The best attribute is the one with the largest information gain.
    gains = [info_gain(data, feature, target_attribute_name) for feature in features]
    best_feature = features[np.argmax(gains)]

    tree = {best_feature: {}}
    remaining_features = [feature for feature in features if feature != best_feature]

    # Grow one subtree per observed value of the chosen attribute. A boolean
    # mask keeps rows intact (no dtype upcast, no accidental loss of rows that
    # contain NaN in other columns), unlike ``where(...).dropna()``.
    for value in np.unique(data[best_feature]):
        sub_data = data[data[best_feature] == value]
        tree[best_feature][value] = ID3(
            sub_data, data, remaining_features, target_attribute_name, parent_node_class)

    return tree
|
||||||
|
|
||||||
|
|
||||||
|
def predict(query, tree, default='beetroot'):
    """Walk the decision tree and return the predicted label for ``query``.

    Parameters:
        query: dict mapping attribute names to the values of one sample.
        tree: nested dict as produced by ``ID3`` (or a bare leaf value).
        default: value returned when the query cannot be resolved, e.g. the
            sample has an attribute value the tree has no branch for, or none
            of the query's attributes appears at the current node.

    Returns:
        The leaf reached by following the query through the tree, or
        ``default`` if no leaf can be reached.
    """
    node = tree
    while isinstance(node, dict):
        # Attribute tested at this node: the first query key the node knows.
        attribute = next((key for key in query if key in node), None)
        if attribute is None:
            return default
        try:
            node = node[attribute][query[attribute]]
        except (KeyError, TypeError):
            # Unseen (or unhashable) attribute value: no branch to follow.
            return default
    return node
|
||||||
|
|
||||||
|
|
||||||
|
def test(data, tree):
    """Print the percentage of rows in ``data`` that ``tree`` labels correctly.

    The last column of ``data`` is left out of the queries; predictions are
    compared against the "label" column.
    """
    # Feature-only records (the target column is dropped).
    feature_rows = data.iloc[:, :-1].to_dict(orient="records")

    # One prediction per row, stored under the same positional index.
    predicted = pd.DataFrame(columns=["predicted"])
    for row_number, feature_row in enumerate(feature_rows):
        predicted.loc[row_number, "predicted"] = predict(feature_row, tree, 'beetroot')

    hits = np.sum(predicted["predicted"] == data["label"])
    accuracy = (hits / len(data)) * 100
    print('Precyzja przewidywań: ', accuracy, '%')
|
||||||
|
|
||||||
|
|
||||||
|
def predict_data(data):
    """Predict a label for every row of ``data`` using the module-level tree.

    ``data`` is a list of rows laid out like ``dataset.header``; the last
    column is ignored. The rows are converted to dicts so that ``predict``
    can be called on them.

    Returns:
        A list with one single-element list per input row, holding the
        predicted label.
    """
    frame = pd.DataFrame(data=data, columns=dataset.header)
    feature_rows = frame.iloc[:, :-1].to_dict(orient="records")

    predicted = pd.DataFrame(columns=["predicted"])
    for position, feature_row in enumerate(feature_rows):
        predicted.loc[position, "predicted"] = predict(feature_row, tree, 'beetroot')

    return predicted.values.tolist()
|
||||||
|
|
||||||
|
|
||||||
|
# tworzenie, wyświetlanie i testowanie drzewa
|
||||||
|
|
||||||
|
# Build the decision tree at import time from the training set; every column
# except the last one is offered as a split feature. predict_data reads this
# module-level ``tree``.
tree = ID3(training_data, training_data, training_data.columns[:-1])
|
||||||
|
#pprint(tree)
|
||||||
|
#test(testing_data, tree)
|
113
Podprojekt_s444426/dataset.py
Normal file
113
Podprojekt_s444426/dataset.py
Normal file
@ -0,0 +1,113 @@
|
|||||||
|
from soil_upgrade import Soil
|
||||||
|
from carrot_upgrade import Carrot
|
||||||
|
from beetroot_upgrade import Beetroot
|
||||||
|
from cabbage_upgrade import Cabbage
|
||||||
|
|
||||||
|
header = ['previous', 'soil pH', 'dry level', 'label']
|
||||||
|
|
||||||
|
training_data = [
|
||||||
|
['carrot', 'alkaline', 'dry', 'beetroot'],
|
||||||
|
['carrot', 'slightly acidic', 'dry', 'beetroot'],
|
||||||
|
['cabbage', 'alkaline', 'dry', 'beetroot'],
|
||||||
|
['none', 'alkaline', 'dry', 'beetroot'],
|
||||||
|
['carrot', 'slightly acidic', 'medium wet', 'beetroot'],
|
||||||
|
['none', 'slightly acidic', 'dry', 'beetroot'],
|
||||||
|
['pumpkin', 'neutral', 'dry', 'beetroot'],
|
||||||
|
['beetroot', 'neutral', 'dry', 'beetroot'],
|
||||||
|
['cabbage', 'alkaline', 'medium wet', 'beetroot'],
|
||||||
|
['none', 'slightly acidic', 'medium wet', 'beetroot'],
|
||||||
|
['cabbage', 'acidic', 'dry', 'carrot'],
|
||||||
|
['none', 'acidic', 'medium wet', 'carrot'],
|
||||||
|
['carrot', 'neutral', 'dry', 'carrot'],
|
||||||
|
['beetroot', 'slightly acidic', 'dry', 'carrot'],
|
||||||
|
['pumpkin', 'acidic', 'medium wet', 'carrot'],
|
||||||
|
['beetroot', 'acidic', 'medium wet', 'carrot'],
|
||||||
|
['carrot', 'neutral', 'dry', 'carrot'],
|
||||||
|
['pumpkin', 'slightly acidic', 'medium wet', 'carrot'],
|
||||||
|
['beetroot', 'neutral', 'wet', 'pumpkin'],
|
||||||
|
['none', 'neutral', 'wet', 'pumpkin'],
|
||||||
|
['carrot', 'slightly acidic', 'wet', 'pumpkin'],
|
||||||
|
['pumpkin', 'neutral', 'wet', 'pumpkin'],
|
||||||
|
['cabbage', 'slightly acidic', 'medium wet', 'pumpkin'],
|
||||||
|
['carrot', 'neutral', 'wet', 'pumpkin'],
|
||||||
|
['cabbage', 'neutral', 'wet', 'pumpkin'],
|
||||||
|
['none', 'slightly acidic', 'wet', 'pumpkin'],
|
||||||
|
['beetroot', 'slightly acidic', 'medium wet', 'pumpkin'],
|
||||||
|
['carrot', 'neutral', 'medium wet', 'cabbage'],
|
||||||
|
['pumpkin', 'alkaline', 'wet', 'cabbage'],
|
||||||
|
['none', 'alkaline', 'medium wet', 'cabbage'],
|
||||||
|
['beetroot', 'neutral', 'medium wet', 'cabbage'],
|
||||||
|
['cabbage', 'slightly acidic', 'wet', 'cabbage'],
|
||||||
|
['none', 'neutral', 'medium wet', 'cabbage'],
|
||||||
|
['cabbage', 'neutral', 'medium wet', 'cabbage'],
|
||||||
|
['carrot', 'alkaline', 'wet', 'cabbage'],
|
||||||
|
['none', 'alkaline', 'wet', 'cabbage'],
|
||||||
|
['pumpkin', 'neutral', 'medium wet', 'cabbage'],
|
||||||
|
['carrot', 'neutral', 'soaking wet', 'none'],
|
||||||
|
['beetroot', 'alkaline', 'very dry', 'none'],
|
||||||
|
['none', 'alkaline', 'soaking wet', 'none'],
|
||||||
|
['cabbage', 'acidic', 'medium wet', 'none'],
|
||||||
|
['pumpkin', 'acidic', 'soaking wet', 'none'],
|
||||||
|
['cabbage', 'slightly acidic', 'soaking wet', 'none'],
|
||||||
|
['none', 'slightly acidic', 'soaking wet', 'none'],
|
||||||
|
['carrot', 'neutral', 'very dry', 'none'],
|
||||||
|
['carrot', 'acidic', 'medium wet', 'none'],
|
||||||
|
['pumpkin', 'neutral', 'soaking wet', 'none']
|
||||||
|
]
|
||||||
|
|
||||||
|
testing_data = [
|
||||||
|
|
||||||
|
['beetroot', 'neutral', 'dry', 'beetroot'],
|
||||||
|
['cabbage', 'alkaline', 'medium wet', 'beetroot'],
|
||||||
|
['none', 'slightly acidic', 'medium wet', 'beetroot'],
|
||||||
|
['cabbage', 'acidic', 'dry', 'carrot'],
|
||||||
|
['none', 'acidic', 'medium wet', 'carrot'],
|
||||||
|
['carrot', 'neutral', 'dry', 'carrot'],
|
||||||
|
['beetroot', 'neutral', 'wet', 'pumpkin'],
|
||||||
|
['none', 'neutral', 'wet', 'pumpkin'],
|
||||||
|
['carrot', 'slightly acidic', 'wet', 'pumpkin'],
|
||||||
|
['carrot', 'neutral', 'medium wet', 'cabbage'],
|
||||||
|
['pumpkin', 'alkaline', 'wet', 'cabbage'],
|
||||||
|
['none', 'alkaline', 'medium wet', 'cabbage'],
|
||||||
|
['carrot', 'neutral', 'soaking wet', 'none'],
|
||||||
|
['beetroot', 'alkaline', 'very dry', 'none'],
|
||||||
|
['none', 'alkaline', 'soaking wet', 'none'],
|
||||||
|
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def create_data_soil():
    """Return a fixed list of sample ``Soil`` objects, some with a plant added.

    Each soil is built as ``Soil(pH, dry_level)``; where a plant class is
    given, an instance created with the listed argument is added to the soil.
    """
    # (pH, dry level, plant class or None, argument for the plant constructor)
    plots = (
        (6.5, 0.5, Cabbage, 100),
        (4.6, 0.7, Carrot, 100),
        (5.6, 0.6, None, None),
        (6.1, 0.5, Beetroot, 95),
        (6.5, 0.4, Cabbage, 90),
        (6.0, 0.5, None, None),
        (7.1, 0.5, Cabbage, 80),
    )

    all_soil = []
    for pH, dry_level, plant_cls, plant_arg in plots:
        soil = Soil(pH, dry_level)
        if plant_cls is not None:
            soil.add_plant(plant_cls(plant_arg))
        all_soil.append(soil)

    return all_soil
|
98
Podprojekt_s444426/plant_ID3.py
Normal file
98
Podprojekt_s444426/plant_ID3.py
Normal file
@ -0,0 +1,98 @@
|
|||||||
|
from ID3 import predict_data
|
||||||
|
from dataset import create_data_soil
|
||||||
|
from plant_upgrade import Plant
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
def decide_to_plant(soil):
    """Decide what to plant in ``soil``; intended to be called by the tractor.

    If the soil already holds a plant whose ``collect()`` does not return the
    string 'True', nothing is planted and ``[['none']]`` is returned.
    Otherwise the soil description is gathered (before the current plant, if
    any, leaves the soil) and passed to the decision tree.

    Returns:
        The result of ``predict_data`` for this soil, or ``[['none']]``.
    """
    if soil.have_plant():
        current_plant = soil.get_plant()
        if current_plant.collect() != 'True':
            return [['none']]
        # Read the soil state first so the current plant is still recorded
        # as the previous crop, then release it.
        info = get_info(soil)
        current_plant.leave_soil()
    else:
        info = get_info(soil)

    # The plant was ready to collect or the soil is free.
    # grow_a_plant(soil,predicted[0][0])
    return predict_data([info])
|
||||||
|
|
||||||
|
|
||||||
|
def grow_a_plant(soil, plant_name):
    """Add a new ``Plant(plant_name)`` to ``soil`` and print the soil."""
    soil.add_plant(Plant(plant_name))
    print(soil)
|
||||||
|
|
||||||
|
|
||||||
|
def get_info(soil):
    """Describe ``soil`` as a row laid out like the training data.

    Returns:
        ``[previous plant name or 'none', pH category, dry-level category, '']``
        where the trailing empty string stands in for the label column.
    """
    previous = soil.get_plant().get_name() if soil.have_plant() else 'none'
    pH_category = categorize_pH(soil.get_pH())
    dryness_category = categorize_dry_level(soil.get_dry_level())
    return [previous, pH_category, dryness_category, '']
|
||||||
|
|
||||||
|
|
||||||
|
def categorize_pH(pH):
    """Map a numeric pH to its category name.

    Upper bounds are inclusive: <= 4.5 'strongly acidic', <= 5.5 'acidic',
    <= 6.5 'slightly acidic', <= 7.2 'neutral', above 7.2 'alkaline'.
    Returns None when no comparison holds (e.g. NaN).
    """
    upper_bounds = (
        (4.5, 'strongly acidic'),
        (5.5, 'acidic'),
        (6.5, 'slightly acidic'),
        (7.2, 'neutral'),
    )
    for bound, category in upper_bounds:
        if pH <= bound:
            return category
    if 7.2 < pH:
        return 'alkaline'
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def categorize_dry_level(dry_level):
    """Map a numeric dry level to its category name.

    Upper bounds are inclusive: <= 0.1 'soaking wet', <= 0.4 'wet',
    <= 0.6 'medium wet', <= 0.8 'dry', above 0.8 'very dry'.
    Returns None when no comparison holds (e.g. NaN).
    """
    upper_bounds = (
        (0.1, 'soaking wet'),
        (0.4, 'wet'),
        (0.6, 'medium wet'),
        (0.8, 'dry'),
    )
    for bound, category in upper_bounds:
        if dry_level <= bound:
            return category
    if 0.8 < dry_level:
        return 'very dry'
    return None
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
Testowanie działania dla argumentów obiektu Soil
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Run the decision procedure on the sample soils and print a summary table.
# NOTE(review): this executes whenever the module is imported — consider
# moving it under an ``if __name__ == "__main__":`` guard.
all_soil = create_data_soil()
result = []

for soil in all_soil:
    predicted = decide_to_plant(soil)
    for p in predicted:
        occupied = soil.have_plant()
        plant = soil.get_plant().get_name() if occupied else 'none'
        collect = soil.get_plant().get_collect() if occupied else '-'
        row = [soil.get_pH(), soil.get_dry_level(), plant, collect, p[0]]
        result.append(row)

summary_columns = ['pH', 'dry level', 'plant', 'ripe', 'prediction']
result = pd.DataFrame(data=result, columns=summary_columns)
print(result)
|
78
Podprojekt_s444426/soil_upgrade.py
Normal file
78
Podprojekt_s444426/soil_upgrade.py
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
class Soil:
    """A plot of soil with a pH, a dryness level and at most one plant.

    The absence of a plant is represented by the sentinel ``-1`` stored in
    ``_plant``.
    """

    def __init__(self, pH, dry, ID=None, x=None, y=None):
        """Create a soil without a plant.

        Parameters:
            pH: pH value of the soil.
            dry: dryness level of the soil.
            ID: optional identifier returned by ``get_id``.
            x, y: optional coordinates returned by ``get_coordinates``.
        """
        super().__init__()
        # Always defined (None by default) so the getters below cannot fail
        # with AttributeError.
        self._id = ID
        self._x = x
        self._y = y
        self._plant = -1  # new soil has no plant
        self._dry = dry
        self._pH = pH
        self._starttime = datetime.now()  # reference point for drying

    def __str__(self):
        if self.have_plant():
            return f'pH={self._pH}, dry level={self._dry}, plant={self.get_plant().get_name()}'
        return f'soil: pH={self._pH}, dry level={self._dry}, plant=none'

    def get_coordinates(self):
        """Return the plot coordinates as an ``(x, y)`` tuple."""
        return self._x, self._y

    def get_id(self):
        """Return the soil identifier."""
        return self._id

    def add_plant(self, plant):
        """Plant ``plant`` here; does nothing if the soil is already occupied."""
        if not self.have_plant():
            plant.add_soil(self)
            self._plant = plant

    def have_plant(self):
        """Return True if a plant currently grows in this soil."""
        return self._plant != -1

    def get_plant(self):
        """Return the plant in this soil, or ``-1`` when there is none."""
        return self._plant

    def get_pH(self):
        """Return the pH value."""
        return self._pH

    def get_dry_level(self):
        """Return the current dryness level."""
        return self._dry

    def is_dry(self):
        """Update the dryness for elapsed time and classify it.

        Returns 'False' below 30, 'Medium' from 30 up to (not including) 70,
        and 'True' otherwise.
        """
        # NOTE(review): these thresholds look like percentages, while other
        # code in the project passes fractions such as 0.5 — confirm the
        # intended scale of ``_dry``.
        self.__drying()
        if self._dry < 30:
            return 'False'
        elif 30 <= self._dry < 70:
            return 'Medium'
        else:
            return 'True'

    def __drying(self):
        """Increase dryness by one unit per whole minute since the last update."""
        checktime = datetime.now()
        elapsed_minutes = int((checktime - self._starttime).total_seconds() // 60)
        if elapsed_minutes <= 0:
            # Keep the reference time so partial minutes accumulate across
            # frequent checks instead of being discarded each time.
            return
        self._dry += elapsed_minutes
        self._starttime = checktime

    def plant_remove(self):
        """Remove the plant from the soil and return it (None if there was none)."""
        if not self.have_plant():
            return None
        removed = self.get_plant()
        removed.leave_soil()
        self._plant = -1
        return removed
|
Loading…
Reference in New Issue
Block a user