drzewo decyzyjne
This commit is contained in:
parent
4020cc71d9
commit
df9d873768
@ -293,7 +293,7 @@
|
|||||||
"vege": false,
|
"vege": false,
|
||||||
"size": 50,
|
"size": 50,
|
||||||
"allergens": "tomato",
|
"allergens": "tomato",
|
||||||
"ingridients": [ "cheese", "chicken", "onion", "corn", "toamto" ],
|
"ingridients": [ "cheese", "chicken", "onion", "corn", "tomato" ],
|
||||||
"drink_in": true
|
"drink_in": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
150
tiles.py
150
tiles.py
@ -456,136 +456,8 @@ def evaluate_preferences(preferences):
|
|||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
||||||
# decision tree implemented by hand (manual ID3)
|
|
||||||
class GadId3Classifier:
    """Hand-written ID3 decision tree classifier.

    Builds a tree by recursively splitting on the feature with the highest
    information gain (Shannon-entropy based), the classic ID3 algorithm.
    Expects a pandas DataFrame of discrete feature values and a pandas
    Series target; the fitted tree is a nested dict of
    ``{feature: {value: subtree-or-class}}``.
    """

    def fit(self, input, output):
        """Build the decision tree from feature frame `input` and target series `output`."""
        data = input.copy()
        # attach the target as an extra column so the recursion sees one frame
        data[output.name] = output
        self.tree = self.decision_tree(data, data, input.columns, output.name)

    def predict(self, input):
        """Return a list with one predicted class per row of `input`."""
        samples = input.to_dict(orient='records')
        predictions = []
        for sample in samples:
            predictions.append(self.make_prediction(sample, self.tree, 1.0))
        return predictions

    def entropy(self, attribute_column):
        """Shannon entropy (base 2) of the value distribution in `attribute_column`."""
        values, counts = np.unique(attribute_column, return_counts=True)
        probabilities = counts / np.sum(counts)
        # probabilities from np.unique counts are always > 0, so log2 is safe
        return float(np.sum(-probabilities * np.log2(probabilities)))

    def information_gain(self, data, feature_attribute_name, target_attribute_name):
        """Information gain of splitting `data` on `feature_attribute_name`."""
        total_entropy = self.entropy(data[target_attribute_name])

        values, counts = np.unique(data[feature_attribute_name], return_counts=True)

        weighted_entropy_list = []
        for i in range(len(values)):
            subset_probability = counts[i] / np.sum(counts)
            subset_entropy = self.entropy(
                data.where(data[feature_attribute_name] == values[i]).dropna()[target_attribute_name])
            weighted_entropy_list.append(subset_probability * subset_entropy)

        total_weighted_entropy = np.sum(weighted_entropy_list)
        return total_entropy - total_weighted_entropy

    def decision_tree(self, data, original_data, feature_attribute_names, target_attribute_name,
                      parent_node_class=None):
        """Recursively build the ID3 tree.

        Returns either a bare class label (leaf) or a nested dict (internal node).
        `original_data` is the full training frame, used as a majority-class
        fallback when a split produces an empty subset.
        """
        unique_classes = np.unique(data[target_attribute_name])
        if len(unique_classes) <= 1:
            # pure node: every remaining sample shares one class
            return unique_classes[0]
        elif len(data) == 0:
            # empty subset: fall back to the majority class of the full data set.
            # BUGFIX: the original declared the parameter as `orginal_data` but
            # referenced `original_data` here, raising NameError whenever this
            # branch executed.
            majority_class_index = np.argmax(
                np.unique(original_data[target_attribute_name], return_counts=True)[1])
            return np.unique(original_data[target_attribute_name])[majority_class_index]
        elif len(feature_attribute_names) == 0:
            # no features left to split on: inherit the parent's majority class
            return parent_node_class
        else:
            majority_class_index = np.argmax(np.unique(data[target_attribute_name], return_counts=True)[1])
            parent_node_class = unique_classes[majority_class_index]

            # choose the feature with the highest information gain
            ig_values = [self.information_gain(data, feature, target_attribute_name)
                         for feature in feature_attribute_names]
            best_feature_index = np.argmax(ig_values)
            best_feature = feature_attribute_names[best_feature_index]

            tree = {best_feature: {}}

            # the chosen feature is consumed; children split on the rest
            remaining_features = [i for i in feature_attribute_names if i != best_feature]

            parent_attribute_values = np.unique(data[best_feature])
            for value in parent_attribute_values:
                sub_data = data.where(data[best_feature] == value).dropna()
                subtree = self.decision_tree(sub_data, original_data, remaining_features,
                                             target_attribute_name, parent_node_class)
                tree[best_feature][value] = subtree

            return tree

    def make_prediction(self, sample, tree, default=1):
        """Walk `tree` using the attribute values in `sample`.

        Returns `default` when the sample carries a feature value never seen
        under this node during training.
        """
        for attribute in list(sample.keys()):
            if attribute in list(tree.keys()):
                try:
                    result = tree[attribute][sample[attribute]]
                except KeyError:
                    # unseen feature value: no branch to follow
                    return default
                # (removed a dead duplicate `result = tree[...]` lookup that
                # re-ran the same indexing after the try/except)
                if isinstance(result, dict):
                    return self.make_prediction(sample, result)
                else:
                    return result
|
|
||||||
|
|
||||||
def train_id3(prefernce):
    """Train the hand-written ID3 classifier on data.xlsx and classify one preference.

    `prefernce` is a 7-element row: budget, spiciness, vege, level of hunger,
    allergy, favorite ingridient, drink in (already numerically encoded).
    Returns the model's prediction list for that single row.
    """
    df = pd.read_excel("data.xlsx")

    # encode the textual columns to the numeric codes the tree splits on
    df['level of hunger'] = df['level of hunger'].map({'low': 30, 'high': 50})
    df['allergy'] = df['allergy'].map({'none': 0, 'tomato': 1, 'feta': 2, 'olives': 3})
    df['favorite ingridient'] = df['favorite ingridient'].map(
        {'none': 0, 'salami': 1, 'mushrooms': 2, 'pineapple': 3, 'shrimps': 4, 'sausage': 5})
    df['pizza'] = df['pizza'].map(
        {'margherita': 0, 'hawajska': 1, 'funghi': 2, 'light': 3, '4 sery': 4, 'pepperoni': 5,
         'salami': 6, 'wegetarianska': 7, 'barbecue': 8, 'miesna': 9, 'paprykowa': 10,
         'jalapeno': 11, 'barbecue wege': 12, 'kebab': 13, 'grecka': 14, 'piekielna': 15,
         'drwala': 16, 'grzybowa': 17, 'staropolska': 18, 'goralska': 19, 'prosciutto': 20,
         'broccoli': 21, 'americana': 22, 'farmerska': 23, 'nachos': 24, 'texas': 25,
         'kurczak': 26, 'zielona': 27, 'mix': 28})

    features = ['budget', 'spiciness', 'vege', 'level of hunger', 'allergy', 'favorite ingridient', 'drink in']
    X = df[features]
    y = df['pizza']

    X_train, X_test, y_train, y_test = train_test_split(X, y)

    model = GadId3Classifier()
    model.fit(X_train, y_train)

    # wrap the single preference row in a one-row frame with matching columns
    pre = [prefernce]
    query = pd.DataFrame(pre, columns=['budget','spiciness','vege','level of hunger','allergy','favorite ingridient','drink in'])
    return model.predict(query)
|
|
||||||
|
|
||||||
|
|
||||||
# decision tree using the scikit-learn library
|
|
||||||
def choose_pizza(prefernce):
|
def choose_pizza(prefernce):
|
||||||
df = pd.read_excel("data.xlsx")
|
df = pd.read_excel("restaurant.xlsx")
|
||||||
|
|
||||||
d = {'low': 30, 'high': 50}
|
d = {'low': 30, 'high': 50}
|
||||||
df['level of hunger'] = df['level of hunger'].map(d)
|
df['level of hunger'] = df['level of hunger'].map(d)
|
||||||
@ -610,7 +482,7 @@ def choose_pizza(prefernce):
|
|||||||
y = df['pizza']
|
y = df['pizza']
|
||||||
x_train, x_test, y_train, y_test = train_test_split(x, y)
|
x_train, x_test, y_train, y_test = train_test_split(x, y)
|
||||||
|
|
||||||
clf = DecisionTreeClassifier(criterion='entropy')
|
clf = DecisionTreeClassifier(random_state=400)
|
||||||
clf = clf.fit(x_train, y_train)
|
clf = clf.fit(x_train, y_train)
|
||||||
|
|
||||||
return clf.predict([prefernce])
|
return clf.predict([prefernce])
|
||||||
@ -702,25 +574,17 @@ def main():
|
|||||||
route = astar(map.get_arr(), (waiter.loc[1] // 32, waiter.loc[0] // 32), goal)
|
route = astar(map.get_arr(), (waiter.loc[1] // 32, waiter.loc[0] // 32), goal)
|
||||||
direction = [(x[1] - y[1], x[0] - y[0]) for x, y in zip(route[1:], route)]
|
direction = [(x[1] - y[1], x[0] - y[0]) for x, y in zip(route[1:], route)]
|
||||||
break
|
break
|
||||||
|
print()
|
||||||
print("Hello Sir, tell me yours preferences")
|
print("Hello Sir, tell me yours preferences")
|
||||||
print("Pass: 'budget', 'spiciness', 'vege', 'level_of_hunger', 'allergy', 'favorite_ingridient', 'drink_in'\n")
|
print("Pass: 'budget', 'spiciness', 'vege', 'level_of_hunger', 'allergy', 'favorite_ingridient', 'drink_in'\n")
|
||||||
print("Here is my list of preferences")
|
print("Here is my list of preferences")
|
||||||
ingridients = tell_preferences()
|
ingridients = tell_preferences()
|
||||||
print(ingridients)
|
print(ingridients)
|
||||||
print()
|
print()
|
||||||
evaluated_ingridients = evaluate_preferences(ingridients)
|
pizza = get_pizza(choose_pizza(evaluate_preferences(ingridients)))
|
||||||
|
print("Our proposition:")
|
||||||
print("recznie drzewo")
|
print("Name = {}\nprice = {}\nspiciness = {}\nvege = {}\nsize = {}\nallergens = {}\ningridients = {}\ndrink_in = {}\n"
|
||||||
num = train_id3(evaluated_ingridients)
|
.format(pizza.name,pizza.price, pizza.spiciness,pizza.vege,pizza.size,pizza.allergens,pizza.ingridients,pizza.drink_in))
|
||||||
piz = get_pizza(int(num[0]))
|
|
||||||
print("Name = {}, pos_in_card - {}, price = {}, spiciness = {}, vege = {}, size = {}, allergens = {}, ingridients = {}, drink_in = {}\n"
|
|
||||||
.format(piz.name, piz.pos_in_card, piz.price, piz.spiciness, piz.vege, piz.size,piz.allergens, piz.ingridients, piz.drink_in))
|
|
||||||
|
|
||||||
number_of_pizza = choose_pizza(evaluated_ingridients)
|
|
||||||
pizza = get_pizza(number_of_pizza)
|
|
||||||
print("drzewo z biblioteka")
|
|
||||||
print("Name = {}, pos_in_card - {}, price = {}, spiciness = {}, vege = {}, size = {}, allergens = {}, ingridients = {}, drink_in = {}\n"
|
|
||||||
.format(pizza.name,pizza.pos_in_card,pizza.price, pizza.spiciness,pizza.vege,pizza.size,pizza.allergens,pizza.ingridients,pizza.drink_in))
|
|
||||||
|
|
||||||
|
|
||||||
if len(direction) > 0:
|
if len(direction) > 0:
|
||||||
|
Loading…
Reference in New Issue
Block a user