# SztIn_gr.234798/IC3.py
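# ID3 decision tree classifier: trains on 01.csv, evaluates accuracy on 10.csv.
# The tree is a nested dict keyed by feature names and feature values; the target
# column is the long 'go to: ...' label used at the bottom of the file.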

import pandas as pd
import numpy as np
import json
train_data_m = pd.read_csv("01.csv")
test_data_m = pd.read_csv("10.csv")
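# Entropy of the class label over the whole dataset: H(S) = -sum_c p_c * log2(p_c).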
def calc_total_entropy(train_data, label, class_list):
    total_row = train_data.shape[0]
    total_entr = 0
    for c in class_list:
        total_class_count = train_data[train_data[label] == c].shape[0]
        if total_class_count == 0:
            total_class_entr = 0
        else:
            total_class_entr = -(total_class_count / total_row) * np.log2(total_class_count / total_row)
        total_entr += total_class_entr
    return total_entr
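# Entropy of the label within one feature-value subset of the data.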
def calc_entropy(feature_value_data, label, class_list):
    class_count = feature_value_data.shape[0]
    entropy = 0
    for c in class_list:
        label_class_count = feature_value_data[feature_value_data[label] == c].shape[0]
        entropy_class = 0
        if label_class_count != 0:
            probability_class = label_class_count / class_count
            entropy_class = -probability_class * np.log2(probability_class)
        entropy += entropy_class
    return entropy
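# Information gain of a feature: H(S) minus the weighted entropy of its value subsets.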
def calc_info_gain(feature_name, train_data, label, class_list):
    feature_value_list = train_data[feature_name].unique()
    total_row = train_data.shape[0]
    feature_info = 0.0
    for feature_value in feature_value_list:
        feature_value_data = train_data[train_data[feature_name] == feature_value]
        feature_value_count = feature_value_data.shape[0]
        feature_value_entropy = calc_entropy(feature_value_data, label, class_list)
        feature_value_probability = feature_value_count / total_row
        feature_info += feature_value_probability * feature_value_entropy
    return calc_total_entropy(train_data, label, class_list) - feature_info
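# Pick the feature with the highest information gain among all non-label columns.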
def find_most_informative_feature(train_data, label, class_list):
    feature_list = train_data.columns.drop(label)
    max_info_gain = -1
    max_info_feature = None
    for feature in feature_list:
        feature_info_gain = calc_info_gain(feature, train_data, label, class_list)
        if max_info_gain < feature_info_gain:
            max_info_gain = feature_info_gain
            max_info_feature = feature
    return max_info_feature
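# Build one tree level for a feature: pure branches become leaf labels, impure
# branches are marked "?" for later expansion; rows of pure branches are dropped.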
def generate_sub_tree(feature_name, train_data, label, class_list):
    feature_value_count_dict = train_data[feature_name].value_counts(sort=False)
    tree = {}
    for feature_value, count in feature_value_count_dict.items():
        feature_value_data = train_data[train_data[feature_name] == feature_value]
        assigned_to_node = False
        for c in class_list:
            class_count = feature_value_data[feature_value_data[label] == c].shape[0]
            if class_count == count:  # pure node: every row has class c
                tree[feature_value] = c
                train_data = train_data[train_data[feature_name] != feature_value]
                assigned_to_node = True
        if not assigned_to_node:
            tree[feature_value] = "?"  # impure branch, expanded later by make_tree
    return tree, train_data
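# Recursively expand every "?" branch until each path ends in a class label
# or the remaining data is exhausted.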
def make_tree(root, prev_feature_value, train_data, label, class_list):
    if train_data.shape[0] != 0:
        max_info_feature = find_most_informative_feature(train_data, label, class_list)
        tree, train_data = generate_sub_tree(max_info_feature, train_data, label, class_list)
        next_root = None
        if prev_feature_value is not None:
            root[prev_feature_value] = dict()
            root[prev_feature_value][max_info_feature] = tree
            next_root = root[prev_feature_value][max_info_feature]
        else:
            root[max_info_feature] = tree
            next_root = root[max_info_feature]
        for node, branch in list(next_root.items()):
            if branch == "?":
                feature_value_data = train_data[train_data[max_info_feature] == node]
                make_tree(next_root, node, feature_value_data, label, class_list)
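# Entry point: train an ID3 tree (nested dicts) on a copy of the training data.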
def id3(train_data_m, label):
    train_data = train_data_m.copy()
    tree = {}
    class_list = train_data[label].unique()
    make_tree(tree, None, train_data, label, class_list)
    return tree
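# Walk the nested-dict tree with one instance; returns None for unseen feature values.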
def predict(tree, instance):
    if not isinstance(tree, dict):
        return tree  # leaf reached: tree is a class label
    else:
        root_node = next(iter(tree))
        feature_value = instance[root_node]
        if feature_value in tree[root_node]:
            return predict(tree[root_node][feature_value], instance)
        else:
            return None
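# Fraction of test rows whose prediction matches the true label.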
def evaluate(tree, test_data_m, label):
    correct_predict = 0
    wrong_predict = 0
    for index, row in test_data_m.iterrows():
        result = predict(tree, row)
        if result == row[label]:
            correct_predict += 1
        else:
            wrong_predict += 1
    accuracy = correct_predict / (correct_predict + wrong_predict)
    return accuracy
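# JSON encoder that converts NumPy scalar/array types so the tree can be dumped.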
class NpEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)
label = 'go to: 1)next veget. 2)gas station 3)warehouse 4)sleep 5)GAME OVER'
tree = id3(train_data_m, label)
# print(tree)
# json_str = json.dumps(tree, indent=2, cls=NpEncoder)
# print(json_str)
accuracy = evaluate(tree, test_data_m, label)
# print(accuracy)
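
# A minimal usage sketch, assuming test_data_m has at least one row: classify
# the first test row with the trained tree and report the measured accuracy.
sample = test_data_m.iloc[0]
print("sample prediction:", predict(tree, sample))
print("test accuracy:", accuracy)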