Merge branch 'joarad'

final merge
Dodano skrypt użyty do wygenerowania danych do generacji drzewa
2021-06-22 22:21:20 +02:00 · 2021-06-22 22:21:03 +02:00 · 2021-05-19 20:54:09 +02:00 · 2021-05-18 23:42:07 +02:00
4 changed files with 6352 additions and 0 deletions
--- a/data_dd2.csv
+++ b/data_dd2.csv
--- a/data_dd3.csv
+++ b/data_dd3.csv
--- a/dt.py
+++ b/dt.py
@@ -0,0 +1,160 @@
 import numpy as np
 import pandas as pd
 from sklearn.model_selection import train_test_split
 from sklearn.metrics import accuracy_score
 class GadId3Classifier:
     """ID3 decision-tree classifier built on information gain.

     The fitted tree is stored in ``self.tree`` as nested dictionaries of the
     form ``{feature: {feature_value: subtree_or_class_label}}``. A tree that
     consists of a single leaf is stored as the bare class label.
     """

     @staticmethod
     def _majority_class(target_column):
         """Return the most frequent value of *target_column*.

         Ties are resolved in favour of the value that sorts first, because
         ``np.unique`` returns sorted values and ``np.argmax`` picks the first
         maximum. Raises ``ValueError`` when the column is empty.
         """
         classes, counts = np.unique(target_column, return_counts=True)
         return classes[np.argmax(counts)]

     def fit(self, input, output):
         """Build the decision tree from training data.

         Args:
             input: DataFrame of feature columns.
             output: Series of class labels; its ``name`` is used as the
                 target column name.
         """
         data = input.copy()
         data[output.name] = output
         self.tree = self.decision_tree(data, data, input.columns, output.name)
         # Remembered so predict() can fall back to a label that actually
         # exists in the training data when a sample leaves the tree.
         self.default_class = self._majority_class(output)

     def predict(self, input):
         """Return a list with one predicted class label per row of *input*.

         Samples that reach a feature value the tree has never seen receive
         the majority class recorded by ``fit``; if the tree was assigned
         without calling ``fit``, the historical fallback ``1.0`` is used.
         """
         fallback = getattr(self, "default_class", 1.0)
         # convert input data into a list of {column: value} samples
         samples = input.to_dict(orient='records')
         return [self.make_prediction(sample, self.tree, fallback) for sample in samples]

     def entropy(self, attribute_column):
         """Return the Shannon entropy (base 2) of the values in *attribute_column*."""
         _, counts = np.unique(attribute_column, return_counts=True)
         probabilities = counts / np.sum(counts)
         return np.sum(-probabilities * np.log2(probabilities))

     def information_gain(self, data, feature_attribute_name, target_attribute_name):
         """Return the information gain of splitting *data* on one feature.

         Args:
             data: DataFrame holding both the feature and the target column.
             feature_attribute_name: column to split on.
             target_attribute_name: column holding the class labels.
         """
         total_entropy = self.entropy(data[target_attribute_name])
         values, counts = np.unique(data[feature_attribute_name], return_counts=True)
         weights = counts / np.sum(counts)
         # Boolean-mask selection keeps every row of the subset (and its
         # dtypes), so the subset sizes agree with the weights computed above.
         weighted_entropies = [
             weight * self.entropy(
                 data.loc[data[feature_attribute_name] == value, target_attribute_name]
             )
             for value, weight in zip(values, weights)
         ]
         return total_entropy - np.sum(weighted_entropies)

     def decision_tree(self, data, orginal_data, feature_attribute_names, target_attribute_name, parent_node_class=None):
         """Recursively build the ID3 tree for *data*.

         Args:
             data: DataFrame with the samples of the current node.
             orginal_data: the complete training DataFrame (parameter name
                 kept as-is for backward compatibility).
             feature_attribute_names: features still available for splitting.
             target_attribute_name: column holding the class labels.
             parent_node_class: majority class of the parent node.

         Returns:
             A class label for a leaf, otherwise a nested dict
             ``{best_feature: {value: subtree}}``.

         Raises:
             ValueError: if both *data* and *orginal_data* are empty.
         """
         # Empty subset: must be checked before anything indexes into the
         # (then empty) list of classes. Fall back to the overall majority.
         if len(data) == 0:
             return self._majority_class(orginal_data[target_attribute_name])

         unique_classes, class_counts = np.unique(data[target_attribute_name], return_counts=True)
         # Pure subset: every sample has the same class.
         if len(unique_classes) == 1:
             return unique_classes[0]

         majority_class = unique_classes[np.argmax(class_counts)]
         # No features left to split on: return the parent's class; at the
         # root (no parent) use this subset's own majority instead of None.
         if len(feature_attribute_names) == 0:
             return parent_node_class if parent_node_class is not None else majority_class

         # choose the feature with the highest information gain
         ig_values = [
             self.information_gain(data, feature, target_attribute_name)
             for feature in feature_attribute_names
         ]
         best_feature = feature_attribute_names[np.argmax(ig_values)]
         # the chosen feature becomes this node and is no longer available below
         remaining_features = [name for name in feature_attribute_names if name != best_feature]

         branches = {}
         for value in np.unique(data[best_feature]):
             sub_data = data[data[best_feature] == value]
             branches[value] = self.decision_tree(
                 sub_data, orginal_data, remaining_features, target_attribute_name, majority_class
             )
         return {best_feature: branches}

     def make_prediction(self, sample, tree, default=1):
         """Walk *tree* with the values in *sample* and return the class label.

         Args:
             sample: mapping of feature name to feature value.
             tree: nested dict produced by ``decision_tree`` or a bare label.
             default: label returned when the sample cannot be routed to a
                 leaf (unseen feature value or no matching feature).
         """
         # A tree may degenerate to a single leaf (e.g. pure training data).
         if not isinstance(tree, dict):
             return tree
         for attribute in sample:
             if attribute not in tree:
                 continue
             try:
                 result = tree[attribute][sample[attribute]]
             except (KeyError, TypeError):
                 # feature value never seen at this node (or unhashable)
                 return default
             if isinstance(result, dict):
                 # keep the caller's default all the way down the recursion
                 return self.make_prediction(sample, result, default)
             return result
         return default
 # Driver script: train the ID3 classifier on the combat data set and print
 # its accuracy on a held-out quarter of the rows.

 # Column names assigned to the header-less CSV: player (p_*) stats,
 # enemy (e_*) stats, the enemy attack type, and the target label "strategy".
 columns = [
     'p_strength', 'p_agility', 'p_wisdom', 'p_health',
     'p_melee_damage', 'p_ranged_damage', 'p_magic_damage',
     'p_armor_defence', 'p_armor_magic_protection',
     'e_strength', 'e_agility', 'e_wisdom', 'e_health',
     'e_melee_damage', 'e_ranged_damage', 'e_magic_damage',
     'e_armor_defence', 'e_armor_magic_protection',
     'e_attack_type', 'strategy',
 ]
 df = pd.read_csv("data_dd3.csv", header=None)
 df.columns = columns

 # "?" marks a missing value; rows containing any are discarded
 df = df.replace("?", np.nan).dropna()

 # split into features (X) and target (y), then into train/test parts
 X, y = df.drop(columns="strategy"), df["strategy"]
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

 # fit the model on the training part
 model = GadId3Classifier()
 model.fit(X_train, y_train)

 # evaluate on the test part
 y_pred = model.predict(X_test)
 a = accuracy_score(y_test, y_pred)
 print(a)
--- a/tree_data_gen.py
+++ b/tree_data_gen.py
@@ -0,0 +1,192 @@
 import random
 from os import urandom
 import statistics
 import csv
 def nominalizeOld(val, max_val):
     """Map a numeric value onto a six-level label relative to *max_val*.

     Returns "VERY_HIGH", "HIGH", "MEDIUM" or "LOW" when *val* is strictly
     above 80 %, 60 %, 40 % or 20 % of *max_val* respectively, "VERY_LOW"
     when it is merely positive, and "NONE" otherwise.
     """
     # thresholds are checked from the highest band downwards
     bands = (
         (0.8, "VERY_HIGH"),
         (0.6, "HIGH"),
         (0.4, "MEDIUM"),
         (0.2, "LOW"),
     )
     for fraction, label in bands:
         if val > fraction * max_val:
             return label
     if val > 0:
         return "VERY_LOW"
     return "NONE"
 def nominalize(val, max_val):
     """Map a numeric value onto "HIGH", "MEDIUM", "LOW" or "NONE".

     "NONE" is returned for values that are not positive; otherwise the label
     depends on whether *val* is strictly above 66 % or 33 % of *max_val*.
     """
     if val > 0.66 * max_val:
         return "HIGH"
     if val > 0.33 * max_val:
         return "MEDIUM"
     return "LOW" if val > 0 else "NONE"
 class Stats:
     """Randomly rolled combatant: attributes, weapon damage, armor and damage taken."""

     # (attribute name, lowest roll, highest roll) in the order they are rolled
     _ROLLED_ATTRIBUTES = (
         ("strength", 1, 10),
         ("agility", 1, 10),
         ("wisdom", 1, 10),
         ("health", 1, 50),
         ("melee_wep_damage", 1, 10),
         ("ranged_wep_damage", 1, 10),
         ("magic_wep_damage", 1, 10),
         ("armor_defence", 0, 5),
         ("armor_magic_protection", 0, 5),
     )

     def __init__(self):
         """Roll every attribute with ``random.randint`` and start undamaged."""
         for attribute, lowest, highest in self._ROLLED_ATTRIBUTES:
             setattr(self, attribute, random.randint(lowest, highest))
         self.damage = 0

     def meleeAttack(self, opponent):
         """Strength + d6 against the opponent's strength + armor defence.

         On a positive margin the opponent takes the margin plus this
         combatant's melee weapon damage.
         """
         attack_total = self.strength + random.randint(1, 6)
         margin = attack_total - (opponent.strength + opponent.armor_defence)
         if margin <= 0:
             return
         opponent.damage += margin + self.melee_wep_damage

     def rangeAttack(self, opponent):
         """Agility + d6 against the opponent's agility.

         On a positive margin the opponent takes margin + ranged weapon
         damage - armor defence, provided that amount is positive.
         """
         margin = self.agility + random.randint(1, 6) - opponent.agility
         inflicted = margin + self.ranged_wep_damage - opponent.armor_defence
         if margin > 0 and inflicted > 0:
             opponent.damage += inflicted

     def magicAttack(self, opponent):
         """Wisdom + d6 against the opponent's wisdom.

         On a positive margin the opponent takes margin + magic weapon
         damage - armor magic protection, provided that amount is positive.
         """
         margin = self.wisdom + random.randint(1, 6) - opponent.wisdom
         inflicted = margin + self.magic_wep_damage - opponent.armor_magic_protection
         if margin > 0 and inflicted > 0:
             opponent.damage += inflicted

     def reset(self):
         """Clear the accumulated damage."""
         self.damage = 0
 # Header row of the generated CSV: player columns, enemy columns,
 # the enemy attack type and the resulting strategy label.
 FIELDNAMES = [
     "p_strength", "p_agility", "p_wisdom", "p_health",
     "p_melee_damage", "p_ranged_damage", "p_magic_damage",
     "p_armor_defence", "p_armor_magic_protection",
     "e_strength", "e_agility", "e_wisdom", "e_health",
     "e_damage",
     "e_armor_defence", "e_armor_magic_protection",
     "e_attack_type",
     "strategy",
 ]

 # Upper bound on the number of rounds fought in a single simulated combat.
 MAX_COMBAT_TIME = 20

 # One list of combat outcomes per player attack type (indexes 0, 1, 2).
 SETUP_RESULTS = [[], [], []]

 # Output file and CSV writer; the header row is written immediately.
 RESULT_FILE = open('data.csv', 'w', newline='')
 FILE_WRITER = csv.writer(
     RESULT_FILE,
     dialect='excel',
     delimiter=',',
     quotechar='"',
     quoting=csv.QUOTE_MINIMAL,
 )
 FILE_WRITER.writerow(FIELDNAMES)
 def try_combat(my_seed, p, e, player_att_type, enemy_att_type):
     """Simulate one combat and record its outcome in ``SETUP_RESULTS``.

     The random generator is re-seeded with *my_seed* first. Each round the
     player attacks, then the enemy. The value appended to
     ``SETUP_RESULTS[player_att_type]`` is the player's remaining health when
     the enemy is defeated, or 0 when the player is defeated or the combat
     lasts ``MAX_COMBAT_TIME`` rounds. Both combatants are reset afterwards.

     Attack types: 0 is melee, 1 is ranged, anything else is magic.
     """
     random.seed(my_seed)

     def strike(attacker, defender, attack_type):
         # dispatch on the numeric attack type
         if attack_type == 0:
             attacker.meleeAttack(defender)
         elif attack_type == 1:
             attacker.rangeAttack(defender)
         else:
             attacker.magicAttack(defender)

     rounds_played = 0
     while True:
         strike(p, e, player_att_type)
         if e.damage >= e.health:
             # enemy defeated: score is the health the player has left
             outcome = p.health - p.damage
             break
         strike(e, p, enemy_att_type)
         rounds_played += 1
         # player defeated, or the combat ran out of time
         if p.damage >= p.health or rounds_played >= MAX_COMBAT_TIME:
             outcome = 0
             break
     SETUP_RESULTS[player_att_type].append(outcome)
     p.reset()
     e.reset()
 # Main generation loop: for every trial roll a random player and enemy,
 # fight a series of combats with each of the three player attack types,
 # label the trial with the attack type that left the player the most health
 # on average ("PASS" when every average is zero) and write one CSV row.
 TRIAL_COUNT = 10000
 COMBATS_PER_TRIAL = 30
 ATTACK_NAMES = ("MELEE", "RANGED", "MAGIC")

 try:
     for trial in range(TRIAL_COUNT):
         stat_seed = urandom(16)
         random.seed(stat_seed)
         player = Stats()
         enemy = Stats()
         enemy_attack_type = random.randint(0, 2)  # Enemy weapon choice

         for _ in range(COMBATS_PER_TRIAL):
             # try_combat re-seeds the generator with the seed it receives,
             # so all three player attack types face the same dice sequence.
             combat_seed = urandom(16)
             for player_attack_type in range(3):
                 try_combat(combat_seed, player, enemy, player_attack_type, enemy_attack_type)

         # average outcome per player attack type, then start fresh lists
         mean_results = [statistics.mean(series) for series in SETUP_RESULTS]
         SETUP_RESULTS = [[], [], []]

         strategy = "PASS"
         if any(mean_results):
             # index() returns the first maximum, so ties favour MELEE, then RANGED
             strategy = ATTACK_NAMES[mean_results.index(max(mean_results))]

         # only the damage of the weapon the enemy actually uses is exported
         enemy_damage = (enemy.melee_wep_damage,
                         enemy.ranged_wep_damage,
                         enemy.magic_wep_damage)[enemy_attack_type]
         enemy_attack_type = ATTACK_NAMES[enemy_attack_type]

         FILE_WRITER.writerow([nominalize(player.strength, 10),
                               nominalize(player.agility, 10),
                               nominalize(player.wisdom, 10),
                               nominalize(player.health, 50),
                               nominalize(player.melee_wep_damage, 10),
                               nominalize(player.ranged_wep_damage, 10),
                               nominalize(player.magic_wep_damage, 10),
                               nominalize(player.armor_defence, 5),
                               nominalize(player.armor_magic_protection, 5),
                               nominalize(enemy.strength, 10),
                               nominalize(enemy.agility, 10),
                               nominalize(enemy.wisdom, 10),
                               nominalize(enemy.health, 50),
                               nominalize(enemy_damage, 10),
                               nominalize(enemy.armor_defence, 5),
                               nominalize(enemy.armor_magic_protection, 5),
                               enemy_attack_type,
                               strategy])

         if trial % 100 == 0:
             print("Trials done: " + str(trial))
 finally:
     # Flush and release the output file even if a trial fails part-way.
     RESULT_FILE.close()
Author	SHA1	Message	Date
tubks	1494a717f6	Merge branch 'joarad'	2021-06-22 22:21:20 +02:00
tubks	daecac46b7	final merge	2021-06-22 22:21:03 +02:00
Niebby	3af4d0e68f	Dodano skrypt użyty do wygenerowania danych do generacji drzewa	2021-05-19 20:54:09 +02:00
Grzegorz Gapiński	26927b6a1d	decision tree algorythm in python with datasets for model	2021-05-18 23:42:07 +02:00