SI_InteligentnyWozekWidlowy/tree/DecisionTree.py

159 lines
4.7 KiB
Python

import csv
import numpy as np
import pandas
import sklearn
from sklearn import metrics, preprocessing
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from InitialStateFactory import InitialStateFactory
from data.ClientParams import ClientParams
from data.Order import Order
from data.enum.CompanySize import CompanySize
from data.enum.Priority import Priority
from util.ClientParamsFactory import ClientParamsFactory
class DecisionTree:
    """Predict order priorities from client parameters with a decision tree.

    The classifier is trained on ``data/TEST/importedData.csv``. Each sample
    consists of the eight feature columns listed in ``_FEATURE_HEADERS``; the
    target column is ``PRIORITY``.

    Company size is the only non-numeric feature. It is converted to an
    ordinal code through ``_SIZE_CODES`` and the *same* table is used when
    preparing training data and when building feature vectors for
    prediction, so both sides always agree on what each code means.
    """

    # Column order shared by the CSV files and the in-memory feature vectors.
    _FEATURE_HEADERS = ('DELAY',
                        'PAYED',
                        'NET-WORTH',
                        'INFLUENCE',
                        'SKARBOWKA',
                        'MEMBER',
                        'HAT',
                        'SIZE')

    # Index of the 'SIZE' column inside a feature row.
    _SIZE_COLUMN = 7

    # Ordinal code for every company size, keyed by the member name.
    _SIZE_CODES = {
        'NO': 0,
        'SMALL': 1,
        'NORMAL': 2,
        'BIG': 3,
        'HUGE': 4,
        'GIGANTISHE': 5,
    }

    def __init__(self) -> None:
        super().__init__()

    @classmethod
    def _size_code(cls, size) -> int:
        """Return the ordinal code of a company size.

        ``size`` may be an object exposing a ``name`` attribute (such as an
        enum member) or its textual form as found in the CSV files, e.g.
        ``'CompanySize.BIG'``.

        Raises:
            ValueError: if the size is not one of the known company sizes.
        """
        name = getattr(size, 'name', None)
        if name is None:
            # Textual form: keep only the part after the last dot.
            name = str(size).rsplit('.', 1)[-1]
        try:
            return cls._SIZE_CODES[name]
        except KeyError:
            raise ValueError("unknown company size: {!r}".format(size)) from None

    @classmethod
    def _order_features(cls, order) -> list:
        """Build the feature row of one order, in ``_FEATURE_HEADERS`` order."""
        params = order.client_params
        return [params.payment_delay,
                params.payed,
                params.net_worth,
                params.infuence_rate,
                params.is_skarbowka,
                params.membership,
                params.is_hat,
                cls._size_code(params.company_size)]

    def generate_data(self, generator: "ClientParamsFactory", n: int):
        """Write ``n`` generated client parameter rows to a CSV file.

        The file ``data/TEST/generatedData.csv`` is overwritten. It starts
        with a header row followed by one row per call to
        ``generator.get_client_params()``.
        """
        # The ``with`` statement closes the file; no explicit close is needed.
        with open("data/TEST/generatedData.csv", 'w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(self._FEATURE_HEADERS)
            for _ in range(n):
                data = generator.get_client_params()
                writer.writerow([data.payment_delay,
                                 data.payed,
                                 data.net_worth,
                                 data.infuence_rate,
                                 data.is_skarbowka,
                                 data.membership,
                                 data.is_hat,
                                 data.company_size])

    def get_normalized_data(self, X):
        """Replace the company-size labels in column 7 of ``X`` with codes.

        ``X`` is a 2-D array whose column 7 holds labels such as
        ``'CompanySize.SMALL'``. The column is rewritten in place with the
        ordinal codes from ``_SIZE_CODES`` and the same array is returned.

        Raises:
            ValueError: if a label is not a known company size; ``X`` is
                left unmodified in that case.
        """
        # Encode everything first so a bad label cannot leave X half-updated.
        codes = [self._size_code(label) for label in X[:, self._SIZE_COLUMN]]
        X[:, self._SIZE_COLUMN] = codes
        return X

    def print_logs(self, x, y, prediction):
        """Print every sample with its predicted and actual label, then accuracy."""
        for i, predicted in enumerate(prediction):
            print("{}. {} \n predicted: {}, actual: {}".format(i, x[i, :], predicted, y[i]))
        print("\nDecisionTrees's Accuracy: ", metrics.accuracy_score(y, prediction))

    def get_data_good(self, orders: "list[Order]") -> "list[Order]":
        """Assign a predicted priority to every order and return the list.

        A fresh decision tree is trained via ``get_decision_tree`` and
        queried with one feature row per order. Predicted labels ``"LOW"``,
        ``"MEDIUM"`` and ``"HIGH"`` are converted to the matching
        ``Priority`` member; any other label is stored unchanged. The orders
        are modified in place. The previous and the new priority of each
        order are printed.

        Raises:
            ValueError: if an order has an unknown company size.
        """
        if len(orders) == 0:
            # Nothing to classify; avoid training a tree for no reason.
            return orders

        features = np.array([self._order_features(order) for order in orders])
        tree = self.get_decision_tree()
        labels = tree.predict(features)

        priority_by_label = {
            "LOW": Priority.LOW,
            "MEDIUM": Priority.MEDIUM,
            "HIGH": Priority.HIGH,
        }
        for order, label in zip(orders, labels):
            print(order.priority)
            # Fall back to the raw label when it is not a known priority name.
            order.priority = priority_by_label.get(label, label)
            print(order.priority)
        return orders

    def get_decision_tree(self) -> "DecisionTreeClassifier":
        """Train and return a decision tree on ``data/TEST/importedData.csv``.

        The tree uses the entropy criterion with a maximum depth of 4 and is
        fitted on a random 80% of the rows. The split is not seeded, so the
        resulting tree may differ between calls.
        """
        data_input = pandas.read_csv('data/TEST/importedData.csv', delimiter=",")
        X = data_input[list(self._FEATURE_HEADERS)].values
        Y = data_input["PRIORITY"]
        X = self.get_normalized_data(X)
        # The held-out 20% is not used here; pass it to print_logs together
        # with the tree's predictions when an evaluation is needed.
        X_train, _, y_train, _ = train_test_split(X, Y, test_size=0.2, train_size=0.8)
        classifier = DecisionTreeClassifier(criterion="entropy", max_depth=4)
        classifier.fit(X_train, y_train)
        return classifier
# Example usage:
# decision_tree = DecisionTree()
# decision_tree.get_data_good(InitialStateFactory.generate_order_list(50))