2023-05-27 11:34:26 +02:00
|
|
|
import os
|
|
|
|
from trainingData import TrainingData
|
2023-05-28 02:50:07 +02:00
|
|
|
from sklearn import tree
|
|
|
|
import joblib
|
|
|
|
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
|
|
|
|
import numpy as np
|
2023-05-27 11:34:26 +02:00
|
|
|
|
|
|
|
def _read_training_data() -> TrainingData:
    """Load the training set from ``training_data.csv`` next to this module.

    The first line of the file is treated as a header and skipped. Every
    remaining non-blank line is split on commas: all fields except the last
    form the attribute row (kept verbatim), and the last field, stripped of
    surrounding whitespace, is the class label.

    Returns:
        A ``TrainingData`` constructed from the list of attribute rows and
        the parallel list of class labels.

    Raises:
        OSError: If the CSV file cannot be opened or read.
    """
    attributes = []
    classes = []
    # Resolve the CSV relative to this source file rather than the caller's
    # working directory.
    location = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
    # The context manager closes the handle even if reading fails part-way.
    with open(os.path.join(location, 'training_data.csv')) as file:
        lines = file.readlines()[1:]
    for line in lines:
        actual_row = line.replace('\n', '')
        # A blank line (typically a trailing newline at the end of the file)
        # carries no sample; without this guard it would yield an empty
        # attribute row and an empty class label.
        if not actual_row.strip():
            continue
        values = actual_row.split(',')
        attributes.append(values[:-1])
        classes.append(values[-1].strip())
    return TrainingData(attributes, classes)
|
|
|
|
|
2023-05-29 12:00:46 +02:00
|
|
|
def _attributes_to_floats(attributes: list[str]) -> list[float]:
|
2023-05-29 09:57:52 +02:00
|
|
|
output: list[float] = []
|
|
|
|
if attributes[0] == 'Longitiudonal':
|
|
|
|
output.append(0)
|
|
|
|
elif attributes[0] == 'Round':
|
|
|
|
output.append(1)
|
|
|
|
elif attributes[0] == 'Flat':
|
|
|
|
output.append(2)
|
|
|
|
elif attributes[0] == 'Irregular':
|
|
|
|
output.append(3)
|
|
|
|
|
|
|
|
if attributes[1] == 'Low':
|
|
|
|
output.append(0)
|
|
|
|
elif attributes[1] == 'Medium':
|
|
|
|
output.append(1)
|
|
|
|
elif attributes[1] == 'High':
|
|
|
|
output.append(2)
|
|
|
|
|
|
|
|
|
|
|
|
if attributes[2] == "Yes":
|
|
|
|
output.append(0)
|
|
|
|
else:
|
|
|
|
output.append(1)
|
|
|
|
|
|
|
|
if attributes[3] == 'Low':
|
|
|
|
output.append(0)
|
|
|
|
elif attributes[3] == 'Medium':
|
|
|
|
output.append(1)
|
|
|
|
elif attributes[3] == 'High':
|
|
|
|
output.append(2)
|
|
|
|
|
|
|
|
if attributes[4] == 'Low':
|
|
|
|
output.append(0)
|
|
|
|
elif attributes[4] == 'Medium':
|
|
|
|
output.append(1)
|
|
|
|
elif attributes[4] == 'High':
|
|
|
|
output.append(2)
|
|
|
|
|
|
|
|
if attributes[5] == 'Transparent':
|
|
|
|
output.append(0)
|
|
|
|
elif attributes[5] == 'Light':
|
|
|
|
output.append(1)
|
|
|
|
elif attributes[5] == 'Dark':
|
|
|
|
output.append(2)
|
|
|
|
elif attributes[5] == "Colorful":
|
|
|
|
output.append(3)
|
|
|
|
|
|
|
|
if attributes[6] == 'Low':
|
|
|
|
output.append(0)
|
|
|
|
elif attributes[6] == 'Medium':
|
|
|
|
output.append(1)
|
|
|
|
elif attributes[6] == 'High':
|
|
|
|
output.append(2)
|
|
|
|
|
|
|
|
if attributes[7] == "Yes":
|
|
|
|
output.append(0)
|
|
|
|
else:
|
|
|
|
output.append(1)
|
|
|
|
return output
|
|
|
|
|
|
|
|
|
2023-05-28 02:50:07 +02:00
|
|
|
|
2023-05-27 11:34:26 +02:00
|
|
|
# Training script: runs at import time. Loads the CSV samples, encodes the
# categorical attributes numerically, fits a decision tree and writes the
# fitted model to 'model.pkl' (a relative path, so it lands in the current
# working directory).
trainning_data = _read_training_data()
X, Y = trainning_data.attributes, trainning_data.classes

model = tree.DecisionTreeClassifier()

# The classifier needs numeric features, so each attribute row is converted
# to its numeric codes before fitting.
encoded = list(map(_attributes_to_floats, X))

# The value returned by fit() is kept under its own module-level name.
dtc = model.fit(encoded, Y)

joblib.dump(model, 'model.pkl')
|