87 lines
3.0 KiB
Python
87 lines
3.0 KiB
Python
|
# Load libraries
|
||
|
import pickle
|
||
|
import pandas as pd
|
||
|
from sklearn import tree, metrics
|
||
|
from sklearn.model_selection import train_test_split
|
||
|
from sklearn.tree import DecisionTreeClassifier, _tree
|
||
|
|
||
|
|
||
|
def tree_to_code(tree, feature_names):
    """Print a fitted decision tree as equivalent Python if/else pseudo-code.

    Parameters
    ----------
    tree : fitted sklearn DecisionTreeClassifier
        NOTE(review): the parameter name shadows the ``sklearn.tree`` module
        imported at file level; kept unchanged for caller compatibility.
    feature_names : list of str
        Column names used when the tree was trained; indexed by the feature
        id stored at each internal node.
    """
    tree_ = tree.tree_
    # Map each node's feature index to its column name; leaf nodes carry the
    # sentinel _tree.TREE_UNDEFINED instead of a real feature index.
    feature_name = [
        feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!"
        for i in tree_.feature
    ]
    print("def tree({}):".format(", ".join(feature_names)))

    def recurse(node, depth):
        # Depth-first walk; depth drives the printed indentation so the
        # output reads like a nested Python function body.
        indent = " " * depth
        if tree_.feature[node] != _tree.TREE_UNDEFINED:
            name = feature_name[node]
            threshold = tree_.threshold[node]
            print("{}if {} <= {}:".format(indent, name, threshold))
            recurse(tree_.children_left[node], depth + 1)
            print("{}else: # if {} > {}".format(indent, name, threshold))
            recurse(tree_.children_right[node], depth + 1)
        else:
            # Leaf: emit the class-count vector stored at the node.
            print("{}return {}".format(indent, tree_.value[node]))

    recurse(0, 1)
|
||
|
|
||
|
|
||
|
def loadLearningBase():
    """Load the CSV learning base, train a decision tree and return it."""
    col_names = ['Warzywo', 'Nawoz', 'Srodek', 'Stan', 'Dzialanie']
    base = pd.read_csv("Database.csv", header=None, names=col_names)

    # Every column except the last ('Dzialanie') is a predictor.
    feature_cols = ['Warzywo', 'Nawoz', 'Srodek', 'Stan']
    X = base[feature_cols]   # Features
    y = base.Dzialanie       # Target variable

    # 70% training and 30% test, fixed seed for reproducibility.
    split = train_test_split(X, y, test_size=0.3, random_state=1)
    X_train, X_test, y_train, y_test = split

    data = generateDecisionTree(X_train, X_test, y_train, y_test)
    return data
|
||
|
|
||
|
|
||
|
def generateDecisionTree(X_train, X_test, y_train, y_test):
    """Fit an entropy-criterion decision tree on the training split.

    Parameters
    ----------
    X_train, y_train : training features / labels used to fit the model.
    X_test, y_test : hold-out split; kept in the signature for existing
        callers and only needed if the accuracy report below is re-enabled.

    Returns
    -------
    DecisionTreeClassifier
        The fitted classifier.
    """
    # Create Decision Tree classifer object
    clf = DecisionTreeClassifier(criterion="entropy")

    # Train Decision Tree Classifer
    clf = clf.fit(X_train, y_train)

    # The original code predicted on X_test but never used the result
    # (dead work); re-enable both lines below to report hold-out accuracy.
    # y_pred = clf.predict(X_test)
    # print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

    return clf
|
||
|
|
||
|
|
||
|
if __name__ == '__main__':
    generated = loadLearningBase()

    # Save generated tree. Use a context manager so the file handle is
    # closed (and the pickle fully flushed to disk) even if dump() raises;
    # the original passed an anonymous open() and leaked the handle.
    filename = 'decisionTree.sav'
    with open(filename, 'wb') as f:
        pickle.dump(generated, f)