SI2020/Sklearn/Generate.py

87 lines
3.0 KiB
Python

# Load libraries
import pickle
import pandas as pd
from sklearn import tree, metrics
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, _tree
def tree_to_code(tree, feature_names, file=None):
    """Print a fitted decision tree as the source of a nested if/else function.

    The output starts with a ``def tree(<feature names>):`` header, followed
    by one ``if <feature> <= <threshold>:`` / ``else:`` pair per split node
    and one ``return <value>`` line per leaf.

    Args:
        tree: Fitted estimator exposing its low-level structure as ``tree_``;
            ``feature``, ``threshold``, ``children_left``, ``children_right``
            and ``value`` are read from it.  Inside this function the name
            shadows any module-level ``tree`` import.
        feature_names: Sequence of feature names, indexed by the feature ids
            stored in ``tree_.feature``.
        file: Optional writable text stream that receives the generated code.
            ``None`` (the default) prints to standard output, as before.
    """
    tree_ = tree.tree_
    # Per-node feature label; nodes without a split feature (leaves) get a
    # placeholder that is never printed.
    feature_name = [
        feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!"
        for i in tree_.feature
    ]
    print("def tree({}):".format(", ".join(feature_names)), file=file)

    def recurse(node, depth):
        # One space of indentation per tree level.
        indent = " " * depth
        if tree_.feature[node] == _tree.TREE_UNDEFINED:
            # Leaf node: emit the stored value and stop descending.
            # NOTE(review): the value is formatted with str(); presumably it
            # is a NumPy array whose text form is not a valid Python literal
            # -- confirm before executing the generated code.
            print("{}return {}".format(indent, tree_.value[node]), file=file)
            return
        name = feature_name[node]
        threshold = tree_.threshold[node]
        print("{}if {} <= {}:".format(indent, name, threshold), file=file)
        recurse(tree_.children_left[node], depth + 1)
        print("{}else: # if {} > {}".format(indent, name, threshold), file=file)
        recurse(tree_.children_right[node], depth + 1)

    # Node 0 is the root; its body sits one level inside the ``def`` header.
    recurse(0, 1)
def loadLearningBase():
    """Read the learning base from ``Database.csv`` and fit a decision tree.

    The CSV is read without a header row; its columns are named 'Warzywo',
    'Nawoz', 'Srodek', 'Stan' (features) and 'Dzialanie' (target).  The rows
    are split 70% / 30% into training and test sets with a fixed random state
    and handed to ``generateDecisionTree``.

    Returns:
        Whatever ``generateDecisionTree`` returns for that split.
    """
    target_col = 'Dzialanie'
    feature_cols = ['Warzywo', 'Nawoz', 'Srodek', 'Stan']

    base = pd.read_csv(
        "Database.csv",
        header=None,
        names=feature_cols + [target_col],
    )

    # 70% training and 30% test; random_state pins the shuffle.
    # Order of the result: X_train, X_test, y_train, y_test.
    split = train_test_split(
        base[feature_cols],   # features
        base[target_col],     # target variable
        test_size=0.3,
        random_state=1,
    )

    # NOTE: to inspect the fitted tree, pass the returned value to
    # tree.export_graphviz(..., feature_names=feature_cols) or to
    # tree_to_code(..., feature_cols).
    return generateDecisionTree(*split)
def generateDecisionTree(X_train, X_test, y_train, y_test):
    """Fit an entropy-criterion decision tree classifier on the training split.

    Args:
        X_train: Training features passed to ``fit``.
        X_test: Not used by this function; kept so existing callers that pass
            the full train/test split keep working.
        y_train: Training target values passed to ``fit``.
        y_test: Not used by this function; kept for the same reason.

    Returns:
        The result of ``fit`` on a ``DecisionTreeClassifier`` created with
        ``criterion="entropy"``.
    """
    # An earlier version also ran clf.predict(X_test) here and discarded the
    # result; that wasted pass is removed.  To measure accuracy, compare
    # clf.predict(X_test) with y_test via metrics.accuracy_score at the call
    # site.
    clf = DecisionTreeClassifier(criterion="entropy")
    return clf.fit(X_train, y_train)
if __name__ == '__main__':
    # Train the tree from the learning base and persist it for later use.
    generated = loadLearningBase()
    # Save generated tree
    filename = 'decisionTree.sav'
    # Context manager guarantees the file is flushed and closed even if
    # pickling fails part-way through.
    with open(filename, 'wb') as model_file:
        pickle.dump(generated, model_file)