# Load libraries
import pickle

import pandas as pd
from sklearn import tree, metrics
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, _tree


def tree_to_code(tree, feature_names, file=None):
    """Print a fitted decision tree as nested ``if``/``else`` source text.

    Args:
        tree: A fitted estimator exposing a ``tree_`` attribute with
            ``feature``, ``threshold``, ``children_left``,
            ``children_right`` and ``value`` arrays.
        feature_names: Sequence of feature names, indexed by the feature
            indices stored in ``tree.tree_.feature``.
        file: Optional writable text stream that receives the output.
            ``None`` (the default) prints to standard output.
    """
    tree_ = tree.tree_
    # Resolve each node's feature index to a readable name once, up front.
    feature_name = [
        feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!"
        for i in tree_.feature
    ]
    print("def tree({}):".format(", ".join(feature_names)), file=file)

    def recurse(node, depth):
        """Emit the subtree rooted at ``node``, indented by ``depth``."""
        indent = " " * depth
        if tree_.feature[node] != _tree.TREE_UNDEFINED:
            # Node has a split feature: emit both branches of the test.
            name = feature_name[node]
            threshold = tree_.threshold[node]
            print("{}if {} <= {}:".format(indent, name, threshold), file=file)
            recurse(tree_.children_left[node], depth + 1)
            print(
                "{}else: # if {} > {}".format(indent, name, threshold),
                file=file,
            )
            recurse(tree_.children_right[node], depth + 1)
        else:
            # No split feature: emit the node's stored value array.
            print("{}return {}".format(indent, tree_.value[node]), file=file)

    recurse(0, 1)


def loadLearningBase():
    """Load ``Database.csv``, split it, and train a decision tree on it.

    The CSV is read without a header row; its five columns are named
    ``Warzywo``, ``Nawoz``, ``Srodek``, ``Stan`` and ``Dzialanie``. The
    first four are the features and ``Dzialanie`` is the target.

    The fitted tree can be exported for rendering with
    ``tree.export_graphviz`` or printed with ``tree_to_code``.

    Returns:
        The classifier returned by ``generateDecisionTree``, fitted on the
        training part of the split.
    """
    col_names = ['Warzywo', 'Nawoz', 'Srodek', 'Stan', 'Dzialanie']
    base = pd.read_csv("Database.csv", header=None, names=col_names)
    feature_cols = ['Warzywo', 'Nawoz', 'Srodek', 'Stan']

    X = base[feature_cols]  # Features
    y = base.Dzialanie  # Target variable

    # 70% training / 30% test; fixed seed keeps the split reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=1
    )

    return generateDecisionTree(X_train, X_test, y_train, y_test)


def generateDecisionTree(X_train, X_test, y_train, y_test):
    """Fit an entropy-criterion decision tree on the training data.

    Args:
        X_train: Training feature matrix.
        X_test: Test feature matrix. Currently unused; kept so existing
            callers keep working.
        y_train: Training target values.
        y_test: Test target values. Currently unused; kept so existing
            callers keep working.

    Returns:
        The fitted ``DecisionTreeClassifier``. Its accuracy can be checked
        with ``metrics.accuracy_score(y_test, clf.predict(X_test))``.
    """
    clf = DecisionTreeClassifier(criterion="entropy")
    # ``fit`` returns the estimator itself, so this is the trained model.
    return clf.fit(X_train, y_train)


if __name__ == '__main__':
    generated = loadLearningBase()

    # Save generated tree; the context manager guarantees the file is
    # flushed and closed even if pickling fails.
    filename = 'decisionTree.sav'
    with open(filename, 'wb') as model_file:
        pickle.dump(generated, model_file)