# SI2020/Sklearn/Generate.py

# Load libraries
import pickle
import pandas as pd
from sklearn import tree, metrics
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, _tree


def tree_to_code(tree, feature_names):
    """Print a decision tree to stdout as a nested if/else ``def tree(...)``.

    Args:
        tree: Object exposing a ``tree_`` attribute with ``feature``,
            ``threshold``, ``children_left``, ``children_right`` and
            ``value`` arrays (presumably a fitted sklearn decision tree --
            confirm against callers).
        feature_names: Sequence of column names; used both as the printed
            function's parameter list and to label each split.

    Returns:
        None. The generated text is written with ``print``.
    """
    fitted = tree.tree_

    # Translate every node's feature index into a readable label up front.
    # Nodes whose feature is the TREE_UNDEFINED sentinel get a placeholder.
    node_labels = []
    for feature_idx in fitted.feature:
        if feature_idx == _tree.TREE_UNDEFINED:
            node_labels.append("undefined!")
        else:
            node_labels.append(feature_names[feature_idx])

    signature = ", ".join(feature_names)
    print(f"def tree({signature}):")

    def emit(node_id, level):
        # Two spaces of indentation per nesting level in the printed code.
        pad = "  " * level

        # A node without a split feature is a leaf: print its value and stop.
        if fitted.feature[node_id] == _tree.TREE_UNDEFINED:
            print(f"{pad}return {fitted.value[node_id]}")
            return

        label = node_labels[node_id]
        cutoff = fitted.threshold[node_id]

        # Left child handles "<= threshold", right child handles the rest.
        print(f"{pad}if {label} <= {cutoff}:")
        emit(fitted.children_left[node_id], level + 1)
        print(f"{pad}else:  # if {label} > {cutoff}")
        emit(fitted.children_right[node_id], level + 1)

    # Start at the root (node 0), one level inside the printed ``def``.
    emit(0, 1)


def loadLearningBase():
    """Load ``Database.csv``, split it 70/30 and return the trained tree.

    The CSV is read from the current working directory without a header
    row; its columns are named ``Warzywo``, ``Nawoz``, ``Srodek``, ``Stan``
    (features) and ``Dzialanie`` (target).

    Returns:
        Whatever ``generateDecisionTree`` returns for the resulting split.
    """
    feature_cols = ['Warzywo', 'Nawoz', 'Srodek', 'Stan']
    target_col = 'Dzialanie'

    # The file has no header line, so column names are supplied explicitly.
    frame = pd.read_csv(
        "Database.csv",
        header=None,
        names=[*feature_cols, target_col],
    )

    features = frame[feature_cols]
    target = frame[target_col]

    # 70% training / 30% test; fixed seed keeps the split reproducible.
    # Result order is X_train, X_test, y_train, y_test -- exactly the
    # positional order generateDecisionTree expects.
    split = train_test_split(features, target, test_size=0.3, random_state=1)
    model = generateDecisionTree(*split)

    # Optional debugging aids (disabled): dump the tree for graphviz via
    # tree.export_graphviz, or print it as if/else code with
    # tree_to_code(model, feature_cols).
    return model


def generateDecisionTree(X_train, X_test, y_train, y_test):
    """Fit and return an entropy-criterion decision tree classifier.

    Args:
        X_train: Training features passed to ``fit``.
        X_test: Held-out features. Not used by this function; kept in the
            signature so existing callers that pass all four splits still
            work.
        y_train: Training labels passed to ``fit``.
        y_test: Held-out labels. Not used by this function; kept for the
            same reason as ``X_test``.

    Returns:
        The fitted ``DecisionTreeClassifier``.
    """
    # No prediction is made on the test split here: the result was never
    # consumed, so it was wasted inference on every call. To evaluate the
    # model, a caller can compute
    # metrics.accuracy_score(y_test, clf.predict(X_test)) on the return value.
    clf = DecisionTreeClassifier(criterion="entropy")

    # fit() returns the estimator itself, so the fitted model is returned
    # directly.
    return clf.fit(X_train, y_train)


if __name__ == '__main__':
    # Train the tree from the CSV learning base.
    generated = loadLearningBase()

    # Save generated tree. The context manager closes (and therefore
    # flushes) the file deterministically, even if pickling raises, instead
    # of leaving the handle to the garbage collector.
    filename = 'decisionTree.sav'
    with open(filename, 'wb') as model_file:
        pickle.dump(generated, model_file)