# SI2020/Sklearn/Generate.py

# Load libraries
import pickle
import pandas as pd
from sklearn import tree, metrics
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, _tree


def tree_to_code(tree, feature_names, file=None):
    """Print a fitted decision tree as the source of a nested if/else function.

    The emitted text starts with ``def tree(<feature names>):`` and then
    contains one ``if <feature> <= <threshold>:`` / ``else:`` pair per split
    node and one ``return <value>`` line per leaf, indented two spaces per
    nesting level.

    Note: inside this function the ``tree`` parameter shadows any module-level
    name ``tree``; the parameter name is kept for backward compatibility.

    Args:
        tree: Object exposing a ``tree_`` attribute whose ``feature``,
            ``threshold``, ``children_left``, ``children_right`` and ``value``
            members are indexable by node id (node 0 is the root).
        feature_names: Sequence of feature names, indexed by the feature ids
            found in ``tree_.feature``.
        file: Optional writable text stream that receives the output.
            ``None`` (the default) prints to standard output, which is the
            original behaviour.
    """
    tree_ = tree.tree_
    # Nodes whose feature id equals TREE_UNDEFINED are treated as leaves below,
    # so they get a placeholder name instead of an index into feature_names.
    feature_name = [
        feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!"
        for i in tree_.feature
    ]
    print("def tree({}):".format(", ".join(feature_names)), file=file)

    def recurse(node, depth):
        """Emit the subtree rooted at ``node`` at nesting level ``depth``."""
        indent = "  " * depth
        if tree_.feature[node] == _tree.TREE_UNDEFINED:
            # Leaf: the stored value is formatted with str().
            # NOTE(review): if the value is a numpy array, the emitted
            # ``return`` line is presumably not valid Python - confirm before
            # executing the generated text.
            print("{}return {}".format(indent, tree_.value[node]), file=file)
            return

        name = feature_name[node]
        threshold = tree_.threshold[node]
        print("{}if {} <= {}:".format(indent, name, threshold), file=file)
        recurse(tree_.children_left[node], depth + 1)
        print("{}else:  # if {} > {}".format(indent, name, threshold), file=file)
        recurse(tree_.children_right[node], depth + 1)

    # The body of ``def tree(...)`` starts one level in.
    recurse(0, 1)


def loadLearningBase(path="Database.csv"):
    """Load the learning base from a CSV file and train a decision tree on it.

    The file is read without a header row and its five columns are named
    'Warzywo', 'Nawoz', 'Srodek', 'Stan' and 'Dzialanie'. The first four
    columns are the features; 'Dzialanie' is the target. The rows are split
    70% / 30% into training and test sets with a fixed ``random_state`` and
    handed to ``generateDecisionTree``.

    Args:
        path: Location of the CSV file. Defaults to ``"Database.csv"``, the
            path that was previously hard-coded.

    Returns:
        Whatever ``generateDecisionTree`` returns for the split data (the
        trained classifier).
    """
    col_names = ['Warzywo', 'Nawoz', 'Srodek', 'Stan', 'Dzialanie']
    # Every column except the last one is a feature; the last is the target.
    feature_cols = col_names[:-1]
    target_col = col_names[-1]

    base = pd.read_csv(path, header=None, names=col_names)
    # Debugging aid: print(base.head()) shows the first rows of the dataset.

    X = base[feature_cols]  # Features
    y = base[target_col]  # Target variable

    # Split dataset into training set and test set (70% training, 30% test);
    # the fixed random_state keeps the split reproducible between runs.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=1)

    data = generateDecisionTree(X_train, X_test, y_train, y_test)

    # Optional inspection helpers, disabled by default:
    #   tree.export_graphviz(data, out_file='treeData.dot', filled=True,
    #                        rounded=True, special_characters=True,
    #                        feature_names=feature_cols)   # data for an image
    #   tree_to_code(data, feature_cols)                   # if/else dump

    return data


def generateDecisionTree(X_train, X_test, y_train, y_test, *,
                         report_accuracy=False):
    """Train a decision tree classifier that uses the entropy criterion.

    Previously the function always predicted on ``X_test`` and then discarded
    the result; that wasted work is now done only when an accuracy report is
    requested.

    Args:
        X_train: Training features, passed to ``fit``.
        X_test: Test features. Only used when ``report_accuracy`` is true.
        y_train: Training targets, passed to ``fit``.
        y_test: Test targets. Only used when ``report_accuracy`` is true.
        report_accuracy: Keyword-only. When true, predict on ``X_test`` and
            print the accuracy score against ``y_test``. Defaults to false,
            which keeps the function silent as before.

    Returns:
        The value returned by ``DecisionTreeClassifier.fit`` (the fitted
        classifier).
    """
    # Create the classifier object and train it on the training split.
    clf = DecisionTreeClassifier(criterion="entropy").fit(X_train, y_train)

    if report_accuracy:
        # Model accuracy: how often is the classifier correct on the test set?
        y_pred = clf.predict(X_test)
        print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

    return clf


if __name__ == '__main__':
    # Train the tree on the default learning base.
    generated = loadLearningBase()

    # Save generated tree. The context manager guarantees the file is flushed
    # and closed even if pickling raises; the original left the handle open.
    filename = 'decisionTree.sav'
    with open(filename, 'wb') as out_file:
        pickle.dump(generated, out_file)
# Commit: "Prześlij pliki do 'Sklearn'" (Upload files to 'Sklearn'), 2020-05-13 03:14:09 +02:00
			`# Load libraries`
			`import pickle`
			`import pandas as pd`
			`from sklearn import tree, metrics`
			`from sklearn.model_selection import train_test_split`
			`from sklearn.tree import DecisionTreeClassifier, _tree`


			`def tree_to_code(tree, feature_names):`
			`# f = open('generatedTree.py', 'w')`
			`tree_ = tree.tree_`
			`feature_name = [`
			`feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!"`
			`for i in tree_.feature`
			`]`
			`# print("def tree({}):".format(", ".join(feature_names)), file=f)`
			`print("def tree({}):".format(", ".join(feature_names)))`

			`def recurse(node, depth):`
			`indent = " " * depth`
			`if tree_.feature[node] != _tree.TREE_UNDEFINED:`
			`name = feature_name[node]`
			`threshold = tree_.threshold[node]`
			`# print("{}if {} <= {}:".format(indent, name, threshold), file=f)`
			`print("{}if {} <= {}:".format(indent, name, threshold))`
			`recurse(tree_.children_left[node], depth + 1)`
			`# print("{}else: # if {} > {}".format(indent, name, threshold), file=f)`
			`print("{}else: # if {} > {}".format(indent, name, threshold))`
			`recurse(tree_.children_right[node], depth + 1)`
			`else:`
			`# print("{}return {}".format(indent, tree_.value[node],), file=f)`
			`print("{}return {}".format(indent, tree_.value[node]))`

			`recurse(0, 1)`
			`# f.close()`


			`def loadLearningBase():`
			`col_names = ['Warzywo', 'Nawoz', 'Srodek', 'Stan', 'Dzialanie']`
			`base = pd.read_csv("Database.csv", header=None, names=col_names)`
			`feature_cols = ['Warzywo', 'Nawoz', 'Srodek', 'Stan']`

			`""" print dataset"""`
			`# print(base.head())`

			`X = base[feature_cols] # Features`
			`y = base.Dzialanie # Target variable`

			`# Split dataset into training set and test set`
			`X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,`
			`random_state=1) # 70% training and 30% test`

			`data = generateDecisionTree(X_train, X_test, y_train, y_test)`

			`"""generate data for image"""`
			`# tree.export_graphviz(data, out_file='treeData.dot', filled=True, rounded=True, special_characters=True,`
			`# feature_names=feature_cols)`

			`"""Printing if_styled tree to console"""`
			`# tree_to_code(data, feature_cols)`

			`return data`


			`def generateDecisionTree(X_train, X_test, y_train, y_test):`
			`# Create Decision Tree classifer object`
			`clf = DecisionTreeClassifier(criterion="entropy")`

			`# Train Decision Tree Classifer`
			`clf = clf.fit(X_train, y_train)`

			`# Predict the response for test dataset`
			`y_pred = clf.predict(X_test)`

			`"""Model Accuracy, how often is the classifier correct """`
			`# print("Accuracy:", metrics.accuracy_score(y_test, y_pred))`

			`return clf`


			`if __name__ == '__main__':`
			`generated = loadLearningBase()`

			`# Save generated tree`
			`filename = 'decisionTree.sav'`
			`pickle.dump(generated, open(filename, 'wb'))`