AI-2020/customertree.py

106 lines
3.8 KiB
Python

import pandas as pandas
import graphviz
import pydot
from learning_db import *
from joblib import dump
from sklearn import tree
from sklearn.metrics import accuracy_score
from sklearn.externals.six import StringIO
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
customers = pandas.DataFrame({"gender": gender,
"age": age,
"outfit": outfit,
"cash": cash,
"time": time,
"vege": vege,
"food": food,
"drink": drink
})
customers["gender"], gender_objects = pandas.factorize(customers["gender"])
customers["age"], age_objects = pandas.factorize(customers["age"])
customers["outfit"], outfit_objects = pandas.factorize(customers["outfit"])
customers["cash"], cash_objects = pandas.factorize(customers["cash"])
customers["time"], time_objects = pandas.factorize(customers["time"])
customers["vege"], vege_objects = pandas.factorize(customers["vege"])
customers["food"], food_objects = pandas.factorize(customers["food"])
customers["drink"], drink_objects = pandas.factorize(customers["drink"])
objects = []
objects.append(gender_objects)
objects.append(age_objects)
objects.append(outfit_objects)
objects.append(cash_objects)
objects.append(time_objects)
objects.append(vege_objects)
objects.append(food_objects)
objects.append(drink_objects)
#X = customers.drop(["food","drink"], axis=1)
#y = customers["food"]
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=None)
#food_classifier = DecisionTreeClassifier(criterion = "entropy", random_state=1)
#food_classifier.fit(X_train, y_train)
#porównanie kryterium: index Giniego i entropia
#food_classifier1 = DecisionTreeClassifier()
#food_classifier1.fit(X_train, y_train)
#food_classifier2 = DecisionTreeClassifier(criterion = "entropy")
#food_classifier2.fit(X_train, y_train)
#y_pred1 = food_classifier1.predict(X_test)
#y_pred2 = food_classifier2.predict(X_test)
#if accuracy_score(y_test, y_pred1) > accuracy_score(y_test, y_pred2):
# dump(food_classifier,'models/food_model.joblib')
#else:
# dump(food_classifier2,'models/food_model.joblib')
#dot_data=StringIO()
#tree = tree.export_graphviz(food_classifier, out_file = dot_data,
# feature_names = X.columns,
# class_names = food_objects,
# filled = True, rounded = True)
#graph = pydot.graph_from_dot_data(dot_data.getvalue())
#graph[0].write_pdf("graphs/food_model.pdf")
#X = customers.drop(["food","drink"], axis=1)
#y = customers["drink"]
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.45, random_state=1)
#drink_classifier = DecisionTreeClassifier(criterion = "entropy")
#drink_classifier.fit(X_train, y_train)
#drink_classifier1 = DecisionTreeClassifier()
#drink_classifier1.fit(X_train, y_train)
#drink_classifier2 = DecisionTreeClassifier(criterion = "entropy")
#drink_classifier2.fit(X_train, y_train)
#y_pred1 = drink_classifier1.predict(X_test)
#y_pred2 = drink_classifier2.predict(X_test)
#if accuracy_score(y_test, y_pred1) > accuracy_score(y_test, y_pred2):
# dump(drink_classifier1,'models/drink_model.joblib')
#else:
# dump(drink_classifier2,'models/drink_model.joblib')
#dot_data=StringIO()
#tree = tree.export_graphviz(drink_classifier, out_file = dot_data,
# feature_names = X.columns,
# class_names = drink_objects,
# filled = True, rounded = True)
#graph = pydot.graph_from_dot_data(dot_data.getvalue())
#graph[0].write_pdf("graphs/drink_model.pdf")