"""Train an entropy-based decision tree on the 'Adult' dataset and classify a random client.

The script reads ./database/datasetadult.csv, ordinal-encodes the categorical
feature columns, fits a depth-3 DecisionTreeClassifier, prints train/test
accuracy, builds Graphviz DOT source for the tree, and finally predicts the
'Adult' label for one randomly generated client.
"""
import random
import warnings

import category_encoders as ce
import graphviz
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import tree
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

warnings.filterwarnings('ignore')

data = './database/datasetadult.csv'
df = pd.read_csv(data)

# Uncomment for a quick look at the raw data.
# print(df.shape)
# print(df.head())
# print(df.info())
# print(df['Outfit'].value_counts())
# print(df.isnull().sum())

# Split features from the target column.
X = df.drop(['Adult'], axis=1)
y = df['Adult']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)
# print(X_train.shape, X_test.shape)
# print(X_train.dtypes)
# print(X_train.head())

# Fit the encoder on the training split only, then reuse it for every other
# frame so the category -> integer mapping stays consistent.
encoder = ce.OrdinalEncoder(
    cols=[
        'Wrinkles',
        'Balding',
        'Beard',
        'Outfit',
        'Glasses',
        'Tattoo',
        'Hair',
        'Behaviour',
    ]
)
X_train = encoder.fit_transform(X_train)
X_test = encoder.transform(X_test)
# print(X_train.head())
# print(X_test.head())

clf_en = DecisionTreeClassifier(criterion='entropy', max_depth=3, random_state=0)
clf_en.fit(X_train, y_train)

y_pred_en = clf_en.predict(X_test)
print('Model accuracy score with criterion entropy: {0:0.4f}'.format(accuracy_score(y_test, y_pred_en)))

y_pred_train_en = clf_en.predict(X_train)
print('Training-set accuracy score: {0:0.4f}'.format(accuracy_score(y_train, y_pred_train_en)))

print('Training set score: {:.4f}'.format(clf_en.score(X_train, y_train)))
print('Test set score: {:.4f}'.format(clf_en.score(X_test, y_test)))

# export_graphviz expects class names in ascending class order (the order of
# clf_en.classes_) and as strings; y_train.unique() returns order of first
# appearance, which can mislabel the nodes. feature_names is passed as a plain
# list because some scikit-learn releases reject a pandas Index here.
dot_data = tree.export_graphviz(
    clf_en,
    out_file=None,
    feature_names=list(X_train.columns),
    class_names=[str(label) for label in clf_en.classes_],
    filled=True,
    rounded=True,
    special_characters=True,
)

# New client, for testing: every attribute is drawn at random.
new_client = {
    "Wrinkles": random.choice(['Yes', 'No']),
    "Balding": random.choice(['Yes', 'No']),
    "Beard": random.choice(['Yes', 'No']),
    "Outfit": random.choice(['Messy', 'Casual', 'Formal']),
    "Glasses": random.choice(['Yes', 'No']),
    "Tattoo": random.choice(['Yes', 'No']),
    "Hair": random.choice(['Color', 'Grey', 'Natural']),
    "Behaviour": random.choice(['Energetic', 'Stressed', 'Calm']),
}

new_client_df = pd.DataFrame(new_client, index=[0])
# Feed the columns in the same order as the training frame so the model's
# feature-name check cannot fail just because the dict above is ordered
# differently from the CSV.
new_client_df_encoded = encoder.transform(new_client_df[list(X.columns)])

prediction = clf_en.predict(new_client_df_encoded)

print("\nNew client:")
print(new_client_df)
print("Prediction:", prediction[0])

# Uncomment to render the exported tree to decision_tree.png.
# graph = graphviz.Source(dot_data)
# graph.render("decision_tree", format='png')