decisiontree1
This commit is contained in:
parent
f2e6995cd2
commit
b5e69bcc97
76
decisiontree.py
Normal file
76
decisiontree.py
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
import category_encoders as ce
|
||||||
|
from sklearn.metrics import accuracy_score
|
||||||
|
from sklearn.tree import DecisionTreeClassifier
|
||||||
|
from sklearn import tree
|
||||||
|
import random
|
||||||
|
import graphviz
|
||||||
|
import warnings
|
||||||
|
warnings.filterwarnings('ignore')
|
||||||
|
|
||||||
|
# Path to the CSV dataset, resolved relative to the current working directory.
data = './database/datasetadult.csv'

# Read the whole dataset into a DataFrame.
df = pd.read_csv(filepath_or_buffer=data)

# Inspection calls used while exploring the data (kept disabled):
#   df.shape, df.head(), df.info(),
#   df['Outfit'].value_counts(), df.isnull().sum()
|
||||||
|
|
||||||
|
# Separate the target column ('Adult') from the feature columns.
y = df['Adult']
X = df.drop(columns=['Adult'])

# Hold out 33% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

# Inspection calls used while exploring the split (kept disabled):
#   X_train.shape, X_test.shape, X_train.dtypes, X_train.head()
|
||||||
|
|
||||||
|
# Ordinal-encode the listed feature columns so the tree receives numeric input.
encoder = ce.OrdinalEncoder(
    cols=[
        'Wrinkles',
        'Balding',
        'Beard',
        'Outfit',
        'Glasses',
        'Tattoo',
        'Hair',
        'Behaviour',
    ],
)

# The encoder is fitted on the training split only; the test split is
# transformed with that same fitted encoder.
X_train = encoder.fit_transform(X_train)
X_test = encoder.transform(X_test)

# Inspection calls used while checking the encoding (kept disabled):
#   X_train.head(), X_test.head()
|
||||||
|
# Entropy-based decision tree, limited to depth 3, with a fixed seed.
clf_en = DecisionTreeClassifier(
    criterion='entropy',
    max_depth=3,
    random_state=0,
)
clf_en.fit(X_train, y_train)

# Accuracy on the held-out test split.
y_pred_en = clf_en.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred_en)
print('Model accuracy score with criterion entropy: {0:0.4f}'.format(test_accuracy))

# Accuracy on the training split, to compare against the test figure.
y_pred_train_en = clf_en.predict(X_train)
train_accuracy = accuracy_score(y_train, y_pred_train_en)
print('Training-set accuracy score: {0:0.4f}'.format(train_accuracy))

# The same two splits again, this time scored through the estimator's own
# score() method.
train_score = clf_en.score(X_train, y_train)
print('Training set score: {:.4f}'.format(train_score))
test_score = clf_en.score(X_test, y_test)
print('Test set score: {:.4f}'.format(test_score))
|
||||||
|
|
||||||
|
# Export the fitted tree as DOT source for Graphviz.
#
# export_graphviz expects class_names in the same (ascending) order as
# clf_en.classes_. y_train.unique() returns labels in order of first
# appearance, which can attach the wrong label to each leaf, so the names are
# taken from the fitted classifier instead. They are converted to str because
# the exporter embeds them in the node text. feature_names is passed as a
# plain list rather than a pandas Index.
dot_data = tree.export_graphviz(
    clf_en,
    out_file=None,  # return the DOT source as a string instead of writing a file
    feature_names=list(X_train.columns),
    class_names=[str(label) for label in clf_en.classes_],
    filled=True,
    rounded=True,
    special_characters=True,
)
|
||||||
|
|
||||||
|
# New client, generated at random as a test case for the trained model.
# Each feature is drawn from the candidate values listed for it below.
client_options = {
    "Wrinkles": ['Yes', 'No'],
    "Balding": ['Yes', 'No'],
    "Beard": ['Yes', 'No'],
    "Outfit": ['Messy', 'Casual', 'Formal'],
    "Glasses": ['Yes', 'No'],
    "Tattoo": ['Yes', 'No'],
    "Hair": ['Color', 'Grey', 'Natural'],
    "Behaviour": ['Energetic', 'Stressed', 'Calm'],
}
new_client = {
    feature: random.choice(values)
    for feature, values in client_options.items()
}

# Single-row frame, encoded with the encoder fitted on the training data and
# then passed to the classifier.
new_client_df = pd.DataFrame(new_client, index=[0])
new_client_df_encoded = encoder.transform(new_client_df)
prediction = clf_en.predict(new_client_df_encoded)

print("\nNew client:")
print(new_client_df)
print("Prediction:", prediction[0])
|
||||||
|
|
||||||
|
# Wrap the exported DOT source and render it to a PNG image using the base
# file name "decision_tree".
graph = graphviz.Source(dot_data)
graph.render(filename="decision_tree", format='png')
|
Loading…
Reference in New Issue
Block a user