# Machine_learning_2023/decisionTree/prepare.py
#
# 21 lines
# 668 B
# Python
# Raw Normal View History
#
# 2023-05-18 23:18:07 +02:00
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
import joblib
# Train a decision-tree classifier on data.csv, persist it to
# decision_tree_model.pkl, and print its accuracy on a held-out split.

# header=1: the second line of the file supplies the column names (the
# line before it is skipped); fields are separated by ';'.
pima = pd.read_csv("data.csv", header=1, delimiter=';')

# Columns used as model inputs; the 'ToRemove' column is the target label.
feature_cols = [
    'Size', 'Color', 'Sound', 'Sharp',
    'Smell', 'Length', 'Temperature', 'Weight',
]
X = pima[feature_cols]
y = pima["ToRemove"]

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1
)

# Fit on the raw NumPy values so the saved model carries no feature names
# and can later be queried with plain arrays/lists.
clf = DecisionTreeClassifier()
clf = clf.fit(X_train.values, y_train)

joblib.dump(clf, 'decision_tree_model.pkl')

# Predict on raw values as well: passing the DataFrame to a model fitted
# without feature names triggers scikit-learn's feature-name mismatch warning.
y_pred = clf.predict(X_test.values)
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))