# Traktor/source/decision_tree.py
#
# 46 lines
# 1.1 KiB
# Python

import pandas as pd
from sklearn.model_selection import train_test_split
import joblib
from sklearn.tree import DecisionTreeClassifier, plot_tree
import matplotlib.pyplot as plt
# Column dtypes passed to pandas when the dataset CSV is read.
dtype_dict = {
    # Textual columns.
    "season": str,
    "anomalies": bool,
    "weather": str,
    # Integer-valued columns.
    "temp": int,
    "water": int,
    "nutri": int,
    "pests": int,
    "weeds": int,
    "type": str,
    "ripeness": int,
    # Label column the classifier is trained to predict.
    "target_column": str,
}
# Load the dataset from the CSV file, applying the dtypes from dtype_dict.
data = pd.read_csv('resources/dataset.csv', header=0, dtype=dtype_dict)

# Separate the features from the label, then let pandas expand the
# string-valued feature columns into indicator (dummy) columns.
X = data.drop('target_column', axis=1)
y = data['target_column']
X = pd.get_dummies(X)

# Split the data into training (80%) and test (20%) sets with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the decision tree model. The seed is fixed (matching the split above)
# because the tree permutes features at every split; without it, repeated
# runs can yield different trees and therefore a different plot and pickle.
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# Render the fitted tree. Names are passed as plain lists of str: some
# scikit-learn releases reject a pandas Index / NumPy array for these
# parameters, while a list is accepted by every version.
fig = plt.figure(figsize=(25, 20))
_ = plot_tree(
    model,
    feature_names=list(X.columns),
    class_names=[str(label) for label in model.classes_],
    filled=True,
)
# Save before show(): the figure must be written while it is still current.
plt.savefig('drzewo_decyzyjne.png')
plt.show()

# Persist the trained decision tree model for later use.
joblib.dump(model, 'model.pkl')