import joblib
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree

# Column name -> dtype mapping handed to pd.read_csv below, so each listed
# column is parsed with an explicit type rather than left to type inference.
dtype_dict = {
    'season': str,
    'anomalies': bool,
    'weather': str,
    'temp': int,
    'water': int,
    'nutri': int,
    'pests': int,
    'weeds': int,
    'type': str,
    'ripeness': int,
    'target_column': str,  # label column; separated from the features after loading
}

# Load the data from the CSV file; the first row supplies the column names
# and dtype_dict fixes the type of each column.
data = pd.read_csv('resources/dataset.csv', header=0, dtype=dtype_dict)

# Separate the feature columns (X) from the label column (y).
X = data.drop(columns='target_column')
y = data['target_column']

# One-hot encode the string-typed feature columns so the tree receives only
# numeric/boolean inputs; the remaining columns pass through unchanged.
X = pd.get_dummies(X)
# Debug aid: uncomment to dump the encoded feature matrix for inspection.
# X.to_csv('model_data1.csv', index=False)

# Split the data into a training set (80%) and a test set (20%); the fixed
# seed makes the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the decision tree model on the training portion only.
# random_state is pinned because scikit-learn permutes the candidate features
# at every split, so an unseeded tree can differ from run to run even on
# identical data; the seed matches the one used for the split above.
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# Draw the fitted tree on a large canvas so that the node labels stay legible.
fig = plt.figure(figsize=(25, 20))
# feature_names / class_names are passed as plain lists of str: some
# scikit-learn releases reject a pandas Index or a NumPy array for these
# parameters during argument validation.
_ = plot_tree(
    model,
    feature_names=list(X.columns),
    class_names=[str(label) for label in model.classes_],
    filled=True,
)
# Save before show(): the image is written to disk first, then the
# interactive window is opened.
plt.savefig('drzewo_decyzyjne.png')
plt.show()

# 'model' is the decision tree model trained earlier in this script; persist
# it to disk so it can be loaded again with joblib.load.
# NOTE(review): only the estimator is saved; the one-hot column layout of X is
# not stored with it -- confirm that consumers rebuild the same columns.
joblib.dump(model, 'model.pkl')