"""Train a decision-tree classifier on the crop dataset and persist it.

Running this script:
  1. reads ``resources/dataset.csv`` using the explicit column dtypes below,
  2. one-hot encodes the string feature columns,
  3. fits a ``DecisionTreeClassifier`` on an 80/20 train/test split,
  4. renders the fitted tree to ``drzewo_decyzyjne.png`` and displays it,
  5. serializes the fitted model to ``model.pkl`` with joblib.
"""

import joblib
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree

# Explicit dtype for every CSV column, so pandas does not have to infer them.
dtype_dict = {
    'season': str,
    'anomalies': bool,
    'weather': str,
    'temp': int,
    'water': int,
    'nutri': int,
    'pests': int,
    'weeds': int,
    'type': str,
    'ripeness': int,
    'target_column': str,
}

# Load the data from the CSV file (the first row holds the column names).
data = pd.read_csv('resources/dataset.csv', header=0, dtype=dtype_dict)
# print(data)

# Separate the features from the label column.
X = data.drop('target_column', axis=1)
y = data['target_column']

# One-hot encode the string columns; the tree needs numeric inputs.
X = pd.get_dummies(X)
# X.to_csv('model_data1.csv', index=False)

# Split the data into training and test sets (20% held out, fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the decision-tree model on the training portion.
# NOTE(review): no random_state is passed to the classifier, so the fitted
# tree may differ between runs when candidate splits tie.
model = DecisionTreeClassifier()
model.fit(X_train, y_train)

# Draw the fitted tree. feature_names / class_names are passed as plain lists
# of str: scikit-learn 1.3.0 rejects a pandas Index or NumPy array here, while
# lists are accepted by every version.
plt.figure(figsize=(25, 20))
plot_tree(
    model,
    feature_names=[str(name) for name in X.columns],
    class_names=[str(label) for label in model.classes_],
    filled=True,
)
plt.savefig('drzewo_decyzyjne.png')
plt.show()

# Persist the trained decision-tree model for later use.
joblib.dump(model, 'model.pkl')