Projekt_ML/regression (linear and Random Forest Tree).py

39 lines
1.4 KiB
Python
Raw Normal View History

2024-02-25 17:26:20 +01:00
import pandas as pd
import math
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics
# Load the dataset: every column except the last is a feature, the last
# column is the regression target.
df = pd.read_csv('data.csv')
features = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Split BEFORE scaling. Fitting the scaler on the full dataset would let the
# mean/variance of the test rows leak into the training features and make the
# reported test metrics optimistic. The same random_state selects the same
# rows as before, because the shuffle depends only on the number of samples.
features_train, features_test, y_train, y_test = train_test_split(
    features, y, test_size=0.2, random_state=42
)

# Learn the scaling parameters from the training rows only, then apply that
# same transformation to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(features_train)
X_test = scaler.transform(features_test)

# Full feature matrix scaled with the train-only statistics; kept so the
# module-level name `X` remains a scaled array of all rows.
X = scaler.transform(features)
print("Podzielono zbiór na {} rekordów uczących i {} rekordów testowych".format(len(y_train), len(y_test)))
# --- Linear regression ---
# fit() returns the estimator itself, so construction and training are chained.
regressor_lin = LinearRegression().fit(X_train, y_train)
print("\nWyuczono model regresjii liniowej:")

# Evaluate on the held-out split: RMSE is the square root of the mean squared
# error, MAE is the mean absolute error.
predicted_prices_lin = regressor_lin.predict(X_test)
mse_lin = metrics.mean_squared_error(y_true=y_test, y_pred=predicted_prices_lin)
rmse_lin = math.sqrt(mse_lin)
mae_lin = metrics.mean_absolute_error(y_true=y_test, y_pred=predicted_prices_lin)
print('RMSE: {:.2f}'.format(rmse_lin))
print('MAE: {:.2f}'.format(mae_lin))
# --- Random forest regression ---
# random_state is fixed so the fitted forest is reproducible between runs;
# fit() returns the estimator itself, so construction and training are chained.
regressor_RF = RandomForestRegressor(random_state=8).fit(X_train, y_train)
print("\nWyuczono model regresji drzew Random Forest:")

# Evaluate on the held-out split: RMSE is the square root of the mean squared
# error, MAE is the mean absolute error.
predicted_prices_RF = regressor_RF.predict(X_test)
mse_RF = metrics.mean_squared_error(y_true=y_test, y_pred=predicted_prices_RF)
rmse_RF = math.sqrt(mse_RF)
mae_RF = metrics.mean_absolute_error(y_true=y_test, y_pred=predicted_prices_RF)
print('RMSE: {:.2f}'.format(rmse_RF))
print('MAE: {:.2f}'.format(mae_RF))