"""Train and compare a linear and a Random Forest regressor on ``data.csv``.

The last column of the CSV is used as the regression target and every other
column as a feature. The data is split 80/20 into training and test sets,
the features are standardized, both models are fitted, and RMSE / MAE on the
test set are printed for each model.
"""
import math

import pandas as pd
from sklearn import metrics
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


def _report_errors(y_true, y_pred):
    """Print RMSE and MAE of *y_pred* against *y_true* and return them.

    Returns:
        A ``(rmse, mae)`` tuple of floats.
    """
    rmse = math.sqrt(metrics.mean_squared_error(y_true, y_pred))
    mae = metrics.mean_absolute_error(y_true, y_pred)
    print('RMSE: {:.2f} zł'.format(rmse))
    print('MAE: {:.2f} zł'.format(mae))
    return rmse, mae


df = pd.read_csv('data.csv')

# Features are all columns except the last one; the last column is the target.
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Split BEFORE scaling so that the test rows never influence the scaler.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then reuse its statistics for the
# test rows. Fitting on the full dataset would leak test-set information.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

print("Podzielono zbiór na {} rekordów uczących i {} rekordów testowych".format(len(y_train), len(y_test)))

# --- Linear regression ---
regressor_lin = LinearRegression()
regressor_lin.fit(X_train, y_train)
print("\nWyuczono model regresjii liniowej:")
predicted_prices_lin = regressor_lin.predict(X_test)
rmse_lin, mae_lin = _report_errors(y_test, predicted_prices_lin)

# --- Random Forest regression (fixed seed for reproducible results) ---
regressor_RF = RandomForestRegressor(random_state=8)
regressor_RF.fit(X_train, y_train)
print("\nWyuczono model regresji drzew Random Forest:")
predicted_prices_RF = regressor_RF.predict(X_test)
rmse_RF, mae_RF = _report_errors(y_test, predicted_prices_RF)