"""Compare plain and ridge linear regression on ``communities.data``.

Loads the CSV, drops incomplete rows and column 3, fits ``LinearRegression``
and ``Ridge`` on standardized features, and prints the test-set RMSE of each.
"""

import math

import pandas as pd
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


def _rmse(y_true, y_pred):
    """Return the root mean squared error between *y_true* and *y_pred*.

    The square root is taken explicitly instead of passing ``squared=False``
    to ``mean_squared_error``: that keyword was deprecated in scikit-learn 1.4
    and removed in 1.6, so relying on it fails on current releases.
    """
    return math.sqrt(mean_squared_error(y_true, y_pred))


# The file has no header row; "?" entries are parsed as missing values.
data = pd.read_csv(
    'communities.data',
    header=None,
    delimiter=",",
    na_values=["?"],
)

# Keep only rows without any missing value.
data = data.dropna()
# Drop the column labelled 3 (presumably a non-numeric identifier column --
# confirm against the dataset description).
data = data.drop(columns=[3])

# Every column except the last is a feature; the last column is the target.
x = data.iloc[:, :-1]
y = data.iloc[:, -1]

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

model = LinearRegression()
model_ridge = Ridge(alpha=1.0)

# Fit the scaler on the training split only, then reuse its statistics for
# the test split so no test information enters the preprocessing step.
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

model.fit(x_train_scaled, y_train)
model_ridge.fit(x_train_scaled, y_train)

y_pred = model.predict(x_test_scaled)
y_pred_ridge = model_ridge.predict(x_test_scaled)

rmse = _rmse(y_test, y_pred)
rmse_ridge = _rmse(y_test, y_pred_ridge)

print("Błąd średniokwadratowy dla regresji liniowej:", rmse)
print("Błąd średniokwadratowy dla regresji z regularyzacją:", rmse_ridge)