38 lines
1.2 KiB
Python
38 lines
1.2 KiB
Python
import math

import pandas as pd
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
|
|
|
|
# Compare ordinary least squares with ridge regression on 'communities.data'
# and report the test-set root-mean-squared error (RMSE) of each model.

# Load the raw CSV: the file has no header row, fields are comma-separated,
# and "?" is treated as a missing value.
data = pd.read_csv(
    "communities.data",
    header=None,
    delimiter=",",
    na_values=["?"],
)

# Keep only rows without any missing value, then remove column 3.
# NOTE(review): dropna() discards every row that has at least one missing
# value; if some columns are mostly "?", dropping those columns first would
# retain more rows -- confirm against the dataset before changing this.
# NOTE(review): column 3 is presumably a non-numeric field that the scaler
# could not handle -- verify against the dataset description.
data = data.dropna()
data = data.drop(columns=[3])

# Features are every column except the last one; the target is the last column.
x = data.iloc[:, :-1]
y = data.iloc[:, -1]

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

model = LinearRegression()
model_ridge = Ridge(alpha=1.0)

# Fit the scaler on the training split only and reuse it for the test split,
# so no test-set statistics influence the fitted transformation.
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

model.fit(x_train_scaled, y_train)
model_ridge.fit(x_train_scaled, y_train)

y_pred = model.predict(x_test_scaled)
y_pred_ridge = model_ridge.predict(x_test_scaled)

# RMSE is computed as sqrt(MSE). The former `squared=False` keyword of
# mean_squared_error was deprecated in scikit-learn 1.4 and removed in 1.6,
# so taking the square root explicitly works on old and new releases alike.
rmse = math.sqrt(mean_squared_error(y_test, y_pred))
rmse_ridge = math.sqrt(mean_squared_error(y_test, y_pred_ridge))

# The reported values are root-mean-squared errors (same units as the target).
print("Błąd średniokwadratowy dla regresji liniowej:", rmse)
print("Błąd średniokwadratowy dla regresji z regularyzacją:", rmse_ridge)