From 5f0e9bc9a9cfefa164a39b5a536c4049c2ab0321 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Sk=C3=B3rzewski?= <pawel.skorzewski@amu.edu.pl> Date: Thu, 24 Mar 2022 10:35:07 +0100 Subject: [PATCH] Laboratoria 4 - ewaluacja --- lab/04_scikit-learn.ipynb | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/lab/04_scikit-learn.ipynb b/lab/04_scikit-learn.ipynb index 8284a61..a8bd380 100644 --- a/lab/04_scikit-learn.ipynb +++ b/lab/04_scikit-learn.ipynb @@ -31,7 +31,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -47,7 +47,8 @@ " [409340.86981766]\n", " [278401.700237 ]\n", " [301680.27997255]\n", - " [281051.71865054]]\n" + " [281051.71865054]]\n", + "Błąd średniokwadratowy wynosi 39595039990.2324\n" ] } ], @@ -57,6 +58,8 @@ "\n", "from sklearn.linear_model import LinearRegression # Model regresji liniowej z biblioteki scikit-learn\n", "\n", + "from sklearn.metrics import mean_squared_error\n", + "\n", "\n", "FEATURES = [\n", " 'Powierzchnia w m2',\n", @@ -78,7 +81,6 @@ "\n", "# Wczytanie danych\n", "data = pd.read_csv(dataset_filename, header=0, sep='\\t')\n", - "columns = data.columns[1:] # wszystkie kolumny oprócz pierwszej (\"cena\")\n", "data = data[FEATURES + ['cena']] # wybór cech\n", "data = preprocess(data) # wstępne przetworzenie danych\n", "\n", @@ -98,7 +100,12 @@ "x_test = pd.DataFrame(data_test[FEATURES])\n", "y_predicted = model.predict(x_test) # predykcja wyników na podstawie modelu\n", "\n", - "print(y_predicted[:10]) # Pierwsze 10 wyników" + "print(y_predicted[:10]) # Pierwsze 10 wyników\n", + "\n", + "# Ewaluacja\n", + "mse = mean_squared_error(y_predicted, y_expected) # Błąd średniokwadratowy na zbiorze testowym\n", + "\n", + "print(\"Błąd średniokwadratowy wynosi \", mse)" ] }, { @@ -126,7 +133,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.3" + "version": "3.7.6" }, "livereveal": { "start_slideshow_at": "selected",