From 44ae9e8b1158431f02888a2a3aa3b11b2b279f73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Sk=C3=B3rzewski?= Date: Fri, 25 Mar 2022 10:08:25 +0100 Subject: [PATCH] =?UTF-8?q?Usuwanie=20obserwacji=20odstaj=C4=85cych?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- wyk/04_Metody_ewaluacji.ipynb | 37 +++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/wyk/04_Metody_ewaluacji.ipynb b/wyk/04_Metody_ewaluacji.ipynb index b3160a7..117f543 100644 --- a/wyk/04_Metody_ewaluacji.ipynb +++ b/wyk/04_Metody_ewaluacji.ipynb @@ -261,7 +261,9 @@ "### Metryki dla zadań regresji\n", "\n", "Dla zadań regresji możemy zastosować np.:\n", - " * błąd średniokwadratowy (*root-mean-square error*, RMSE):\n", + " * błąd średniokwadratowy (*mean-square error*, MSE):\n", + " $$ \\mathrm{MSE} \\, = \\, \\frac{1}{m} \\sum_{i=1}^{m} \\left( \\hat{y}^{(i)} - y^{(i)} \\right)^2 $$\n", + " * pierwiastek z błędu średniokwadratowego (*root-mean-square error*, RMSE):\n", " $$ \\mathrm{RMSE} \\, = \\, \\sqrt{ \\frac{1}{m} \\sum_{i=1}^{m} \\left( \\hat{y}^{(i)} - y^{(i)} \\right)^2 } $$\n", " * średni błąd bezwzględny (*mean absolute error*, MAE):\n", " $$ \\mathrm{MAE} \\, = \\, \\frac{1}{m} \\sum_{i=1}^{m} \\left| \\hat{y}^{(i)} - y^{(i)} \\right| $$" @@ -293,7 +295,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 1, "metadata": { "slideshow": { "slide_type": "notes" @@ -315,7 +317,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 2, "metadata": { "slideshow": { "slide_type": "notes" @@ -334,7 +336,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 3, "metadata": { "slideshow": { "slide_type": "notes" @@ -406,7 +408,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 4, "metadata": { "slideshow": { "slide_type": "notes" @@ -430,7 +432,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 5, "metadata": { "slideshow": { "slide_type": "subslide" @@ -456,7 +458,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 6, "metadata": { "slideshow": { "slide_type": "notes" @@ -503,7 +505,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 7, "metadata": { "slideshow": { "slide_type": "notes" @@ -531,7 +533,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 8, "metadata": { "slideshow": { "slide_type": "subslide" @@ -561,7 +563,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 9, "metadata": { "slideshow": { "slide_type": "notes" @@ -583,7 +585,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 10, "metadata": { "slideshow": { "slide_type": "subslide" @@ -597,7 +599,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 11, "metadata": { "slideshow": { "slide_type": "notes" @@ -617,7 +619,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 12, "metadata": { "slideshow": { "slide_type": "subslide" @@ -627,7 +629,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "32929ab5e3024128bd39a6c165e50196", + "model_id": "6325cec10a034a9d96d862dee900013d", "version_major": 2, "version_minor": 0 }, @@ -644,7 +646,7 @@ "" ] }, - "execution_count": 35, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -1185,9 +1187,10 @@ "# Odrzućmy obserwacje odstające\n", "alldata_no_outliers = [\n", " (index, item) for index, item in alldata.iterrows() \n", - " if item.price > 100 and item.sqrMetres > 10]\n", + " if item.price > 10000 and item.sqrMetres < 1000]\n", "\n", - "alldata_no_outliers = alldata.loc[(alldata['price'] > 100) & (alldata['sqrMetres'] > 100)]" + "# Alternatywnie można to zrobić w następujący sposób\n", + "alldata_no_outliers = alldata.loc[(alldata['price'] > 10000) & (alldata['sqrMetres'] < 1000)]" ] }, {