Usuwanie obserwacji odstających

This commit is contained in:
Paweł Skórzewski 2022-03-25 10:08:25 +01:00
parent 1f98b6813c
commit 44ae9e8b11

View File

@@ -261,7 +261,9 @@
"### Metryki dla zadań regresji\n", "### Metryki dla zadań regresji\n",
"\n", "\n",
"Dla zadań regresji możemy zastosować np.:\n", "Dla zadań regresji możemy zastosować np.:\n",
" * błąd średniokwadratowy (*root-mean-square error*, RMSE):\n", " * błąd średniokwadratowy (*mean-square error*, MSE):\n",
" $$ \\mathrm{MSE} \\, = \\, \\frac{1}{m} \\sum_{i=1}^{m} \\left( \\hat{y}^{(i)} - y^{(i)} \\right)^2 $$\n",
" * pierwiastek z błędu średniokwadratowego (*root-mean-square error*, RMSE):\n",
" $$ \\mathrm{RMSE} \\, = \\, \\sqrt{ \\frac{1}{m} \\sum_{i=1}^{m} \\left( \\hat{y}^{(i)} - y^{(i)} \\right)^2 } $$\n", " $$ \\mathrm{RMSE} \\, = \\, \\sqrt{ \\frac{1}{m} \\sum_{i=1}^{m} \\left( \\hat{y}^{(i)} - y^{(i)} \\right)^2 } $$\n",
" * średni błąd bezwzględny (*mean absolute error*, MAE):\n", " * średni błąd bezwzględny (*mean absolute error*, MAE):\n",
" $$ \\mathrm{MAE} \\, = \\, \\frac{1}{m} \\sum_{i=1}^{m} \\left| \\hat{y}^{(i)} - y^{(i)} \\right| $$" " $$ \\mathrm{MAE} \\, = \\, \\frac{1}{m} \\sum_{i=1}^{m} \\left| \\hat{y}^{(i)} - y^{(i)} \\right| $$"
@@ -293,7 +295,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 24, "execution_count": 1,
"metadata": { "metadata": {
"slideshow": { "slideshow": {
"slide_type": "notes" "slide_type": "notes"
@@ -315,7 +317,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 25, "execution_count": 2,
"metadata": { "metadata": {
"slideshow": { "slideshow": {
"slide_type": "notes" "slide_type": "notes"
@@ -334,7 +336,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 26, "execution_count": 3,
"metadata": { "metadata": {
"slideshow": { "slideshow": {
"slide_type": "notes" "slide_type": "notes"
@@ -406,7 +408,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 27, "execution_count": 4,
"metadata": { "metadata": {
"slideshow": { "slideshow": {
"slide_type": "notes" "slide_type": "notes"
@@ -430,7 +432,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 28, "execution_count": 5,
"metadata": { "metadata": {
"slideshow": { "slideshow": {
"slide_type": "subslide" "slide_type": "subslide"
@@ -456,7 +458,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 29, "execution_count": 6,
"metadata": { "metadata": {
"slideshow": { "slideshow": {
"slide_type": "notes" "slide_type": "notes"
@@ -503,7 +505,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 30, "execution_count": 7,
"metadata": { "metadata": {
"slideshow": { "slideshow": {
"slide_type": "notes" "slide_type": "notes"
@@ -531,7 +533,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 31, "execution_count": 8,
"metadata": { "metadata": {
"slideshow": { "slideshow": {
"slide_type": "subslide" "slide_type": "subslide"
@@ -561,7 +563,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 32, "execution_count": 9,
"metadata": { "metadata": {
"slideshow": { "slideshow": {
"slide_type": "notes" "slide_type": "notes"
@@ -583,7 +585,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 33, "execution_count": 10,
"metadata": { "metadata": {
"slideshow": { "slideshow": {
"slide_type": "subslide" "slide_type": "subslide"
@@ -597,7 +599,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 34, "execution_count": 11,
"metadata": { "metadata": {
"slideshow": { "slideshow": {
"slide_type": "notes" "slide_type": "notes"
@@ -617,7 +619,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 35, "execution_count": 12,
"metadata": { "metadata": {
"slideshow": { "slideshow": {
"slide_type": "subslide" "slide_type": "subslide"
@@ -627,7 +629,7 @@
{ {
"data": { "data": {
"application/vnd.jupyter.widget-view+json": { "application/vnd.jupyter.widget-view+json": {
"model_id": "32929ab5e3024128bd39a6c165e50196", "model_id": "6325cec10a034a9d96d862dee900013d",
"version_major": 2, "version_major": 2,
"version_minor": 0 "version_minor": 0
}, },
@@ -644,7 +646,7 @@
"<function __main__.interactive_classification(highlight)>" "<function __main__.interactive_classification(highlight)>"
] ]
}, },
"execution_count": 35, "execution_count": 12,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@@ -1185,9 +1187,10 @@
"# Odrzućmy obserwacje odstające\n", "# Odrzućmy obserwacje odstające\n",
"alldata_no_outliers = [\n", "alldata_no_outliers = [\n",
" (index, item) for index, item in alldata.iterrows() \n", " (index, item) for index, item in alldata.iterrows() \n",
" if item.price > 100 and item.sqrMetres > 10]\n", " if item.price > 10000 and item.sqrMetres < 1000]\n",
"\n", "\n",
"alldata_no_outliers = alldata.loc[(alldata['price'] > 100) & (alldata['sqrMetres'] > 100)]" "# Alternatywnie można to zrobić w następujący sposób\n",
"alldata_no_outliers = alldata.loc[(alldata['price'] > 10000) & (alldata['sqrMetres'] < 1000)]"
] ]
}, },
{ {