Usuwanie obserwacji odstających

This commit is contained in:
Paweł Skórzewski 2022-03-25 10:08:25 +01:00
parent 1f98b6813c
commit 44ae9e8b11

View File

@ -261,7 +261,9 @@
"### Metryki dla zadań regresji\n",
"\n",
"Dla zadań regresji możemy zastosować np.:\n",
" * błąd średniokwadratowy (*root-mean-square error*, RMSE):\n",
" * błąd średniokwadratowy (*mean-square error*, MSE):\n",
" $$ \\mathrm{MSE} \\, = \\, \\frac{1}{m} \\sum_{i=1}^{m} \\left( \\hat{y}^{(i)} - y^{(i)} \\right)^2 $$\n",
" * pierwiastek z błędu średniokwadratowego (*root-mean-square error*, RMSE):\n",
" $$ \\mathrm{RMSE} \\, = \\, \\sqrt{ \\frac{1}{m} \\sum_{i=1}^{m} \\left( \\hat{y}^{(i)} - y^{(i)} \\right)^2 } $$\n",
" * średni błąd bezwzględny (*mean absolute error*, MAE):\n",
" $$ \\mathrm{MAE} \\, = \\, \\frac{1}{m} \\sum_{i=1}^{m} \\left| \\hat{y}^{(i)} - y^{(i)} \\right| $$"
@ -293,7 +295,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 1,
"metadata": {
"slideshow": {
"slide_type": "notes"
@ -315,7 +317,7 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 2,
"metadata": {
"slideshow": {
"slide_type": "notes"
@ -334,7 +336,7 @@
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 3,
"metadata": {
"slideshow": {
"slide_type": "notes"
@ -406,7 +408,7 @@
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 4,
"metadata": {
"slideshow": {
"slide_type": "notes"
@ -430,7 +432,7 @@
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": 5,
"metadata": {
"slideshow": {
"slide_type": "subslide"
@ -456,7 +458,7 @@
},
{
"cell_type": "code",
"execution_count": 29,
"execution_count": 6,
"metadata": {
"slideshow": {
"slide_type": "notes"
@ -503,7 +505,7 @@
},
{
"cell_type": "code",
"execution_count": 30,
"execution_count": 7,
"metadata": {
"slideshow": {
"slide_type": "notes"
@ -531,7 +533,7 @@
},
{
"cell_type": "code",
"execution_count": 31,
"execution_count": 8,
"metadata": {
"slideshow": {
"slide_type": "subslide"
@ -561,7 +563,7 @@
},
{
"cell_type": "code",
"execution_count": 32,
"execution_count": 9,
"metadata": {
"slideshow": {
"slide_type": "notes"
@ -583,7 +585,7 @@
},
{
"cell_type": "code",
"execution_count": 33,
"execution_count": 10,
"metadata": {
"slideshow": {
"slide_type": "subslide"
@ -597,7 +599,7 @@
},
{
"cell_type": "code",
"execution_count": 34,
"execution_count": 11,
"metadata": {
"slideshow": {
"slide_type": "notes"
@ -617,7 +619,7 @@
},
{
"cell_type": "code",
"execution_count": 35,
"execution_count": 12,
"metadata": {
"slideshow": {
"slide_type": "subslide"
@ -627,7 +629,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "32929ab5e3024128bd39a6c165e50196",
"model_id": "6325cec10a034a9d96d862dee900013d",
"version_major": 2,
"version_minor": 0
},
@ -644,7 +646,7 @@
"<function __main__.interactive_classification(highlight)>"
]
},
"execution_count": 35,
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
@ -1185,9 +1187,10 @@
"# Odrzućmy obserwacje odstające\n",
"alldata_no_outliers = [\n",
" (index, item) for index, item in alldata.iterrows() \n",
" if item.price > 100 and item.sqrMetres > 10]\n",
" if item.price > 10000 and item.sqrMetres < 1000]\n",
"\n",
"alldata_no_outliers = alldata.loc[(alldata['price'] > 100) & (alldata['sqrMetres'] > 100)]"
"# Alternatywnie można to zrobić w następujący sposób:\n",
"alldata_no_outliers = alldata.loc[(alldata['price'] > 10000) & (alldata['sqrMetres'] < 1000)]"
]
},
{