diff --git a/zajecia8/.RData b/zajecia8/.RData new file mode 100644 index 0000000..b41938b Binary files /dev/null and b/zajecia8/.RData differ diff --git a/zajecia8/.Rhistory b/zajecia8/.Rhistory new file mode 100644 index 0000000..f4af804 --- /dev/null +++ b/zajecia8/.Rhistory @@ -0,0 +1,176 @@ +rok <- 1995:2002 +liczba_przypadkow <- c(39.7, 38.2, 34.7, 33.1, 30.1, 28.4, 26.3, 24.7) +data_set <- data.frame(rok = rok, liczba_przypadkow = liczba_przypadkow) +View(data_set) +plot(data_set, main = "Wykres rozrzutu", pch = 16) +model <- lm(liczba_przypadkow ~ rok, data = data_set) +model$coefficients +plot(data_set, main = "Wykres rozrzutu", pch = 16) +abline(model, col = "red", lwd = 2) +coef(model) +confint(model) +summary(model) +fitted(model) +residuals(model) +View(data_set) +temp_rok <- data.frame(rok = seq(min(data_set$rok) - 10, +max(data_set$rok) + 10, +length = 100)) +pred <- stats::predict(model, temp_rok, interval = "prediction") +plot(data_set, main = "Wykres rozrzutu", pch = 16) +abline(model, col = "red", lwd = 2) +lines(temp_rok$rok, pred[, 2], lty = 2, col = "red") +lines(temp_rok$rok, pred[, 3], lty = 2, col = "red") +new_rok <- data.frame(rok = 2003:2007) +(pred_2003_2007 <- stats::predict(model, new_rok, interval = 'prediction')) +plot(data_set, main = "Wykres rozrzutu z predykcj� na lata 2003-2007", pch = 16, +xlim = c(1995, 2007), ylim = c(10, 40)) +plot(data_set, main = "Wykres rozrzutu z predykcją na lata 2003-2007", pch = 16, +xlim = c(1995, 2007), ylim = c(10, 40)) +abline(model, col = "red", lwd = 2) +points(2003:2007, pred_2003_2007[, 1], col = "blue", pch = 16) +temp_rok <- data.frame(rok = seq(1994, 2008, length = 100)) +pred <- stats::predict(model, temp_rok, interval = "prediction") +lines(temp_rok$rok, pred[, 2], lty = 2, col = "red") +lines(temp_rok$rok, pred[, 3], lty = 2, col = "red") +View(temp_rok) +View(pred) +load(url("http://ls.home.amu.edu.pl/data_sets/braking.RData")) 
+load(url("http://ls.home.amu.edu.pl/data_sets/braking.RData")) +head(braking) +plot(braking, main = "Wykres rozrzutu", pch = 16) +View(braking) +which(braking$distance > 150) +model <- lm(distance ~ speed, data = braking) +plot(braking, main = "Wykres rozrzutu", pch = 16) +abline(model, col = "red", lwd = 2) +coef(model) +confint(model) +summary(model) +fitted(model) +residuals(model) +temp_speed <- data.frame(speed = seq(min(braking$speed) - 10, +max(braking$speed) + 10, +length = 100)) +pred <- stats::predict(model, temp_speed, interval = "prediction") +plot(braking, main = "Wykres rozrzutu", pch = 16, ylim = c(-50, 200)) +abline(model, col = "red", lwd = 2) +lines(temp_speed$speed, pred[, 2], lty = 2, col = "red") +lines(temp_speed$speed, pred[, 3], lty = 2, col = "red") +new_speed <- data.frame(speed = 30:50) +(pred_30_50 <- stats::predict(model, new_speed, interval = 'prediction')) +plot(braking, main = "Wykres rozrzutu z predykcj� dla pr�dko�ci 30, 31, ..., 50", pch = 16, +xlim = c(0, 50), ylim = c(-50, 200)) +abline(model, col = "red", lwd = 2) +points(30:50, pred_30_50[, 1], col = "blue", pch = 16) +temp_speed <- data.frame(speed = seq(-5, 55, length = 100)) +pred <- stats::predict(model, temp_speed, interval = "prediction") +lines(temp_speed$speed, pred[, 2], lty = 2, col = "red") +lines(temp_speed$speed, pred[, 3], lty = 2, col = "red") +model <- lm(distance ~ speed - 1, data = braking) +plot(braking, main = "Wykres rozrzutu", pch = 16) +abline(model, col = "red", lwd = 2) +coef(model) +confint(model) +summary(model) +temp_speed <- data.frame(speed = seq(min(braking$speed) - 10, +max(braking$speed) + 10, +length = 100)) +pred <- stats::predict(model, temp_speed, interval = "prediction") +plot(braking, main = "Wykres rozrzutu", pch = 16, ylim = c(-50, 200)) +abline(model, col = "red", lwd = 2) +lines(temp_speed$speed, pred[, 2], lty = 2, col = "red") +lines(temp_speed$speed, pred[, 3], lty = 2, col = "red") +new_speed <- data.frame(speed = 30:50) 
+(pred_30_50 <- stats::predict(model, new_speed, interval = 'prediction')) +plot(braking, main = "Wykres rozrzutu z predykcj� dla pr�dko�ci 30, 31, ..., 50", pch = 16, +xlim = c(0, 50), ylim = c(-50, 200)) +abline(model, col = "red", lwd = 2) +points(30:50, pred_30_50[, 1], col = "blue", pch = 16) +temp_speed <- data.frame(speed = seq(-5, 55, length = 100)) +pred <- stats::predict(model, temp_speed, interval = "prediction") +lines(temp_speed$speed, pred[, 2], lty = 2, col = "red") +lines(temp_speed$speed, pred[, 3], lty = 2, col = "red") +braking_1 <- braking[-27, ] +model_1 <- lm(distance ~ speed, data = braking_1) +plot(braking_1, main = "Wykres rozrzutu", pch = 16) +abline(model_1, col = "green", lwd = 2) +coef(model_1) +confint(model_1) +summary(model_1) +fitted(model_1) +residuals(model_1) +temp_speed_1 <- data.frame(speed = seq(min(braking_1$speed) - 10, +max(braking_1$speed) + 10, +length = 100)) +pred_1 <- stats::predict(model_1, temp_speed_1, interval = "prediction") +plot(braking_1, main = "Wykres rozrzutu", pch = 16, ylim = c(-50, 120)) +abline(model_1, col = "green", lwd = 2) +lines(temp_speed_1$speed, pred_1[, 2], lty = 2, col = "green") +lines(temp_speed_1$speed, pred_1[, 3], lty = 2, col = "green") +new_speed <- data.frame(speed = 30:50) +(pred_30_50_1 <- stats::predict(model_1, new_speed, interval = 'prediction')) +plot(braking_1, main = "Wykres rozrzutu z predykcj� dla pr�dko�ci 30, 31, ..., 50", pch = 16, +xlim = c(0, 50), ylim = c(-50, 200)) +abline(model_1, col = "green", lwd = 2) +points(30:50, pred_30_50_1[, 1], col = "blue", pch = 16) +temp_speed <- data.frame(speed = seq(-5, 55, length = 100)) +pred_1 <- stats::predict(model_1, temp_speed, interval = "prediction") +lines(temp_speed$speed, pred_1[, 2], lty = 2, col = "green") +lines(temp_speed$speed, pred_1[, 3], lty = 2, col = "green") +braking_1 <- braking[-27, ] +model_1 <- lm(distance ~ speed - 1, data = braking_1) +plot(braking_1, main = "Wykres rozrzutu", pch = 16) +abline(model_1, col 
= "green", lwd = 2) +coef(model_1) +confint(model_1) +summary(model_1) +fitted(model_1) +residuals(model_1) +braking_1 <- braking[-27, ] +model_1 <- lm(distance ~ speed - 1, data = braking_1) +plot(braking_1, main = "Wykres rozrzutu", pch = 16) +abline(model_1, col = "green", lwd = 2) +coef(model_1) +confint(model_1) +summary(model_1) +fitted(model_1) +residuals(model_1) +braking_1 <- braking[-27, ] +model_1 <- lm(distance ~ speed, data = braking_1) +plot(braking_1, main = "Wykres rozrzutu", pch = 16) +abline(model_1, col = "green", lwd = 2) +coef(model_1) +confint(model_1) +braking_1 <- braking[-27, ] +model_1 <- lm(distance ~ speed - 1, data = braking_1) +plot(braking_1, main = "Wykres rozrzutu", pch = 16) +abline(model_1, col = "green", lwd = 2) +coef(model_1) +confint(model_1) +summary(model_1) +fitted(model_1) +residuals(model_1) +temp_speed_1 <- data.frame(speed = seq(min(braking_1$speed) - 10, +max(braking_1$speed) + 10, +length = 100)) +pred_1 <- stats::predict(model_1, temp_speed_1, interval = "prediction") +plot(braking_1, main = "Wykres rozrzutu", pch = 16, ylim = c(-50, 120)) +abline(model_1, col = "green", lwd = 2) +lines(temp_speed_1$speed, pred_1[, 2], lty = 2, col = "green") +lines(temp_speed_1$speed, pred_1[, 3], lty = 2, col = "green") +new_speed <- data.frame(speed = 30:50) +(pred_30_50_1 <- stats::predict(model_1, new_speed, interval = 'prediction')) +plot(braking_1, main = "Wykres rozrzutu z predykcj� dla pr�dko�ci 30, 31, ..., 50", pch = 16, +xlim = c(0, 50), ylim = c(-50, 200)) +abline(model_1, col = "green", lwd = 2) +points(30:50, pred_30_50_1[, 1], col = "blue", pch = 16) +temp_speed <- data.frame(speed = seq(-5, 55, length = 100)) +pred_1 <- stats::predict(model_1, temp_speed, interval = "prediction") +lines(temp_speed$speed, pred_1[, 2], lty = 2, col = "green") +lines(temp_speed$speed, pred_1[, 3], lty = 2, col = "green") +summary(model_1) +braking_1 <- braking[-27, ] +model_1 <- lm(distance ~ speed, data = braking_1) +summary(model_1) 
+median(residuals(model_1)) diff --git a/zajecia8/README.md b/zajecia8/README.md new file mode 100644 index 0000000..fb144d4 --- /dev/null +++ b/zajecia8/README.md @@ -0,0 +1,18 @@ +# Zajęcia 8 + + +## Regresja +Główną ideą regresji jest przewidywanie, prognozowanie danych dla pewnej zmiennej na podstawie innych zmiennych. Innymi słowy: jaką wartość przyjmie dana zmienna, gdy będziemy znali wartość innej zmiennej. Oczywiście, aby móc "poszukiwać" wartości jednej zmiennej na podstawie innej zmiennej, musimy za pomocą analizy regresji skonstruować model regresyjny, czyli model, który będzie z założonym błędem statystycznym przewidywał wartość, poziom danej cechy. + + +## Regresja liniowa +Regresja liniowa jest najprostszym wariantem regresji w statystyce. Zakłada ona, że zależność pomiędzy zmienną objaśnianą a objaśniającą jest zależnością liniową. + + +## Poziom ufności +Jak często mamy rację, czyli jak często przedział ufności wyznaczony tą metodą zawiera prawdziwą wartość parametru. Wyrażany w procentach. + + +## Reszty +Reszta to po prostu różnica między wynikiem zmierzonym a wynikiem przewidzianym przez model. 
+
diff --git a/zajecia8/Zajęcia8.pdf b/zajecia8/Zajęcia8.pdf
new file mode 100644
index 0000000..8881ce6
Binary files /dev/null and b/zajecia8/Zajęcia8.pdf differ
diff --git a/zajecia8/zadania.R b/zajecia8/zadania.R
new file mode 100644
index 0000000..a2696c2
--- /dev/null
+++ b/zajecia8/zadania.R
@@ -0,0 +1,208 @@
+# Class 8 exercises: simple linear regression with prediction intervals.
+
+# ZAD1 ----
+# liczba_przypadkow (number of cases) for 1995-2002 as a linear function of rok.
+rok <- 1995:2002
+liczba_przypadkow <- c(39.7, 38.2, 34.7, 33.1, 30.1, 28.4, 26.3, 24.7)
+data_set <- data.frame(rok = rok, liczba_przypadkow = liczba_przypadkow)
+plot(data_set, main = "Wykres rozrzutu", pch = 16)
+
+# Fit the line, overlay it on the scatter plot and inspect the estimates.
+model <- lm(liczba_przypadkow ~ rok, data = data_set)
+model$coefficients
+plot(data_set, main = "Wykres rozrzutu", pch = 16)
+abline(model, col = "red", lwd = 2)
+coef(model)
+confint(model)
+
+summary(model)
+
+fitted(model)
+residuals(model)
+
+# Prediction band on a grid reaching 10 years beyond the observed range.
+# `length.out` is written in full instead of relying on partial matching.
+temp_rok <- data.frame(
+  rok = seq(min(data_set$rok) - 10, max(data_set$rok) + 10, length.out = 100)
+)
+pred <- stats::predict(model, temp_rok, interval = "prediction")
+plot(data_set, main = "Wykres rozrzutu", pch = 16)
+abline(model, col = "red", lwd = 2)
+# Columns 2 and 3 of the prediction matrix are the lower and upper bounds.
+lines(temp_rok$rok, pred[, 2], lty = 2, col = "red")
+lines(temp_rok$rok, pred[, 3], lty = 2, col = "red")
+
+# Forecast for 2003-2007; the outer parentheses print the assigned matrix.
+new_rok <- data.frame(rok = 2003:2007)
+(pred_2003_2007 <- stats::predict(model, new_rok, interval = "prediction"))
+plot(
+  data_set,
+  main = "Wykres rozrzutu z predykcją na lata 2003-2007", pch = 16,
+  xlim = c(1995, 2007), ylim = c(10, 40)
+)
+abline(model, col = "red", lwd = 2)
+points(2003:2007, pred_2003_2007[, 1], col = "blue", pch = 16)
+temp_rok <- data.frame(rok = seq(1994, 2008, length.out = 100))
+pred <- stats::predict(model, temp_rok, interval = "prediction")
+lines(temp_rok$rok, pred[, 2], lty = 2, col = "red")
+lines(temp_rok$rok, pred[, 3], lty = 2, col = "red")
+
+
+# ZAD2 ----
+# Braking distance as a linear function of speed.
+
+# Network access required; the file is expected to define `braking`.
+load(url("http://ls.home.amu.edu.pl/data_sets/braking.RData"))
+head(braking)
+plot(braking, main = "Wykres rozrzutu", pch = 16)
+# Uncomment to list the rows with a braking distance above 150:
+# which(braking$distance > 150)
+
+# Model with an intercept, fitted to all observations.
+model <- lm(distance ~ speed, data = braking)
+plot(braking, main = "Wykres rozrzutu", pch = 16)
+abline(model, col = "red", lwd = 2)
+coef(model)
+confint(model)
+
+summary(model)
+
+fitted(model)
+residuals(model)
+
+temp_speed <- data.frame(
+  speed = seq(min(braking$speed) - 10, max(braking$speed) + 10,
+              length.out = 100)
+)
+pred <- stats::predict(model, temp_speed, interval = "prediction")
+plot(braking, main = "Wykres rozrzutu", pch = 16, ylim = c(-50, 200))
+abline(model, col = "red", lwd = 2)
+lines(temp_speed$speed, pred[, 2], lty = 2, col = "red")
+lines(temp_speed$speed, pred[, 3], lty = 2, col = "red")
+
+new_speed <- data.frame(speed = 30:50)
+(pred_30_50 <- stats::predict(model, new_speed, interval = "prediction"))
+plot(
+  braking,
+  main = "Wykres rozrzutu z predykcją dla prędkości 30, 31, ..., 50",
+  pch = 16, xlim = c(0, 50), ylim = c(-50, 200)
+)
+abline(model, col = "red", lwd = 2)
+points(30:50, pred_30_50[, 1], col = "blue", pch = 16)
+temp_speed <- data.frame(speed = seq(-5, 55, length.out = 100))
+pred <- stats::predict(model, temp_speed, interval = "prediction")
+lines(temp_speed$speed, pred[, 2], lty = 2, col = "red")
+lines(temp_speed$speed, pred[, 3], lty = 2, col = "red")
+
+# Model without an intercept (`- 1` drops it), fitted to all observations.
+model <- lm(distance ~ speed - 1, data = braking)
+plot(braking, main = "Wykres rozrzutu", pch = 16)
+abline(model, col = "red", lwd = 2)
+coef(model)
+confint(model)
+
+summary(model)
+
+fitted(model)
+residuals(model)
+
+temp_speed <- data.frame(
+  speed = seq(min(braking$speed) - 10, max(braking$speed) + 10,
+              length.out = 100)
+)
+pred <- stats::predict(model, temp_speed, interval = "prediction")
+plot(braking, main = "Wykres rozrzutu", pch = 16, ylim = c(-50, 200))
+abline(model, col = "red", lwd = 2)
+lines(temp_speed$speed, pred[, 2], lty = 2, col = "red")
+lines(temp_speed$speed, pred[, 3], lty = 2, col = "red")
+
+new_speed <- data.frame(speed = 30:50)
+(pred_30_50 <- stats::predict(model, new_speed, interval = "prediction"))
+plot(
+  braking,
+  main = "Wykres rozrzutu z predykcją dla prędkości 30, 31, ..., 50",
+  pch = 16, xlim = c(0, 50), ylim = c(-50, 200)
+)
+abline(model, col = "red", lwd = 2)
+points(30:50, pred_30_50[, 1], col = "blue", pch = 16)
+temp_speed <- data.frame(speed = seq(-5, 55, length.out = 100))
+pred <- stats::predict(model, temp_speed, interval = "prediction")
+lines(temp_speed$speed, pred[, 2], lty = 2, col = "red")
+lines(temp_speed$speed, pred[, 3], lty = 2, col = "red")
+
+# The same two models refitted with row 27 removed.
+# NOTE(review): row 27 is presumably the outlier that the commented-out
+# which() call above reports - confirm against the data.
+braking_1 <- braking[-27, ]
+
+# Model with an intercept, row 27 removed.
+model_1 <- lm(distance ~ speed, data = braking_1)
+plot(braking_1, main = "Wykres rozrzutu", pch = 16)
+abline(model_1, col = "green", lwd = 2)
+coef(model_1)
+confint(model_1)
+
+summary(model_1)
+
+fitted(model_1)
+residuals(model_1)
+
+temp_speed_1 <- data.frame(
+  speed = seq(min(braking_1$speed) - 10, max(braking_1$speed) + 10,
+              length.out = 100)
+)
+pred_1 <- stats::predict(model_1, temp_speed_1, interval = "prediction")
+plot(braking_1, main = "Wykres rozrzutu", pch = 16, ylim = c(-50, 120))
+abline(model_1, col = "green", lwd = 2)
+lines(temp_speed_1$speed, pred_1[, 2], lty = 2, col = "green")
+lines(temp_speed_1$speed, pred_1[, 3], lty = 2, col = "green")
+
+new_speed <- data.frame(speed = 30:50)
+(pred_30_50_1 <- stats::predict(model_1, new_speed, interval = "prediction"))
+plot(
+  braking_1,
+  main = "Wykres rozrzutu z predykcją dla prędkości 30, 31, ..., 50",
+  pch = 16, xlim = c(0, 50), ylim = c(-50, 200)
+)
+abline(model_1, col = "green", lwd = 2)
+points(30:50, pred_30_50_1[, 1], col = "blue", pch = 16)
+temp_speed <- data.frame(speed = seq(-5, 55, length.out = 100))
+pred_1 <- stats::predict(model_1, temp_speed, interval = "prediction")
+lines(temp_speed$speed, pred_1[, 2], lty = 2, col = "green")
+lines(temp_speed$speed, pred_1[, 3], lty = 2, col = "green")
+
+# Model without an intercept, row 27 removed (braking_1 is reused unchanged).
+model_1 <- lm(distance ~ speed - 1, data = braking_1)
+plot(braking_1, main = "Wykres rozrzutu", pch = 16)
+abline(model_1, col = "green", lwd = 2)
+coef(model_1)
+confint(model_1)
+
+summary(model_1)
+
+fitted(model_1)
+residuals(model_1)
+
+temp_speed_1 <- data.frame(
+  speed = seq(min(braking_1$speed) - 10, max(braking_1$speed) + 10,
+              length.out = 100)
+)
+pred_1 <- stats::predict(model_1, temp_speed_1, interval = "prediction")
+plot(braking_1, main = "Wykres rozrzutu", pch = 16, ylim = c(-50, 120))
+abline(model_1, col = "green", lwd = 2)
+lines(temp_speed_1$speed, pred_1[, 2], lty = 2, col = "green")
+lines(temp_speed_1$speed, pred_1[, 3], lty = 2, col = "green")
+
+new_speed <- data.frame(speed = 30:50)
+(pred_30_50_1 <- stats::predict(model_1, new_speed, interval = "prediction"))
+plot(
+  braking_1,
+  main = "Wykres rozrzutu z predykcją dla prędkości 30, 31, ..., 50",
+  pch = 16, xlim = c(0, 50), ylim = c(-50, 200)
+)
+abline(model_1, col = "green", lwd = 2)
+points(30:50, pred_30_50_1[, 1], col = "blue", pch = 16)
+temp_speed <- data.frame(speed = seq(-5, 55, length.out = 100))
+pred_1 <- stats::predict(model_1, temp_speed, interval = "prediction")
+lines(temp_speed$speed, pred_1[, 2], lty = 2, col = "green")
+lines(temp_speed$speed, pred_1[, 3], lty = 2, col = "green")
\ No newline at end of file
diff --git a/zajecia8/zajecia8.Rproj b/zajecia8/zajecia8.Rproj
new file mode 100644
index 0000000..8e3c2eb
--- /dev/null
+++ b/zajecia8/zajecia8.Rproj
@@ -0,0 +1,13 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX