zajecia 9-10
This commit is contained in:
parent
a16b81b525
commit
49e5ce18ca
BIN
zajecia10/.RData
Normal file
BIN
zajecia10/.RData
Normal file
Binary file not shown.
99
zajecia10/.Rhistory
Normal file
99
zajecia10/.Rhistory
Normal file
@ -0,0 +1,99 @@
|
|||||||
|
load(url("http://ls.home.amu.edu.pl/data_sets/liver_data.RData"))
|
||||||
|
head(liver_data)
|
||||||
|
liver_data$condition <- ifelse(liver_data$condition == "Yes", 1, 0)
|
||||||
|
model_1 <- glm(condition ~ bilirubin + ldh, data = liver_data, family = 'binomial')
|
||||||
|
model_1
|
||||||
|
summary(model_1)
|
||||||
|
step(model_1)
|
||||||
|
exp(coef(model_1)[2])
|
||||||
|
exp(coef(model_1)[3])
|
||||||
|
View(liver_data)
|
||||||
|
View(liver_data)
|
||||||
|
library(ROCR)
|
||||||
|
install.packages("ROCR")
|
||||||
|
library(ROCR)
|
||||||
|
pred_1 <- prediction(model_1$fitted, liver_data$condition)
|
||||||
|
plot(performance(pred_1, 'tpr', 'fpr'), main = "Model 1")
|
||||||
|
performance(pred_1, 'auc')@y.values
|
||||||
|
liver_data_new <- data.frame(bilirubin = c(0.9, 2.1, 3.4), ldh = c(100, 200, 300))
|
||||||
|
(predict_glm <- stats::predict(model_1,
|
||||||
|
liver_data_new,
|
||||||
|
type = 'response'))
|
||||||
|
model_1_hat <- coef(model_1)[1] +
|
||||||
|
coef(model_1)[2] * liver_data$bilirubin +
|
||||||
|
coef(model_1)[3] * liver_data$ldh
|
||||||
|
model_1_temp <- seq(min(model_1_hat) - 1, max(model_1_hat) + 2.5, length.out = 100)
|
||||||
|
condition_temp <- exp(model_1_temp) / (1 + exp(model_1_temp))
|
||||||
|
plot(model_1_temp, condition_temp, type = "l", xlab = "X beta", ylab = "condition",
|
||||||
|
xlim = c(-6, 9), ylim = c(-0.1, 1.1))
|
||||||
|
points(model_1_hat, liver_data$condition, pch = 16)
|
||||||
|
points(coef(model_1)[1] +
|
||||||
|
coef(model_1)[2] * liver_data_new$bilirubin +
|
||||||
|
coef(model_1)[3] * liver_data_new$ldh,
|
||||||
|
predict_glm, pch = 16, col = "red")
|
||||||
|
mode_1_hat_c1 <- model_1_hat[liver_data$condition == 1]
|
||||||
|
which(model_1_hat == min(mode_1_hat_c1) & liver_data$condition == 1)
|
||||||
|
which(model_1_hat == min(mode_1_hat_c1[mode_1_hat_c1 > min(mode_1_hat_c1)])
|
||||||
|
& liver_data$condition == 1)
|
||||||
|
mode_1_hat_c0 <- model_1_hat[liver_data$condition == 0]
|
||||||
|
which(model_1_hat == max(mode_1_hat_c0) & liver_data$condition == 0)
|
||||||
|
liver_data_wo <- liver_data[-c(18, 26, 29), ]
|
||||||
|
model_1_wo <- glm(condition ~ bilirubin + ldh, data = liver_data_wo, family = 'binomial')
|
||||||
|
model_1_wo
|
||||||
|
summary(model_1_wo)
|
||||||
|
step(model_1_wo)
|
||||||
|
exp(coef(model_1_wo)[2])
|
||||||
|
exp(coef(model_1_wo)[3])
|
||||||
|
pred_1_wo <- prediction(model_1_wo$fitted, liver_data_wo$condition)
|
||||||
|
plot(performance(pred_1_wo, 'tpr', 'fpr'), main = "Model 1 (wo)")
|
||||||
|
performance(pred_1_wo, 'auc')@y.values
|
||||||
|
liver_data_wo_new <- data.frame(bilirubin = c(0.9, 2.1, 3.4), ldh = c(100, 200, 300))
|
||||||
|
(predict_glm_wo <- stats::predict(model_1_wo,
|
||||||
|
liver_data_wo_new,
|
||||||
|
type = 'response'))
|
||||||
|
model_1_wo_hat <- coef(model_1_wo)[1] +
|
||||||
|
coef(model_1_wo)[2] * liver_data_wo$bilirubin +
|
||||||
|
coef(model_1_wo)[3] * liver_data_wo$ldh
|
||||||
|
model_1_wo_temp <- seq(min(model_1_wo_hat) - 10, max(model_1_wo_hat) + 20, length.out = 100)
|
||||||
|
condition_wo_temp <- exp(model_1_wo_temp) / (1 + exp(model_1_wo_temp))
|
||||||
|
plot(model_1_wo_temp, condition_wo_temp, type = "l", xlab = "X beta", ylab = "condition",
|
||||||
|
xlim = c(-40, 89), ylim = c(-0.1, 1.1))
|
||||||
|
points(model_1_wo_hat, liver_data_wo$condition, pch = 16)
|
||||||
|
points(coef(model_1_wo)[1] +
|
||||||
|
coef(model_1_wo)[2] * liver_data_wo_new$bilirubin +
|
||||||
|
coef(model_1_wo)[3] * liver_data_wo_new$ldh,
|
||||||
|
predict_glm_wo, pch = 16, col = "red")
|
||||||
|
library(DAAG)
|
||||||
|
install.packages("DAAG")
|
||||||
|
library(DAAG)
|
||||||
|
head(moths)
|
||||||
|
model_1 <- glm(A ~ log(meters), data = moths, family = 'poisson')
|
||||||
|
model_1
|
||||||
|
summary(model_1)
|
||||||
|
step(model_1)
|
||||||
|
data_new <- data.frame(meters = c(3, 20, 100))
|
||||||
|
(pred_1 <- stats::predict(model_1, data_new, type = "response"))
|
||||||
|
moths$A_hat <- stats::predict(model_1, type = "response")
|
||||||
|
moths <- moths[with(moths, order(meters)), ]
|
||||||
|
plot(log(moths$meters), moths$A_hat,
|
||||||
|
type = "l", col = "red", lwd = 2,
|
||||||
|
xlab = "log(moths$meters)", ylab = "A",
|
||||||
|
ylim = c(0, 40), main = "Model 1")
|
||||||
|
points(log(moths$meters), moths$A, pch = 16)
|
||||||
|
points(log(data_new$meters), pred_1, pch = 16, col = "blue", lwd = 4)
|
||||||
|
cat("1.", "\n")
|
||||||
|
rm(moths)
|
||||||
|
model_2 <- glm(P ~ log(meters), data = moths, family = 'poisson')
|
||||||
|
model_2
|
||||||
|
cat("2.", "\n")
|
||||||
|
summary(model_2)
|
||||||
|
step(model_2)
|
||||||
|
(pred_2 <- stats::predict(model_2, data_new, type = "response"))
|
||||||
|
moths$P_hat <- stats::predict(model_2, type = "response")
|
||||||
|
moths <- moths[with(moths, order(meters)), ]
|
||||||
|
plot(log(moths$meters), moths$P_hat,
|
||||||
|
type = "l", col = "red", lwd = 2,
|
||||||
|
xlab = "log(moths$meters)", ylab = "P",
|
||||||
|
ylim = c(0, 20), main = "Model 2")
|
||||||
|
points(log(moths$meters), moths$P, pch = 16)
|
||||||
|
points(log(data_new$meters), pred_2, pch = 16, col = "blue", lwd = 4)
|
35
zajecia10/README.md
Normal file
35
zajecia10/README.md
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
# Zajęcia 10
|
||||||
|
|
||||||
|
|
||||||
|
## Regresja logistyczna
|
||||||
|
Regresja logistyczna – jedna z metod regresji używanych w statystyce w przypadku, gdy zmienna zależna jest na skali dychotomicznej (przyjmuje tylko dwie wartości). Zmienne niezależne w analizie regresji logistycznej mogą przyjmować charakter nominalny, porządkowy, przedziałowy lub ilorazowy. W przypadku zmiennych nominalnych oraz porządkowych następuje ich przekodowanie w liczbę zmiennych zero-jedynkowych taką samą lub o 1 mniejszą niż liczba kategorii w jej definicji.
|
||||||
|
|
||||||
|
Zwykle wartości zmiennej objaśnianej wskazują na wystąpienie, lub brak wystąpienia pewnego zdarzenia, które chcemy prognozować. Regresja logistyczna pozwala wówczas na obliczanie prawdopodobieństwa tego zdarzenia (tzw. prawdopodobieństwo sukcesu).
|
||||||
|
|
||||||
|
|
||||||
|
## Regresja Poissona
|
||||||
|
Rozkład Poissona – dyskretny rozkład prawdopodobieństwa, wyrażający prawdopodobieństwo szeregu wydarzeń mających miejsce w określonym czasie, gdy te wydarzenia występują ze znaną średnią częstotliwością i w sposób niezależny od czasu jaki upłynął od ostatniego zajścia takiego zdarzenia.
|
||||||
|
|
||||||
|
Regresja to pewnie dla danych w rozkładzie Poissona.
|
||||||
|
|
||||||
|
|
||||||
|
## Krzywa ROC
|
||||||
|
W statystyce matematycznej krzywa ROC jest graficzną reprezentacją efektywności modelu predykcyjnego poprzez wykreślenie charakterystyki jakościowej klasyfikatorów binarnych powstałych z modelu przy zastosowaniu wielu różnych punktów odcięcia. Mówiąc inaczej – każdy punkt krzywej ROC odpowiada innej macierzy błędu uzyskanej przez modyfikowanie „cut-off point”.
|
||||||
|
<br><br>
|
||||||
|
![ROC](roc.png)
|
||||||
|
|
||||||
|
Pojęcia:
|
||||||
|
- TPR (True Positive Rate) – określa zdolność klasyfikatora do wykrywania klasy pozytywnej
|
||||||
|
|
||||||
|
- Lepiej jest powyżej przekątnej, gorzej poniżej. Przekątna to klasyfikator losowy. Przekątna idzie od początku układu współrzędnych.
|
||||||
|
|
||||||
|
- False positive rate = 1-specyficzność
|
||||||
|
|
||||||
|
- Punkt równowagi to klasyfikator gdzie czułość = specyficzność. Jest to na przecięciu drugiej przekątnej z wykresem.
|
||||||
|
|
||||||
|
- AUC - Interpretacja AUROC (Area Under the ROC) to prawdopodobieństwo, że badany model predykcyjny oceni wyżej (wartość score) losowy element klasy pozytywnej od losowego elementu klasy negatywnej.
|
||||||
|
|
||||||
|
|
||||||
|
## Notatki
|
||||||
|
- Liczba stopni swobody – liczba niezależnych wyników obserwacji pomniejszona o liczbę związków, które łączą te wyniki ze sobą.
|
||||||
|
Liczbę stopni swobody można utożsamiać z **liczbą niezależnych zmiennych losowych**, które wpływają na wynik. Inną interpretacją liczby stopni swobody może być: liczba obserwacji minus liczba parametrów estymowanych przy pomocy tych obserwacji.
|
BIN
zajecia10/Zajęcia10.pdf
Normal file
BIN
zajecia10/Zajęcia10.pdf
Normal file
Binary file not shown.
BIN
zajecia10/roc.png
Normal file
BIN
zajecia10/roc.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 153 KiB |
136
zajecia10/zadania.R
Normal file
136
zajecia10/zadania.R
Normal file
@ -0,0 +1,136 @@
|
|||||||
|
# ZAD1
|
||||||
|
load(url("http://ls.home.amu.edu.pl/data_sets/liver_data.RData"))
|
||||||
|
head(liver_data)
|
||||||
|
|
||||||
|
liver_data$condition <- ifelse(liver_data$condition == "Yes", 1, 0)
|
||||||
|
model_1 <- glm(condition ~ bilirubin + ldh, data = liver_data, family = 'binomial')
|
||||||
|
model_1
|
||||||
|
|
||||||
|
summary(model_1)
|
||||||
|
|
||||||
|
step(model_1)
|
||||||
|
|
||||||
|
exp(coef(model_1)[2])
|
||||||
|
exp(coef(model_1)[3])
|
||||||
|
|
||||||
|
install.packages("ROCR")
|
||||||
|
library(ROCR)
|
||||||
|
pred_1 <- prediction(model_1$fitted, liver_data$condition)
|
||||||
|
plot(performance(pred_1, 'tpr', 'fpr'), main = "Model 1")
|
||||||
|
performance(pred_1, 'auc')@y.values
|
||||||
|
|
||||||
|
liver_data_new <- data.frame(bilirubin = c(0.9, 2.1, 3.4), ldh = c(100, 200, 300))
|
||||||
|
(predict_glm <- stats::predict(model_1,
|
||||||
|
liver_data_new,
|
||||||
|
type = 'response'))
|
||||||
|
model_1_hat <- coef(model_1)[1] +
|
||||||
|
coef(model_1)[2] * liver_data$bilirubin +
|
||||||
|
coef(model_1)[3] * liver_data$ldh
|
||||||
|
model_1_temp <- seq(min(model_1_hat) - 1, max(model_1_hat) + 2.5, length.out = 100)
|
||||||
|
condition_temp <- exp(model_1_temp) / (1 + exp(model_1_temp))
|
||||||
|
plot(model_1_temp, condition_temp, type = "l", xlab = "X beta", ylab = "condition",
|
||||||
|
xlim = c(-6, 9), ylim = c(-0.1, 1.1))
|
||||||
|
points(model_1_hat, liver_data$condition, pch = 16)
|
||||||
|
points(coef(model_1)[1] +
|
||||||
|
coef(model_1)[2] * liver_data_new$bilirubin +
|
||||||
|
coef(model_1)[3] * liver_data_new$ldh,
|
||||||
|
predict_glm, pch = 16, col = "red")
|
||||||
|
|
||||||
|
# obserwacje odstajace
|
||||||
|
mode_1_hat_c1 <- model_1_hat[liver_data$condition == 1]
|
||||||
|
which(model_1_hat == min(mode_1_hat_c1) & liver_data$condition == 1)
|
||||||
|
which(model_1_hat == min(mode_1_hat_c1[mode_1_hat_c1 > min(mode_1_hat_c1)])
|
||||||
|
& liver_data$condition == 1)
|
||||||
|
mode_1_hat_c0 <- model_1_hat[liver_data$condition == 0]
|
||||||
|
which(model_1_hat == max(mode_1_hat_c0) & liver_data$condition == 0)
|
||||||
|
|
||||||
|
|
||||||
|
liver_data_wo <- liver_data[-c(18, 26, 29), ]
|
||||||
|
model_1_wo <- glm(condition ~ bilirubin + ldh, data = liver_data_wo, family = 'binomial')
|
||||||
|
model_1_wo
|
||||||
|
|
||||||
|
summary(model_1_wo)
|
||||||
|
|
||||||
|
step(model_1_wo)
|
||||||
|
|
||||||
|
exp(coef(model_1_wo)[2])
|
||||||
|
exp(coef(model_1_wo)[3])
|
||||||
|
|
||||||
|
pred_1_wo <- prediction(model_1_wo$fitted, liver_data_wo$condition)
|
||||||
|
plot(performance(pred_1_wo, 'tpr', 'fpr'), main = "Model 1 (wo)")
|
||||||
|
performance(pred_1_wo, 'auc')@y.values
|
||||||
|
|
||||||
|
|
||||||
|
liver_data_wo_new <- data.frame(bilirubin = c(0.9, 2.1, 3.4), ldh = c(100, 200, 300))
|
||||||
|
(predict_glm_wo <- stats::predict(model_1_wo,
|
||||||
|
liver_data_wo_new,
|
||||||
|
type = 'response'))
|
||||||
|
model_1_wo_hat <- coef(model_1_wo)[1] +
|
||||||
|
coef(model_1_wo)[2] * liver_data_wo$bilirubin +
|
||||||
|
coef(model_1_wo)[3] * liver_data_wo$ldh
|
||||||
|
model_1_wo_temp <- seq(min(model_1_wo_hat) - 10, max(model_1_wo_hat) + 20, length.out = 100)
|
||||||
|
condition_wo_temp <- exp(model_1_wo_temp) / (1 + exp(model_1_wo_temp))
|
||||||
|
plot(model_1_wo_temp, condition_wo_temp, type = "l", xlab = "X beta", ylab = "condition",
|
||||||
|
xlim = c(-40, 89), ylim = c(-0.1, 1.1))
|
||||||
|
points(model_1_wo_hat, liver_data_wo$condition, pch = 16)
|
||||||
|
points(coef(model_1_wo)[1] +
|
||||||
|
coef(model_1_wo)[2] * liver_data_wo_new$bilirubin +
|
||||||
|
coef(model_1_wo)[3] * liver_data_wo_new$ldh,
|
||||||
|
predict_glm_wo, pch = 16, col = "red")
|
||||||
|
|
||||||
|
|
||||||
|
# ZAD2
|
||||||
|
install.packages("DAAG")
|
||||||
|
library(DAAG)
|
||||||
|
head(moths)
|
||||||
|
|
||||||
|
model_1 <- glm(A ~ log(meters), data = moths, family = 'poisson')
|
||||||
|
model_1
|
||||||
|
|
||||||
|
summary(model_1)
|
||||||
|
|
||||||
|
step(model_1)
|
||||||
|
|
||||||
|
data_new <- data.frame(meters = c(3, 20, 100))
|
||||||
|
(pred_1 <- stats::predict(model_1, data_new, type = "response"))
|
||||||
|
moths$A_hat <- stats::predict(model_1, type = "response")
|
||||||
|
moths <- moths[with(moths, order(meters)), ]
|
||||||
|
plot(log(moths$meters), moths$A_hat,
|
||||||
|
type = "l", col = "red", lwd = 2,
|
||||||
|
xlab = "log(moths$meters)", ylab = "A",
|
||||||
|
ylim = c(0, 40), main = "Model 1")
|
||||||
|
points(log(moths$meters), moths$A, pch = 16)
|
||||||
|
points(log(data_new$meters), pred_1, pch = 16, col = "blue", lwd = 4)
|
||||||
|
|
||||||
|
|
||||||
|
cat("1.", "\n")
|
||||||
|
rm(moths)
|
||||||
|
model_2 <- glm(P ~ log(meters), data = moths, family = 'poisson')
|
||||||
|
model_2
|
||||||
|
|
||||||
|
|
||||||
|
cat("2.", "\n")
|
||||||
|
summary(model_2)
|
||||||
|
|
||||||
|
|
||||||
|
cat("3.", "\n")
|
||||||
|
step(model_2)
|
||||||
|
|
||||||
|
|
||||||
|
cat("4.", "\n")
|
||||||
|
(pred_2 <- stats::predict(model_2, data_new, type = "response"))
|
||||||
|
moths$P_hat <- stats::predict(model_2, type = "response")
|
||||||
|
moths <- moths[with(moths, order(meters)), ]
|
||||||
|
plot(log(moths$meters), moths$P_hat,
|
||||||
|
type = "l", col = "red", lwd = 2,
|
||||||
|
xlab = "log(moths$meters)", ylab = "P",
|
||||||
|
ylim = c(0, 20), main = "Model 2")
|
||||||
|
points(log(moths$meters), moths$P, pch = 16)
|
||||||
|
points(log(data_new$meters), pred_2, pch = 16, col = "blue", lwd = 4)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
13
zajecia10/zajecia10.Rproj
Normal file
13
zajecia10/zajecia10.Rproj
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
Version: 1.0
|
||||||
|
|
||||||
|
RestoreWorkspace: Default
|
||||||
|
SaveWorkspace: Default
|
||||||
|
AlwaysSaveHistory: Default
|
||||||
|
|
||||||
|
EnableCodeIndexing: Yes
|
||||||
|
UseSpacesForTab: Yes
|
||||||
|
NumSpacesForTab: 2
|
||||||
|
Encoding: UTF-8
|
||||||
|
|
||||||
|
RnwWeave: Sweave
|
||||||
|
LaTeX: pdfLaTeX
|
BIN
zajecia9/.RData
Normal file
BIN
zajecia9/.RData
Normal file
Binary file not shown.
82
zajecia9/.Rhistory
Normal file
82
zajecia9/.Rhistory
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
auto <- read.csv("http://ls.home.amu.edu.pl/data_sets/Automobile.csv",
|
||||||
|
sep = ",", header = TRUE, na.strings = "?")
|
||||||
|
head(auto)
|
||||||
|
auto$num.of.doors <- ifelse(auto$num.of.doors == "four", 4, 2)
|
||||||
|
auto_wna <- na.omit(auto)
|
||||||
|
cat("wymiar nowych danych")
|
||||||
|
dim(auto_wna)
|
||||||
|
auto_wna_sel <- subset(auto_wna, select = c(horsepower, city.mpg, peak.rpm,
|
||||||
|
curb.weight, num.of.doors, price))
|
||||||
|
pairs(auto_wna_sel)
|
||||||
|
model_1 <- lm(price ~ horsepower + city.mpg + peak.rpm + curb.weight +
|
||||||
|
num.of.doors, data = auto_wna)
|
||||||
|
model_1
|
||||||
|
coef(model_1)
|
||||||
|
confint(model_1)
|
||||||
|
summary(model_1)
|
||||||
|
fitted(model_1)
|
||||||
|
residuals(model_1)
|
||||||
|
step(model_1)
|
||||||
|
step(model_1, k = log(nrow(auto_wna)))
|
||||||
|
model_0 <- lm(price ~ 1, data = auto_wna)
|
||||||
|
step(model_0, direction = "forward", scope = formula(model_1))
|
||||||
|
step(model_0, direction = "forward", scope = formula(model_1), k = log(nrow(auto_wna)))
|
||||||
|
model_1_1 <- lm(price ~ horsepower + city.mpg + curb.weight + num.of.doors, data = auto_wna)
|
||||||
|
summary(model_1_1)$coefficients
|
||||||
|
summary(model_1_1)$adj.r.squared
|
||||||
|
model_1_2 <- lm(price ~ horsepower + curb.weight + num.of.doors, data = auto_wna)
|
||||||
|
summary(model_1_2)$coefficients
|
||||||
|
summary(model_1_2)$adj.r.squared
|
||||||
|
model_1_3 <- lm(price ~ horsepower + curb.weight, data = auto_wna)
|
||||||
|
summary(model_1_3)$coefficients
|
||||||
|
summary(model_1_3)$adj.r.squared
|
||||||
|
auto_sel <- subset(auto, select = c(horsepower, city.mpg, peak.rpm,
|
||||||
|
curb.weight, num.of.doors, price))
|
||||||
|
summary(auto_sel)
|
||||||
|
library(Hmisc)
|
||||||
|
install.packages("Hmisc")
|
||||||
|
library(Hmisc)
|
||||||
|
auto_sel$price <- as.numeric(impute(auto_sel$price, mean))
|
||||||
|
auto_sel$horsepower <- as.numeric(impute(auto_sel$horsepower, mean))
|
||||||
|
auto_sel$peak.rpm <- as.numeric(impute(auto_sel$peak.rpm, mean))
|
||||||
|
auto_sel$num.of.doors <- as.numeric(impute(auto_sel$num.of.doors, median))
|
||||||
|
summary(auto_sel)
|
||||||
|
View(auto)
|
||||||
|
View(auto)
|
||||||
|
View(auto_sel)
|
||||||
|
View(auto_sel)
|
||||||
|
cat("2.", "\n")
|
||||||
|
View(auto_wna_sel)
|
||||||
|
View(auto_wna_sel)
|
||||||
|
pairs(auto_sel)
|
||||||
|
model_1_i <- lm(price ~ horsepower + city.mpg + peak.rpm + curb.weight +
|
||||||
|
num.of.doors, data = auto_sel)
|
||||||
|
model_1_i
|
||||||
|
coef(model_1_i)
|
||||||
|
confint(model_1_i)
|
||||||
|
summary(model_1_i)
|
||||||
|
fitted(model_1_i)
|
||||||
|
residuals(model_1_i)
|
||||||
|
cat("3.", "\n")
|
||||||
|
step(model_1_i)
|
||||||
|
step(model_1_i, k = log(nrow(auto_sel)))
|
||||||
|
model_0_i <- lm(price ~ 1, data = auto_sel)
|
||||||
|
step(model_0_i, direction = "forward", scope = formula(model_1_i))
|
||||||
|
step(model_0_i, direction = "forward", scope = formula(model_1_i), k = log(nrow(auto_sel)))
|
||||||
|
cat("4.", "\n")
|
||||||
|
model_1_i_1 <- lm(price ~ horsepower + city.mpg + curb.weight + peak.rpm, data = auto_sel)
|
||||||
|
summary(model_1_i_1)$coefficients
|
||||||
|
summary(model_1_i_1)$adj.r.squared
|
||||||
|
model_1_i_2 <- lm(price ~ horsepower + city.mpg + curb.weight, data = auto_sel)
|
||||||
|
summary(model_1_i_2)$coefficients
|
||||||
|
summary(model_1_i_2)$adj.r.squared
|
||||||
|
model_1_i_3 <- lm(price ~ horsepower + curb.weight, data = auto_sel)
|
||||||
|
summary(model_1_i_3)$coefficients
|
||||||
|
summary(model_1_i_3)$adj.r.squared
|
||||||
|
new_data <- data.frame(curb.weight = 2823, horsepower = 154)
|
||||||
|
model_2 <- lm(price ~ curb.weight + horsepower, data = auto_wna)
|
||||||
|
model_2_i <- lm(price ~ curb.weight + horsepower, data = auto_sel)
|
||||||
|
stats::predict(model_2, new_data, interval = "prediction")
|
||||||
|
stats::predict(model_2_i, new_data, interval = "prediction")
|
||||||
|
summary(model_2)$adj.r.squared
|
||||||
|
summary(model_2_i)$adj.r.squared
|
21
zajecia9/README.md
Normal file
21
zajecia9/README.md
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
# Zajęcia 9
|
||||||
|
|
||||||
|
|
||||||
|
## Regresja wielokrotna
|
||||||
|
Z regresją wielokrotną (wieloraką) mamy do czynienia, jeśli zmianna zależna Y związana jest z większą ilością zmiennych niezależnych
|
||||||
|
|
||||||
|
|
||||||
|
## Regresja krokowa
|
||||||
|
Regresja krokowa jest odmianą analizy regresji (nie tylko regresji liniowej), w której do modelu wprowadzane są jedynie istotne statystycznie zmienne, predyktory, które rzeczywiście „poprawiają” zbudowany model.
|
||||||
|
|
||||||
|
|
||||||
|
## BIC AIC
|
||||||
|
Kryteria wyboru pomiędzy modelami statystycznymi.
|
||||||
|
- [BIC](https://pl.wikipedia.org/wiki/Bayesowskie_kryterium_informacyjne_Schwarza) wartość BIC musi być jak najmniejsza
|
||||||
|
- [AIC](https://pl.wikipedia.org/wiki/Kryterium_informacyjne_Akaikego) wartość AIC musi być jak najmniejsza
|
||||||
|
|
||||||
|
|
||||||
|
## Notatki
|
||||||
|
- lm to funkcja która tworzy/dopasowuje regresję liniową
|
||||||
|
- step z k=log to regresja krokowa BIC, k=2 to AIC
|
||||||
|
|
BIN
zajecia9/Zajęcia9.pdf
Normal file
BIN
zajecia9/Zajęcia9.pdf
Normal file
Binary file not shown.
95
zajecia9/zadania.R
Normal file
95
zajecia9/zadania.R
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
# ZAD1
|
||||||
|
auto <- read.csv("http://ls.home.amu.edu.pl/data_sets/Automobile.csv",
|
||||||
|
sep = ",", header = TRUE, na.strings = "?")
|
||||||
|
head(auto)
|
||||||
|
auto$num.of.doors <- ifelse(auto$num.of.doors == "four", 4, 2)
|
||||||
|
|
||||||
|
auto_wna <- na.omit(auto)
|
||||||
|
cat("wymiar nowych danych")
|
||||||
|
dim(auto_wna)
|
||||||
|
|
||||||
|
auto_wna_sel <- subset(auto_wna, select = c(horsepower, city.mpg, peak.rpm,
|
||||||
|
curb.weight, num.of.doors, price))
|
||||||
|
pairs(auto_wna_sel)
|
||||||
|
model_1 <- lm(price ~ horsepower + city.mpg + peak.rpm + curb.weight +
|
||||||
|
num.of.doors, data = auto_wna)
|
||||||
|
model_1
|
||||||
|
|
||||||
|
coef(model_1)
|
||||||
|
confint(model_1)
|
||||||
|
|
||||||
|
summary(model_1)
|
||||||
|
|
||||||
|
fitted(model_1)
|
||||||
|
residuals(model_1)
|
||||||
|
|
||||||
|
step(model_1)
|
||||||
|
step(model_1, k = log(nrow(auto_wna)))
|
||||||
|
model_0 <- lm(price ~ 1, data = auto_wna)
|
||||||
|
step(model_0, direction = "forward", scope = formula(model_1))
|
||||||
|
step(model_0, direction = "forward", scope = formula(model_1), k = log(nrow(auto_wna)))
|
||||||
|
|
||||||
|
model_1_1 <- lm(price ~ horsepower + city.mpg + curb.weight + num.of.doors, data = auto_wna)
|
||||||
|
summary(model_1_1)$coefficients
|
||||||
|
summary(model_1_1)$adj.r.squared
|
||||||
|
model_1_2 <- lm(price ~ horsepower + curb.weight + num.of.doors, data = auto_wna)
|
||||||
|
summary(model_1_2)$coefficients
|
||||||
|
summary(model_1_2)$adj.r.squared
|
||||||
|
model_1_3 <- lm(price ~ horsepower + curb.weight, data = auto_wna)
|
||||||
|
summary(model_1_3)$coefficients
|
||||||
|
summary(model_1_3)$adj.r.squared
|
||||||
|
|
||||||
|
auto_sel <- subset(auto, select = c(horsepower, city.mpg, peak.rpm,
|
||||||
|
curb.weight, num.of.doors, price))
|
||||||
|
summary(auto_sel)
|
||||||
|
install.packages("Hmisc")
|
||||||
|
library(Hmisc)
|
||||||
|
auto_sel$price <- as.numeric(impute(auto_sel$price, mean))
|
||||||
|
auto_sel$horsepower <- as.numeric(impute(auto_sel$horsepower, mean))
|
||||||
|
auto_sel$peak.rpm <- as.numeric(impute(auto_sel$peak.rpm, mean))
|
||||||
|
auto_sel$num.of.doors <- as.numeric(impute(auto_sel$num.of.doors, median))
|
||||||
|
summary(auto_sel)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
cat("2.", "\n")
|
||||||
|
pairs(auto_sel)
|
||||||
|
model_1_i <- lm(price ~ horsepower + city.mpg + peak.rpm + curb.weight +
|
||||||
|
num.of.doors, data = auto_sel)
|
||||||
|
model_1_i
|
||||||
|
coef(model_1_i)
|
||||||
|
confint(model_1_i)
|
||||||
|
summary(model_1_i)
|
||||||
|
fitted(model_1_i)
|
||||||
|
residuals(model_1_i)
|
||||||
|
|
||||||
|
cat("3.", "\n")
|
||||||
|
step(model_1_i)
|
||||||
|
step(model_1_i, k = log(nrow(auto_sel)))
|
||||||
|
model_0_i <- lm(price ~ 1, data = auto_sel)
|
||||||
|
step(model_0_i, direction = "forward", scope = formula(model_1_i))
|
||||||
|
step(model_0_i, direction = "forward", scope = formula(model_1_i), k = log(nrow(auto_sel)))
|
||||||
|
|
||||||
|
cat("4.", "\n")
|
||||||
|
model_1_i_1 <- lm(price ~ horsepower + city.mpg + curb.weight + peak.rpm, data = auto_sel)
|
||||||
|
summary(model_1_i_1)$coefficients
|
||||||
|
summary(model_1_i_1)$adj.r.squared
|
||||||
|
model_1_i_2 <- lm(price ~ horsepower + city.mpg + curb.weight, data = auto_sel)
|
||||||
|
summary(model_1_i_2)$coefficients
|
||||||
|
summary(model_1_i_2)$adj.r.squared
|
||||||
|
model_1_i_3 <- lm(price ~ horsepower + curb.weight, data = auto_sel)
|
||||||
|
summary(model_1_i_3)$coefficients
|
||||||
|
summary(model_1_i_3)$adj.r.squared
|
||||||
|
|
||||||
|
new_data <- data.frame(curb.weight = 2823, horsepower = 154)
|
||||||
|
model_2 <- lm(price ~ curb.weight + horsepower, data = auto_wna)
|
||||||
|
model_2_i <- lm(price ~ curb.weight + horsepower, data = auto_sel)
|
||||||
|
stats::predict(model_2, new_data, interval = "prediction")
|
||||||
|
stats::predict(model_2_i, new_data, interval = "prediction")
|
||||||
|
summary(model_2)$adj.r.squared
|
||||||
|
summary(model_2_i)$adj.r.squared
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
13
zajecia9/zajecia9.Rproj
Normal file
13
zajecia9/zajecia9.Rproj
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
Version: 1.0
|
||||||
|
|
||||||
|
RestoreWorkspace: Default
|
||||||
|
SaveWorkspace: Default
|
||||||
|
AlwaysSaveHistory: Default
|
||||||
|
|
||||||
|
EnableCodeIndexing: Yes
|
||||||
|
UseSpacesForTab: Yes
|
||||||
|
NumSpacesForTab: 2
|
||||||
|
Encoding: UTF-8
|
||||||
|
|
||||||
|
RnwWeave: Sweave
|
||||||
|
LaTeX: pdfLaTeX
|
Loading…
Reference in New Issue
Block a user