Statystyka/zajecia10/zadania.R

137 lines
4.3 KiB
R
Raw Normal View History

2021-06-05 14:32:40 +02:00
# ZAD1
load(url("http://ls.home.amu.edu.pl/data_sets/liver_data.RData"))
head(liver_data)
liver_data$condition <- ifelse(liver_data$condition == "Yes", 1, 0)
model_1 <- glm(condition ~ bilirubin + ldh, data = liver_data, family = 'binomial')
model_1
summary(model_1)
step(model_1)
exp(coef(model_1)[2])
exp(coef(model_1)[3])
install.packages("ROCR")
library(ROCR)
pred_1 <- prediction(model_1$fitted, liver_data$condition)
plot(performance(pred_1, 'tpr', 'fpr'), main = "Model 1")
performance(pred_1, 'auc')@y.values
liver_data_new <- data.frame(bilirubin = c(0.9, 2.1, 3.4), ldh = c(100, 200, 300))
(predict_glm <- stats::predict(model_1,
liver_data_new,
type = 'response'))
model_1_hat <- coef(model_1)[1] +
coef(model_1)[2] * liver_data$bilirubin +
coef(model_1)[3] * liver_data$ldh
model_1_temp <- seq(min(model_1_hat) - 1, max(model_1_hat) + 2.5, length.out = 100)
condition_temp <- exp(model_1_temp) / (1 + exp(model_1_temp))
plot(model_1_temp, condition_temp, type = "l", xlab = "X beta", ylab = "condition",
xlim = c(-6, 9), ylim = c(-0.1, 1.1))
points(model_1_hat, liver_data$condition, pch = 16)
points(coef(model_1)[1] +
coef(model_1)[2] * liver_data_new$bilirubin +
coef(model_1)[3] * liver_data_new$ldh,
predict_glm, pch = 16, col = "red")
# obserwacje odstajace
mode_1_hat_c1 <- model_1_hat[liver_data$condition == 1]
which(model_1_hat == min(mode_1_hat_c1) & liver_data$condition == 1)
which(model_1_hat == min(mode_1_hat_c1[mode_1_hat_c1 > min(mode_1_hat_c1)])
& liver_data$condition == 1)
mode_1_hat_c0 <- model_1_hat[liver_data$condition == 0]
which(model_1_hat == max(mode_1_hat_c0) & liver_data$condition == 0)
liver_data_wo <- liver_data[-c(18, 26, 29), ]
model_1_wo <- glm(condition ~ bilirubin + ldh, data = liver_data_wo, family = 'binomial')
model_1_wo
summary(model_1_wo)
step(model_1_wo)
exp(coef(model_1_wo)[2])
exp(coef(model_1_wo)[3])
pred_1_wo <- prediction(model_1_wo$fitted, liver_data_wo$condition)
plot(performance(pred_1_wo, 'tpr', 'fpr'), main = "Model 1 (wo)")
performance(pred_1_wo, 'auc')@y.values
liver_data_wo_new <- data.frame(bilirubin = c(0.9, 2.1, 3.4), ldh = c(100, 200, 300))
(predict_glm_wo <- stats::predict(model_1_wo,
liver_data_wo_new,
type = 'response'))
model_1_wo_hat <- coef(model_1_wo)[1] +
coef(model_1_wo)[2] * liver_data_wo$bilirubin +
coef(model_1_wo)[3] * liver_data_wo$ldh
model_1_wo_temp <- seq(min(model_1_wo_hat) - 10, max(model_1_wo_hat) + 20, length.out = 100)
condition_wo_temp <- exp(model_1_wo_temp) / (1 + exp(model_1_wo_temp))
plot(model_1_wo_temp, condition_wo_temp, type = "l", xlab = "X beta", ylab = "condition",
xlim = c(-40, 89), ylim = c(-0.1, 1.1))
points(model_1_wo_hat, liver_data_wo$condition, pch = 16)
points(coef(model_1_wo)[1] +
coef(model_1_wo)[2] * liver_data_wo_new$bilirubin +
coef(model_1_wo)[3] * liver_data_wo_new$ldh,
predict_glm_wo, pch = 16, col = "red")
# ZAD2
install.packages("DAAG")
library(DAAG)
head(moths)
model_1 <- glm(A ~ log(meters), data = moths, family = 'poisson')
model_1
summary(model_1)
step(model_1)
data_new <- data.frame(meters = c(3, 20, 100))
(pred_1 <- stats::predict(model_1, data_new, type = "response"))
moths$A_hat <- stats::predict(model_1, type = "response")
moths <- moths[with(moths, order(meters)), ]
plot(log(moths$meters), moths$A_hat,
type = "l", col = "red", lwd = 2,
xlab = "log(moths$meters)", ylab = "A",
ylim = c(0, 40), main = "Model 1")
points(log(moths$meters), moths$A, pch = 16)
points(log(data_new$meters), pred_1, pch = 16, col = "blue", lwd = 4)
cat("1.", "\n")
rm(moths)
model_2 <- glm(P ~ log(meters), data = moths, family = 'poisson')
model_2
cat("2.", "\n")
summary(model_2)
cat("3.", "\n")
step(model_2)
cat("4.", "\n")
(pred_2 <- stats::predict(model_2, data_new, type = "response"))
moths$P_hat <- stats::predict(model_2, type = "response")
moths <- moths[with(moths, order(meters)), ]
plot(log(moths$meters), moths$P_hat,
type = "l", col = "red", lwd = 2,
xlab = "log(moths$meters)", ylab = "P",
ylim = c(0, 20), main = "Model 2")
points(log(moths$meters), moths$P, pch = 16)
points(log(data_new$meters), pred_2, pch = 16, col = "blue", lwd = 4)