Statystyka/zajecia14/zadania.R

147 lines
3.9 KiB
R
Raw Permalink Normal View History

2021-06-21 18:44:10 +02:00
# ZAD 1
library(MASS)
v <- 10
blad_i <- numeric(v)
krok <- floor(nrow(iris) / v)
permutacja <- sample(1:nrow(iris))
temp <- 0
for (i in 1:v) {
if (i != v) {
obs_temp <- permutacja[(temp + 1):(i * krok)]
temp <- i * krok
} else {
obs_temp <- permutacja[(temp + 1):nrow(iris)]
}
model_lda_i <- lda(Species ~ ., data = iris[-obs_temp, ])
pred_i <- stats::predict(model_lda_i, iris[obs_temp, ])$class
blad_i[i] <- sum(pred_i != iris$Species[obs_temp])
}
sum(blad_i) / nrow(iris)
# bootstrap
n_boot <- 100
temp_boot <- numeric(n_boot)
set.seed(1234)
for (i in 1:n_boot) {
numery <- sample(1:nrow(iris), replace = TRUE)
model_lda_i <- lda(Species ~ ., data = iris[numery, ])
temp_boot[i] <- mean(stats::predict(model_lda_i, iris[-numery, ])$class != iris[-numery, ]$Species)
}
mean(temp_boot)
# w pakiecie caret dla sprawdzenia
install.packages("caret")
library(caret)
ctrl_boot <- trainControl(method = 'boot',
number = 100,
search = 'grid')
ctrl_loo <- trainControl(method = 'LOOCV',
search = 'grid')
ctrl_10CV <- trainControl(method = "repeatedcv",
number = 10,
repeats = 10)
train(Species ~ ., data = iris, method = 'lda', trControl = ctrl_boot)
train(Species ~ ., data = iris, method = 'lda', trControl = ctrl_loo)
train(Species ~ ., data = iris, method = 'lda', trControl = ctrl_10CV)
# ZAD 2
wina <- read.table('http://ls.home.amu.edu.pl/data_sets/wina.txt')
head(wina)
cat("...")
wina$V14 <- as.factor(wina$V14)
dim(wina)
table(wina$V14)
library(MASS)
model_lda <- lda(V14 ~ V1 + V2 + V3, data = wina)
model_lda$prior
model_lda$means
model_lda$scaling
head(stats::predict(model_lda)$posterior)
head(stats::predict(model_lda)$class)
(conf_matrix <- table(stats::predict(model_lda)$class, wina$V14))
(1 - sum(diag(conf_matrix)) / nrow(wina))
pred_loo <- numeric(nrow(wina))
for (i in 1:nrow(wina)) {
model_lda_i <- lda(V14 ~ V1 + V2 + V3, data = wina[-i, ])
pred_loo[i] <- stats::predict(model_lda_i, wina[i, ])$class
}
table(wina$V14, pred_loo)
(1 - sum(diag(table(wina$V14, pred_loo))) / nrow(wina))
# 10CV
v <- 10
blad_i <- numeric(v)
krok <- floor(nrow(wina) / v)
permutacja <- sample(1:nrow(wina))
temp <- 0
for (i in 1:v) {
if (i != v) {
obs_temp <- permutacja[(temp + 1):(i * krok)]
temp <- i * krok
} else {
obs_temp <- permutacja[(temp + 1):nrow(wina)]
}
model_lda_i <- lda(V14 ~ V1 + V2 + V3, data = wina[-obs_temp, ])
pred_i <- stats::predict(model_lda_i, wina[obs_temp, ])$class
blad_i[i] <- sum(pred_i != wina$V14[obs_temp])
}
sum(blad_i) / nrow(wina)
# bootstrap
n_boot <- 100
temp_boot <- numeric(n_boot)
set.seed(1234)
for (i in 1:n_boot) {
numery <- sample(1:nrow(wina), replace = TRUE)
model_lda_i <- lda(V14 ~ V1 + V2 + V3, data = wina[numery, ])
temp_boot[i] <- mean(stats::predict(model_lda_i, wina[-numery, ])$class != wina[-numery, ]$V14)
}
mean(temp_boot)
# w pakiecie caret dla sprawdzenia
library(caret)
ctrl_boot <- trainControl(method = 'boot',
number = 100,
search = 'grid')
ctrl_loo <- trainControl(method = 'LOOCV',
search = 'grid')
ctrl_10CV <- trainControl(method = "repeatedcv",
number = 10,
repeats = 10)
train(V14 ~ V1 + V2 + V3, data = wina, method = 'lda', trControl = ctrl_boot)
train(V14 ~ V1 + V2 + V3, data = wina, method = 'lda', trControl = ctrl_loo)
train(V14 ~ V1 + V2 + V3, data = wina, method = 'lda', trControl = ctrl_10CV)
wybrane <- wina[c(20, 50, 150, 100), 1:3]
rownames(wybrane) <- 1:4
library(knitr)
kable(wybrane, align = c('c', 'c', 'c'))
new_data <- data.frame(V1 = c(13.64, 13.94, 13.08, 12.29),
V2 = c(3.1, 1.73, 3.9, 3.17),
V3 = c(2.56, 2.27, 2.36, 2.21))
stats::predict(model_lda, new_data)