147 lines
3.9 KiB
R
147 lines
3.9 KiB
R
|
# ZAD 1
|
||
|
library(MASS)
|
||
|
|
||
|
v <- 10
|
||
|
blad_i <- numeric(v)
|
||
|
krok <- floor(nrow(iris) / v)
|
||
|
permutacja <- sample(1:nrow(iris))
|
||
|
temp <- 0
|
||
|
for (i in 1:v) {
|
||
|
if (i != v) {
|
||
|
obs_temp <- permutacja[(temp + 1):(i * krok)]
|
||
|
temp <- i * krok
|
||
|
} else {
|
||
|
obs_temp <- permutacja[(temp + 1):nrow(iris)]
|
||
|
}
|
||
|
model_lda_i <- lda(Species ~ ., data = iris[-obs_temp, ])
|
||
|
pred_i <- stats::predict(model_lda_i, iris[obs_temp, ])$class
|
||
|
blad_i[i] <- sum(pred_i != iris$Species[obs_temp])
|
||
|
}
|
||
|
sum(blad_i) / nrow(iris)
|
||
|
|
||
|
|
||
|
# bootstrap
|
||
|
n_boot <- 100
|
||
|
temp_boot <- numeric(n_boot)
|
||
|
set.seed(1234)
|
||
|
for (i in 1:n_boot) {
|
||
|
numery <- sample(1:nrow(iris), replace = TRUE)
|
||
|
model_lda_i <- lda(Species ~ ., data = iris[numery, ])
|
||
|
temp_boot[i] <- mean(stats::predict(model_lda_i, iris[-numery, ])$class != iris[-numery, ]$Species)
|
||
|
}
|
||
|
mean(temp_boot)
|
||
|
|
||
|
|
||
|
|
||
|
# w pakiecie caret dla sprawdzenia
|
||
|
install.packages("caret")
|
||
|
library(caret)
|
||
|
ctrl_boot <- trainControl(method = 'boot',
|
||
|
number = 100,
|
||
|
search = 'grid')
|
||
|
ctrl_loo <- trainControl(method = 'LOOCV',
|
||
|
search = 'grid')
|
||
|
ctrl_10CV <- trainControl(method = "repeatedcv",
|
||
|
number = 10,
|
||
|
repeats = 10)
|
||
|
train(Species ~ ., data = iris, method = 'lda', trControl = ctrl_boot)
|
||
|
train(Species ~ ., data = iris, method = 'lda', trControl = ctrl_loo)
|
||
|
train(Species ~ ., data = iris, method = 'lda', trControl = ctrl_10CV)
|
||
|
|
||
|
# ZAD 2
|
||
|
wina <- read.table('http://ls.home.amu.edu.pl/data_sets/wina.txt')
|
||
|
head(wina)
|
||
|
cat("...")
|
||
|
wina$V14 <- as.factor(wina$V14)
|
||
|
|
||
|
dim(wina)
|
||
|
table(wina$V14)
|
||
|
|
||
|
library(MASS)
|
||
|
model_lda <- lda(V14 ~ V1 + V2 + V3, data = wina)
|
||
|
model_lda$prior
|
||
|
model_lda$means
|
||
|
model_lda$scaling
|
||
|
|
||
|
head(stats::predict(model_lda)$posterior)
|
||
|
head(stats::predict(model_lda)$class)
|
||
|
(conf_matrix <- table(stats::predict(model_lda)$class, wina$V14))
|
||
|
|
||
|
|
||
|
|
||
|
(1 - sum(diag(conf_matrix)) / nrow(wina))
|
||
|
|
||
|
pred_loo <- numeric(nrow(wina))
|
||
|
for (i in 1:nrow(wina)) {
|
||
|
model_lda_i <- lda(V14 ~ V1 + V2 + V3, data = wina[-i, ])
|
||
|
pred_loo[i] <- stats::predict(model_lda_i, wina[i, ])$class
|
||
|
}
|
||
|
table(wina$V14, pred_loo)
|
||
|
(1 - sum(diag(table(wina$V14, pred_loo))) / nrow(wina))
|
||
|
|
||
|
|
||
|
|
||
|
# 10CV
|
||
|
v <- 10
|
||
|
blad_i <- numeric(v)
|
||
|
krok <- floor(nrow(wina) / v)
|
||
|
permutacja <- sample(1:nrow(wina))
|
||
|
temp <- 0
|
||
|
for (i in 1:v) {
|
||
|
if (i != v) {
|
||
|
obs_temp <- permutacja[(temp + 1):(i * krok)]
|
||
|
temp <- i * krok
|
||
|
} else {
|
||
|
obs_temp <- permutacja[(temp + 1):nrow(wina)]
|
||
|
}
|
||
|
model_lda_i <- lda(V14 ~ V1 + V2 + V3, data = wina[-obs_temp, ])
|
||
|
pred_i <- stats::predict(model_lda_i, wina[obs_temp, ])$class
|
||
|
blad_i[i] <- sum(pred_i != wina$V14[obs_temp])
|
||
|
}
|
||
|
sum(blad_i) / nrow(wina)
|
||
|
|
||
|
|
||
|
|
||
|
# bootstrap
|
||
|
n_boot <- 100
|
||
|
temp_boot <- numeric(n_boot)
|
||
|
set.seed(1234)
|
||
|
for (i in 1:n_boot) {
|
||
|
numery <- sample(1:nrow(wina), replace = TRUE)
|
||
|
model_lda_i <- lda(V14 ~ V1 + V2 + V3, data = wina[numery, ])
|
||
|
temp_boot[i] <- mean(stats::predict(model_lda_i, wina[-numery, ])$class != wina[-numery, ]$V14)
|
||
|
}
|
||
|
mean(temp_boot)
|
||
|
|
||
|
|
||
|
|
||
|
# w pakiecie caret dla sprawdzenia
|
||
|
library(caret)
|
||
|
ctrl_boot <- trainControl(method = 'boot',
|
||
|
number = 100,
|
||
|
search = 'grid')
|
||
|
ctrl_loo <- trainControl(method = 'LOOCV',
|
||
|
search = 'grid')
|
||
|
ctrl_10CV <- trainControl(method = "repeatedcv",
|
||
|
number = 10,
|
||
|
repeats = 10)
|
||
|
train(V14 ~ V1 + V2 + V3, data = wina, method = 'lda', trControl = ctrl_boot)
|
||
|
train(V14 ~ V1 + V2 + V3, data = wina, method = 'lda', trControl = ctrl_loo)
|
||
|
train(V14 ~ V1 + V2 + V3, data = wina, method = 'lda', trControl = ctrl_10CV)
|
||
|
|
||
|
|
||
|
wybrane <- wina[c(20, 50, 150, 100), 1:3]
|
||
|
rownames(wybrane) <- 1:4
|
||
|
library(knitr)
|
||
|
kable(wybrane, align = c('c', 'c', 'c'))
|
||
|
|
||
|
|
||
|
|
||
|
new_data <- data.frame(V1 = c(13.64, 13.94, 13.08, 12.29),
|
||
|
V2 = c(3.1, 1.73, 3.9, 3.17),
|
||
|
V3 = c(2.56, 2.27, 2.36, 2.21))
|
||
|
stats::predict(model_lda, new_data)
|
||
|
|
||
|
|
||
|
|