v <- 10 blad_i <- numeric(v) krok <- floor(nrow(iris) / v) permutacja <- sample(1:nrow(iris)) temp <- 0 for (i in 1:v) { if (i != v) { obs_temp <- permutacja[(temp + 1):(i * krok)] temp <- i * krok } else { obs_temp <- permutacja[(temp + 1):nrow(iris)] } model_lda_i <- lda(Species ~ ., data = iris[-obs_temp, ]) pred_i <- stats::predict(model_lda_i, iris[obs_temp, ])$class blad_i[i] <- sum(pred_i != iris$Species[obs_temp]) } sum(blad_i) / nrow(iris) library(MASS) v <- 10 blad_i <- numeric(v) krok <- floor(nrow(iris) / v) permutacja <- sample(1:nrow(iris)) temp <- 0 for (i in 1:v) { if (i != v) { obs_temp <- permutacja[(temp + 1):(i * krok)] temp <- i * krok } else { obs_temp <- permutacja[(temp + 1):nrow(iris)] } model_lda_i <- lda(Species ~ ., data = iris[-obs_temp, ]) pred_i <- stats::predict(model_lda_i, iris[obs_temp, ])$class blad_i[i] <- sum(pred_i != iris$Species[obs_temp]) } sum(blad_i) / nrow(iris) # bootstrap n_boot <- 100 temp_boot <- numeric(n_boot) set.seed(1234) for (i in 1:n_boot) { numery <- sample(1:nrow(iris), replace = TRUE) model_lda_i <- lda(Species ~ ., data = iris[numery, ]) temp_boot[i] <- mean(stats::predict(model_lda_i, iris[-numery, ])$class != iris[-numery, ]$Species) } mean(temp_boot) # w pakiecie caret dla sprawdzenia library(caret) ctrl_boot <- trainControl(method = 'boot', number = 100, search = 'grid') ctrl_loo <- trainControl(method = 'LOOCV', search = 'grid') ctrl_10CV <- trainControl(method = "repeatedcv", number = 10, repeats = 10) train(Species ~ ., data = iris, method = 'lda', trControl = ctrl_boot) train(Species ~ ., data = iris, method = 'lda', trControl = ctrl_loo) train(Species ~ ., data = iris, method = 'lda', trControl = ctrl_10CV) library(caret) install.packages("caret") library(caret) ctrl_boot <- trainControl(method = 'boot', number = 100, search = 'grid') ctrl_loo <- trainControl(method = 'LOOCV', search = 'grid') ctrl_10CV <- trainControl(method = "repeatedcv", number = 10, repeats = 10) train(Species ~ ., data = iris, method = 'lda', trControl = ctrl_boot) train(Species ~ ., data = iris, method = 'lda', trControl = ctrl_loo) train(Species ~ ., data = iris, method = 'lda', trControl = ctrl_10CV) wina <- read.table('http://ls.home.amu.edu.pl/data_sets/wina.txt') head(wina) cat("...") wina$V14 <- as.factor(wina$V14) dim(wina) table(wina$V14) model_lda <- lda(V14 ~ V1 + V2 + V3, data = wina) model_lda$prior model_lda$means model_lda$scaling head(stats::predict(model_lda)$posterior) head(stats::predict(model_lda)$class) (conf_matrix <- table(stats::predict(model_lda)$class, wina$V14)) (1 - sum(diag(conf_matrix)) / nrow(wina)) (1 - sum(diag(conf_matrix)) / nrow(wina)) pred_loo <- numeric(nrow(wina)) for (i in 1:nrow(wina)) { model_lda_i <- lda(V14 ~ V1 + V2 + V3, data = wina[-i, ]) pred_loo[i] <- stats::predict(model_lda_i, wina[i, ])$class } table(wina$V14, pred_loo) (1 - sum(diag(table(wina$V14, pred_loo))) / nrow(wina)) # 10CV v <- 10 blad_i <- numeric(v) krok <- floor(nrow(wina) / v) permutacja <- sample(1:nrow(wina)) temp <- 0 for (i in 1:v) { if (i != v) { obs_temp <- permutacja[(temp + 1):(i * krok)] temp <- i * krok } else { obs_temp <- permutacja[(temp + 1):nrow(wina)] } model_lda_i <- lda(V14 ~ V1 + V2 + V3, data = wina[-obs_temp, ]) pred_i <- stats::predict(model_lda_i, wina[obs_temp, ])$class blad_i[i] <- sum(pred_i != wina$V14[obs_temp]) } sum(blad_i) / nrow(wina) # bootstrap n_boot <- 100 temp_boot <- numeric(n_boot) set.seed(1234) for (i in 1:n_boot) { numery <- sample(1:nrow(wina), replace = TRUE) model_lda_i <- lda(V14 ~ V1 + V2 + V3, data = wina[numery, ]) temp_boot[i] <- mean(stats::predict(model_lda_i, wina[-numery, ])$class != wina[-numery, ]$V14) } mean(temp_boot) # w pakiecie caret dla sprawdzenia library(caret) ctrl_boot <- trainControl(method = 'boot', number = 100, search = 'grid') ctrl_loo <- trainControl(method = 'LOOCV', search = 'grid') ctrl_10CV <- trainControl(method = "repeatedcv", number = 10, repeats = 10) train(V14 ~ V1 + V2 + V3, data = wina, method = 'lda', trControl = ctrl_boot) train(V14 ~ V1 + V2 + V3, data = wina, method = 'lda', trControl = ctrl_loo) train(V14 ~ V1 + V2 + V3, data = wina, method = 'lda', trControl = ctrl_10CV) wybrane <- wina[c(20, 50, 150, 100), 1:3] rownames(wybrane) <- 1:4 library(knitr) kable(wybrane, align = c('c', 'c', 'c')) new_data <- data.frame(V1 = c(13.64, 13.94, 13.08, 12.29), V2 = c(3.1, 1.73, 3.9, 3.17), V3 = c(2.56, 2.27, 2.36, 2.21)) stats::predict(model_lda, new_data)