computers <- read.table("http://pp98647.home.amu.edu.pl/wp-content/uploads/2021/06/computers.csv") View(computers) computers <- read.csv("http://pp98647.home.amu.edu.pl/wp-content/uploads/2021/06/computers.csv") View(computers) spotify <- read.csv("http://pp98647.home.amu.edu.pl/wp-content/uploads/2021/06/spotify.csv") View(spotify) weight_height <- read.csv("http://pp98647.home.amu.edu.pl/wp-content/uploads/2021/06/weight-height.csv") View(weight_height) model_1 <- lm(valence ~ acusticness + danceability + energy + instrumentalness + liveness, data = spotify) model_1 <- lm(valence ~ acousticness + danceability + energy + instrumentalness + liveness, data = spotify) model_1 <- lm(valence ~ acousticness + danceability + energy + instrumentalness + liveness + loudness + speechiness + tempo + song_title, data = spotify) model_1 summary(model_1) model_1 <- lm(valence ~ acousticness + danceability + energy + instrumentalness + liveness + loudness + speechiness + tempo, data = spotify) summary(model_1) step(model_1) step(model_1) step(model_1) summary(model_1) model_2 <- lm(valence ~ acousticness + danceability + energy + instrumentalness + liveness + loudness + speechiness, data = spotify) new_data <- data.frame(acousticness=2.84e-06, danceability=0.305, energy=0.827, instrumentalness=2.45e-03, liveness=0.3350, loudness=-5.789, speechiness=0.1470) View(new_data) stats::predict(model_2, new_data, interval = "prediction") summary(model_2)$adj.r.squared new_data <- data.frame(acousticness=2.84e-06, danceability=0.305, energy=0.827, instrumentalness=2.45e-03, liveness=0.3350, loudness=-5.789, speechiness=0.1470, tempo=159.882) stats::predict(model_2, new_data, interval = "prediction") new_data <- data.frame(acousticness=2.84e-06, danceability=1.305, energy=0.827, instrumentalness=2.45e-03, liveness=0.3350, loudness=-5.789, speechiness=0.1470, tempo=159.882) stats::predict(model_2, new_data, interval = "prediction") new_data <- data.frame(acousticness=2.84e-06, danceability=0.305, energy=0.827, instrumentalness=2.45e-03, liveness=0.3350, loudness=-5.789, speechiness=0.1470, tempo=159.882) stats::predict(model_2, new_data, interval = "prediction") new_data <- data.frame(acousticness=2.84e-06, danceability=0.405, energy=0.827, instrumentalness=2.45e-03, liveness=0.3350, loudness=-5.789, speechiness=0.1470, tempo=159.882) stats::predict(model_2, new_data, interval = "prediction") new_data <- data.frame(acousticness=2.84e-06, danceability=0.305, energy=0.827, instrumentalness=2.45e-03, liveness=0.3350, loudness=-5.789, speechiness=0.1470, tempo=159.882) stats::predict(model_2, new_data, interval = "prediction") new_data <- data.frame(acousticness=2.84e-06, danceability=0.405, energy=0.827, instrumentalness=2.45e-03, liveness=0.3350, loudness=-5.789, speechiness=0.1470, tempo=159.882) stats::predict(model_2, new_data, interval = "prediction") 0.3918359 - 0.3918359 0.3918359 - 0.3229826 male <- ifelse(weight_height$Gender == "Male") male <- weight_height$Gender == "Male" male <- ifelse(weight_height$Gender == "Male", weight_height$Height) male <- ifelse(weight_height$Gender == "Male", weight_height$Height, 0) male <- weight_height[weight_height$Gender == "Male"] male <- weight_height[weight_height$Gender == "Male", ] View(male) shapiro.test(male$Height) qqnorm(male$Height) shapiro.test(male$Weight) qqnorm(male$Weight) shapiro.test(male$Height) qqnorm(male$Height) mean(male$Height) par(mfrow = c(1, 2)) female <- weight_height[weight_height$Gender == "Female", ] shapiro.test(female$Height) qqnorm(female$Height) mean(female$Height) par(mfrow = c(1, 2)) male <- weight_height[weight_height$Gender == "Male", ] shapiro.test(male$Height) qqnorm(male$Height) mean(male$Height) female <- weight_height[weight_height$Gender == "Female", ] shapiro.test(female$Height) qqnorm(female$Height) mean(female$Height) par(mfrow = c(1, 2)) male <- weight_height[weight_height$Gender == "Male", ] shapiro.test(male$Height) qqnorm(male$Height) mean(male$Height) var(male$Height) female <- weight_height[weight_height$Gender == "Female", ] shapiro.test(female$Height) qqnorm(female$Height) mean(female$Height) var(female$Height) t.test(male$Height, female$Height, var.equal = TRUE, alternative = 'greater')$p.value t.test(male$Height, female$Height, var.equal = TRUE, alternative = 'greater') t.test(male$Height, female$Height, paired = TRUE, alternative = 'less')$p.value t.test(male$Height, female$Height, paired = TRUE, alternative = 'greater')$p.value t.test(male$Height, female$Height, alternative = 'greater')$p.value t.test(male$Height, female$Height, alternative = 'greater') t.test(male$Height, female$Height, alternative = 'greater', conf.level = 0.05)$p.value selected <- computers[computers$screen == 14, ] View(selected) ram_procent <- data.frame(cbind(liczebnosc = table(selected$ram), procent = prop.table(selected$ram))) table(selected$ram) prop.table(selected$ram) table(selected$ram) liczebnosc <- table(selected$ram) prop.table(liczebnosc) prop.table(liczebnosc)*100 # ZAD 2 - tego trochę nie rozumiem w_test <- function(x, istotnosc, delta_zero, alternative = c('two.sided', 'less', 'greater')) { # statystyka testowa ss <- (1 / length(x)) * (var(x) - mean(x)) statistic <- length(x) * ss / delta_zero * delta_zero # parametr w obszarach krytycznych d <- length(x) - 1 # poziom istotności alternative <- match.arg(alternative) p_value <- istotnosc p_value <- switch(alternative, 'two.sided' = 2 * min(p_value, 1 - p_value), 'greater' = p_value, 'less' = 1 - p_value) # rezultat names(statistic) <- 'T' names(d) <- 'num df' result <- list(statistic = statistic, parameter = d, p.value = p_value, alternative = alternative, method = 'Test istotności dla wariancji w modelu normalnym', data.name = deparse(substitute(x))) class(result) <- 'htest' return(result) } rok <- 1995:2002 liczba_przypadkow <- c(39.7, 38.2, 34.7, 33.1, 30.1, 28.4, 26.3, 24.7) data_set <- data.frame(rok = rok, liczba_przypadkow = liczba_przypadkow) plot(data_set, main = "Wykres rozrzutu", pch = 16) model <- lm(liczba_przypadkow ~ rok, data = data_set) model$coefficients plot(data_set, main = "Wykres rozrzutu", pch = 16) abline(model, col = "red", lwd = 2) coef(model) confint(model) summary(model) fitted(model) residuals(model) temp_rok <- data.frame(rok = seq(min(data_set$rok) - 10, max(data_set$rok) + 10, length = 100)) pred <- stats::predict(model, temp_rok, interval = "prediction") plot(data_set, main = "Wykres rozrzutu", pch = 16) abline(model, col = "red", lwd = 2) lines(temp_rok$rok, pred[, 2], lty = 2, col = "red") lines(temp_rok$rok, pred[, 3], lty = 2, col = "red") temp_rok <- data.frame(rok = seq(min(data_set$rok), max(data_set$rok), length = 100)) pred <- stats::predict(model, temp_rok, interval = "prediction") plot(data_set, main = "Wykres rozrzutu", pch = 16) abline(model, col = "red", lwd = 2) lines(temp_rok$rok, pred[, 2], lty = 2, col = "red") lines(temp_rok$rok, pred[, 3], lty = 2, col = "red") temp_rok <- data.frame(rok = seq(min(data_set$rok) - 10, max(data_set$rok) + 10, length = 100)) pred <- stats::predict(model, temp_rok, interval = "prediction") plot(data_set, main = "Wykres rozrzutu", pch = 16) abline(model, col = "red", lwd = 2) lines(temp_rok$rok, pred[, 2], lty = 2, col = "red") lines(temp_rok$rok, pred[, 3], lty = 2, col = "red") new_rok <- data.frame(rok = 2003:2007) (pred_2003_2007 <- stats::predict(model, new_rok, interval = 'prediction')) plot(data_set, main = "Wykres rozrzutu z predykcją na lata 2003-2007", pch = 16, xlim = c(1995, 2007), ylim = c(10, 40)) abline(model, col = "red", lwd = 2) points(2003:2007, pred_2003_2007[, 1], col = "blue", pch = 16) temp_rok <- data.frame(rok = seq(1994, 2008, length = 100)) pred <- stats::predict(model, temp_rok, interval = "prediction") lines(temp_rok$rok, pred[, 2], lty = 2, col = "red") lines(temp_rok$rok, pred[, 3], lty = 2, col = "red") load(url("http://ls.home.amu.edu.pl/data_sets/liver_data.RData")) head(liver_data) liver_data$condition <- ifelse(liver_data$condition == "Yes", 1, 0) model_1 <- glm(condition ~ bilirubin + ldh, data = liver_data, family = 'binomial') model_1 summary(model_1) step(model_1) exp(coef(model_1)[2]) exp(coef(model_1)[3]) install.packages("ROCR") library(ROCR) pred_1 <- prediction(model_1$fitted, liver_data$condition) plot(performance(pred_1, 'tpr', 'fpr'), main = "Model 1") performance(pred_1, 'auc')@y.values liver_data_new <- data.frame(bilirubin = c(0.9, 2.1, 3.4), ldh = c(100, 200, 300)) (predict_glm <- stats::predict(model_1, liver_data_new, type = 'response')) model_1_hat <- coef(model_1)[1] + coef(model_1)[2] * liver_data$bilirubin + coef(model_1)[3] * liver_data$ldh model_1_temp <- seq(min(model_1_hat) - 1, max(model_1_hat) + 2.5, length.out = 100) condition_temp <- exp(model_1_temp) / (1 + exp(model_1_temp)) plot(model_1_temp, condition_temp, type = "l", xlab = "X beta", ylab = "condition", xlim = c(-6, 9), ylim = c(-0.1, 1.1)) points(model_1_hat, liver_data$condition, pch = 16) points(coef(model_1)[1] + coef(model_1)[2] * liver_data_new$bilirubin + coef(model_1)[3] * liver_data_new$ldh, predict_glm, pch = 16, col = "red") pred_1 <- prediction(model_1$fitted, liver_data$condition) plot(performance(pred_1, 'tpr', 'fpr'), main = "Model 1") performance(pred_1, 'auc')@y.values summary(model_1) model_1$coefficients summary(model_1)$adj.r.squared x1<-rexp(30,5) x2<-rnorm(30,2,2) x3<-rnorm(30,10,1) gen<-c(x1,x2,x3) # wykresy gęstości jądrowych przy różnych szerokościach okna par(mfrow=c(2,2)) plot(density(gen,bw=0.1)) plot(density(gen,bw=0.5)) plot(density(gen,bw=3)) plot(density(gen,bw=5)) #na trzecim #ZAD1 head(USArrests) pairs(USArrests) #UrbanPop jest najsłabiej skorelowana z pozostałymi cor.test(USArrests$Murder,USArrests$UrbanPop, method="pearson") cor.test(USArrests$Rape,USArrests$UrbanPop, method="pearson") (pca_1 <- prcomp(~ Murder + Assault + Rape, data = USArrests, scale = TRUE)) summary(pca_1) head(pca_1$x) cat("...") pca_1$rotation par(mfrow = c(1, 2)) matplot(pca_1$rotation, type = 'l', lty = 1, lwd = 2, xlab = 'zmienne', ylab = 'ładunki', ylim = c(-0.9, 1.05), xaxt = "n") axis(1, at = 1:3, labels = rownames(pca_1$rotation)) legend('topleft', legend = c('PC1', 'PC2', 'PC3'), ncol = 3, col = 1:3, lwd = 2) text(rep(1, 3), pca_1$rotation[1, ], round(pca_1$rotation[1, ], 2), pos = 4) text(rep(2, 3), pca_1$rotation[2, ], round(pca_1$rotation[2, ], 2), pos = 1) text(rep(3, 3), pca_1$rotation[3, ], round(pca_1$rotation[3, ], 2), pos = 2) matplot(abs(pca_1$rotation), type = 'l', lty = 1, lwd = 2, xlab = 'zmienne', ylab = '|ładunki|', ylim = c(0, 1.05), xaxt = "n") axis(1, at = 1:3, labels = rownames(pca_1$rotation)) legend('topleft', legend = c('PC1', 'PC2', 'PC3'), ncol = 3, col = 1:3, lwd = 2) text(rep(1, 3), abs(pca_1$rotation)[1, ], abs(round(pca_1$rotation[1, ], 2)), pos = 4) text(rep(2, 3), abs(pca_1$rotation)[2, ], abs(round(pca_1$rotation[2, ], 2)), pos = 1) text(rep(3, 3), abs(pca_1$rotation)[3, ], abs(round(pca_1$rotation[3, ], 2)), pos = 2) plot(pca_1) par(mfrow = c(1, 1)) plot(pca_1) pca_1$sdev^2 mean(pca_1$sdev^2) biplot(pca_1) library(ape) plot(mst(dist(scale(USArrests[, -3]))), x1 = pca_1$x[, 1], x2 = pca_1$x[, 2])