Statystyka/testowe/.Rhistory

268 lines
11 KiB
R

# Load the three data sets used in this session from remote CSV files and open
# each one in the data viewer.
# The first attempt uses read.table(); the same file is re-read with read.csv()
# two statements later, which overwrites this result.
# NOTE(review): presumably read.table()'s defaults did not parse the CSV as
# intended - confirm.
computers <- read.table("http://pp98647.home.amu.edu.pl/wp-content/uploads/2021/06/computers.csv")
View(computers)
computers <- read.csv("http://pp98647.home.amu.edu.pl/wp-content/uploads/2021/06/computers.csv")
View(computers)
spotify <- read.csv("http://pp98647.home.amu.edu.pl/wp-content/uploads/2021/06/spotify.csv")
View(spotify)
weight_height <- read.csv("http://pp98647.home.amu.edu.pl/wp-content/uploads/2021/06/weight-height.csv")
View(weight_height)
# Multiple linear regression of `valence` on audio features from `spotify`.
# First attempt: the predictor is spelled `acusticness`, while every later
# formula uses `acousticness`.
# NOTE(review): presumably a typo that made this call fail - confirm.
model_1 <- lm(valence ~ acusticness + danceability + energy + instrumentalness +
liveness, data = spotify)
# Same five-predictor model with the spelling `acousticness`.
model_1 <- lm(valence ~ acousticness + danceability + energy + instrumentalness +
liveness, data = spotify)
# Extended model: adds loudness, speechiness, tempo and song_title.
# NOTE(review): song_title looks like a free-text column; it is dropped again
# in the next fit - confirm that was the reason.
model_1 <- lm(valence ~ acousticness + danceability + energy + instrumentalness +
liveness + loudness + speechiness + tempo + song_title, data = spotify)
model_1
summary(model_1)
# Refit without song_title; this is the model_1 used from here on.
model_1 <- lm(valence ~ acousticness + danceability + energy + instrumentalness +
liveness + loudness + speechiness + tempo, data = spotify)
summary(model_1)
# Stepwise model selection starting from model_1 (the same call is repeated
# three times in the session).
step(model_1)
step(model_1)
step(model_1)
summary(model_1)
# model_2 is model_1 without `tempo`.
model_2 <- lm(valence ~ acousticness + danceability + energy + instrumentalness +
liveness + loudness + speechiness, data = spotify)
# One new observation holding exactly the seven predictors of model_2.
new_data <- data.frame(acousticness=2.84e-06, danceability=0.305, energy=0.827,
instrumentalness=2.45e-03, liveness=0.3350, loudness=-5.789, speechiness=0.1470)
View(new_data)
# Point prediction with a prediction interval for the new observation.
stats::predict(model_2, new_data, interval = "prediction")
summary(model_2)$adj.r.squared
# From here on new_data also carries `tempo`, which is not part of model_2's
# formula.
new_data <- data.frame(acousticness=2.84e-06, danceability=0.305, energy=0.827,
instrumentalness=2.45e-03, liveness=0.3350, loudness=-5.789,
speechiness=0.1470, tempo=159.882)
stats::predict(model_2, new_data, interval = "prediction")
# The remaining predictions change only `danceability` (1.305, 0.305, 0.405)
# with all other values held fixed.
new_data <- data.frame(acousticness=2.84e-06, danceability=1.305, energy=0.827,
instrumentalness=2.45e-03, liveness=0.3350, loudness=-5.789,
speechiness=0.1470, tempo=159.882)
stats::predict(model_2, new_data, interval = "prediction")
new_data <- data.frame(acousticness=2.84e-06, danceability=0.305, energy=0.827,
instrumentalness=2.45e-03, liveness=0.3350, loudness=-5.789,
speechiness=0.1470, tempo=159.882)
stats::predict(model_2, new_data, interval = "prediction")
new_data <- data.frame(acousticness=2.84e-06, danceability=0.405, energy=0.827,
instrumentalness=2.45e-03, liveness=0.3350, loudness=-5.789,
speechiness=0.1470, tempo=159.882)
stats::predict(model_2, new_data, interval = "prediction")
new_data <- data.frame(acousticness=2.84e-06, danceability=0.305, energy=0.827,
instrumentalness=2.45e-03, liveness=0.3350, loudness=-5.789,
speechiness=0.1470, tempo=159.882)
stats::predict(model_2, new_data, interval = "prediction")
new_data <- data.frame(acousticness=2.84e-06, danceability=0.405, energy=0.827,
instrumentalness=2.45e-03, liveness=0.3350, loudness=-5.789,
speechiness=0.1470, tempo=159.882)
stats::predict(model_2, new_data, interval = "prediction")
# Manual differences between hard-coded numbers.
# NOTE(review): presumably predicted values copied from the output above, used
# to read off the effect of changing danceability - confirm.
0.3918359 - 0.3918359
0.3918359 - 0.3229826
# Compare `Height` between the "Male" and "Female" rows of weight_height.
# Several attempts at building the male subset follow; the row-subsetting form
# with the trailing comma is the one used for the rest of the session.
# NOTE(review): ifelse() is called here without `yes`/`no` - presumably this
# call failed.
male <- ifelse(weight_height$Gender == "Male")
# Logical vector marking the male rows.
male <- weight_height$Gender == "Male"
# NOTE(review): ifelse() without the `no` argument - presumably failed too.
male <- ifelse(weight_height$Gender == "Male", weight_height$Height)
# Vector holding Height for male rows and 0 for all other rows.
male <- ifelse(weight_height$Gender == "Male", weight_height$Height, 0)
# NOTE(review): no comma inside `[ ]`, so the logical vector indexes columns
# rather than rows; replaced by the row-wise form on the next line.
male <- weight_height[weight_height$Gender == "Male"]
# Data frame containing only the male rows.
male <- weight_height[weight_height$Gender == "Male", ]
View(male)
# Normality checks (Shapiro-Wilk test and normal Q-Q plot) for male Height and
# Weight.
shapiro.test(male$Height)
qqnorm(male$Height)
shapiro.test(male$Weight)
qqnorm(male$Weight)
shapiro.test(male$Height)
qqnorm(male$Height)
mean(male$Height)
# Put the next two plots side by side (1 row, 2 columns).
par(mfrow = c(1, 2))
# Data frame containing only the female rows, with the same checks.
female <- weight_height[weight_height$Gender == "Female", ]
shapiro.test(female$Height)
qqnorm(female$Height)
mean(female$Height)
# The male/female analysis is re-run as one unit.
par(mfrow = c(1, 2))
male <- weight_height[weight_height$Gender == "Male", ]
shapiro.test(male$Height)
qqnorm(male$Height)
mean(male$Height)
female <- weight_height[weight_height$Gender == "Female", ]
shapiro.test(female$Height)
qqnorm(female$Height)
mean(female$Height)
# Re-run once more, this time also reporting the variance of each group.
par(mfrow = c(1, 2))
male <- weight_height[weight_height$Gender == "Male", ]
shapiro.test(male$Height)
qqnorm(male$Height)
mean(male$Height)
var(male$Height)
female <- weight_height[weight_height$Gender == "Female", ]
shapiro.test(female$Height)
qqnorm(female$Height)
mean(female$Height)
var(female$Height)
# Two-sample t-test with equal variances assumed; alternative: the male mean
# is greater than the female mean.
t.test(male$Height, female$Height, var.equal = TRUE, alternative = 'greater')$p.value
t.test(male$Height, female$Height, var.equal = TRUE, alternative = 'greater')
# NOTE(review): paired = TRUE treats the two vectors as matched pairs, but
# male and female are disjoint row subsets - pairing looks unintended, confirm.
t.test(male$Height, female$Height, paired = TRUE, alternative = 'less')$p.value
t.test(male$Height, female$Height, paired = TRUE, alternative = 'greater')$p.value
# Default two-sample form (var.equal not set), one-sided 'greater' alternative.
t.test(male$Height, female$Height, alternative = 'greater')$p.value
t.test(male$Height, female$Height, alternative = 'greater')
# NOTE(review): conf.level sets the confidence level of the reported interval;
# 0.05 looks like a significance level passed by mistake - confirm. Only
# $p.value is extracted here.
t.test(male$Height, female$Height, alternative = 'greater', conf.level = 0.05)$p.value
# Frequency table of `ram` values among computers whose `screen` equals 14.
selected <- computers[computers$screen == 14, ]
View(selected)
# Counts per distinct `ram` value; computed once and reused below.
liczebnosc <- table(selected$ram)
# Fix: proportions must be taken from the count table, not from the raw `ram`
# column. prop.table(selected$ram) returns one entry per row (each raw value
# divided by the column total), which does not line up with the per-category
# counts when both are bound together with cbind().
ram_procent <- data.frame(cbind(liczebnosc = liczebnosc,
                                procent = prop.table(liczebnosc)))
# The same summary shown piecewise: counts, proportions, percentages.
table(selected$ram)
prop.table(liczebnosc)
prop.table(liczebnosc) * 100
# TASK 2 (the original note said this part was not fully understood).
#
#' Significance test for the variance in the normal model
#'
#' Computes the statistic T = n * S^2 / delta_zero^2, where
#' S^2 = mean((x - mean(x))^2), and refers it to the chi-square distribution
#' with n - 1 degrees of freedom.
#'
#' Fixes relative to the previous version:
#' * S^2 is the mean squared deviation (it was `(var(x) - mean(x)) / n`);
#' * the statistic divides by `delta_zero^2` (`/ delta_zero * delta_zero`
#'   cancelled out because of operator precedence);
#' * the p-value is the chi-square tail probability of the statistic (it was
#'   a copy of the significance level `istotnosc`).
#'
#' @param x Numeric vector with at least two observations.
#' @param istotnosc Significance level. Kept so existing calls keep working;
#'   it does not enter the statistic or the p-value - compare the returned
#'   `p.value` against it.
#' @param delta_zero Hypothesised standard deviation under H0 (squared inside
#'   the statistic). NOTE(review): read off the original
#'   `delta_zero * delta_zero` - confirm it is sigma_0 and not sigma_0^2.
#' @param alternative One of 'two.sided', 'less', 'greater'.
#' @return An object of class 'htest' with `statistic`, `parameter`,
#'   `p.value`, `alternative`, `method` and `data.name`.
w_test <- function(x, istotnosc, delta_zero, alternative = c('two.sided', 'less', 'greater')) {
  alternative <- match.arg(alternative)
  stopifnot(
    is.numeric(x), length(x) >= 2L,
    is.numeric(delta_zero), length(delta_zero) == 1L, delta_zero > 0
  )

  n <- length(x)
  # test statistic
  ss <- mean((x - mean(x))^2)
  statistic <- n * ss / delta_zero^2
  # degrees of freedom used for the critical regions
  d <- n - 1

  # p-value: upper chi-square tail, then mapped onto the chosen alternative
  upper_tail <- pchisq(statistic, df = d, lower.tail = FALSE)
  p_value <- switch(alternative,
    'two.sided' = 2 * min(upper_tail, 1 - upper_tail),
    'greater' = upper_tail,
    'less' = 1 - upper_tail
  )

  # result
  names(statistic) <- 'T'
  names(d) <- 'num df'
  result <- list(
    statistic = statistic,
    parameter = d,
    p.value = p_value,
    alternative = alternative,
    method = 'Test istotności dla wariancji w modelu normalnym',
    data.name = deparse(substitute(x))
  )
  class(result) <- 'htest'
  result
}
# Simple linear regression of a yearly count (`liczba_przypadkow`) on the year
# (`rok`), with the data typed in by hand for 1995-2002.
rok <- 1995:2002
liczba_przypadkow <- c(39.7, 38.2, 34.7, 33.1, 30.1, 28.4, 26.3, 24.7)
data_set <- data.frame(rok = rok, liczba_przypadkow = liczba_przypadkow)
# Scatter plot of the raw data.
plot(data_set, main = "Wykres rozrzutu", pch = 16)
model <- lm(liczba_przypadkow ~ rok, data = data_set)
model$coefficients
# Scatter plot again, now with the fitted regression line on top.
plot(data_set, main = "Wykres rozrzutu", pch = 16)
abline(model, col = "red", lwd = 2)
# Model diagnostics: coefficients, their confidence intervals, the full
# summary, fitted values and residuals.
coef(model)
confint(model)
summary(model)
fitted(model)
residuals(model)
# Prediction band on a grid of 100 years extending 10 years beyond the
# observed range on both sides. Columns 2 and 3 of `pred` are drawn as dashed
# lines.
temp_rok <- data.frame(rok = seq(min(data_set$rok) - 10,
max(data_set$rok) + 10,
length = 100))
pred <- stats::predict(model, temp_rok, interval = "prediction")
plot(data_set, main = "Wykres rozrzutu", pch = 16)
abline(model, col = "red", lwd = 2)
lines(temp_rok$rok, pred[, 2], lty = 2, col = "red")
lines(temp_rok$rok, pred[, 3], lty = 2, col = "red")
# Same plot with the grid restricted to the observed range of years.
temp_rok <- data.frame(rok = seq(min(data_set$rok),
max(data_set$rok),
length = 100))
pred <- stats::predict(model, temp_rok, interval = "prediction")
plot(data_set, main = "Wykres rozrzutu", pch = 16)
abline(model, col = "red", lwd = 2)
lines(temp_rok$rok, pred[, 2], lty = 2, col = "red")
lines(temp_rok$rok, pred[, 3], lty = 2, col = "red")
# Back to the extended grid (observed range +/- 10 years).
temp_rok <- data.frame(rok = seq(min(data_set$rok) - 10,
max(data_set$rok) + 10,
length = 100))
pred <- stats::predict(model, temp_rok, interval = "prediction")
plot(data_set, main = "Wykres rozrzutu", pch = 16)
abline(model, col = "red", lwd = 2)
lines(temp_rok$rok, pred[, 2], lty = 2, col = "red")
lines(temp_rok$rok, pred[, 3], lty = 2, col = "red")
# Predictions with prediction intervals for 2003-2007; the outer parentheses
# print the result while assigning it.
new_rok <- data.frame(rok = 2003:2007)
(pred_2003_2007 <- stats::predict(model, new_rok, interval = 'prediction'))
# Final plot: axis limits widened so the predicted points (blue) fit, plus the
# regression line and the prediction band on a 1994-2008 grid.
plot(data_set, main = "Wykres rozrzutu z predykcją na lata 2003-2007", pch = 16,
xlim = c(1995, 2007), ylim = c(10, 40))
abline(model, col = "red", lwd = 2)
points(2003:2007, pred_2003_2007[, 1], col = "blue", pch = 16)
temp_rok <- data.frame(rok = seq(1994, 2008, length = 100))
pred <- stats::predict(model, temp_rok, interval = "prediction")
lines(temp_rok$rok, pred[, 2], lty = 2, col = "red")
lines(temp_rok$rok, pred[, 3], lty = 2, col = "red")
# Logistic regression of `condition` on `bilirubin` and `ldh` using the
# liver_data set, followed by ROC/AUC evaluation and predictions.
# Load the objects stored in the remote .RData file; liver_data is expected to
# be one of them.
load(url("http://ls.home.amu.edu.pl/data_sets/liver_data.RData"))
head(liver_data)
# Recode the response: "Yes" becomes 1, everything else 0.
liver_data$condition <- ifelse(liver_data$condition == "Yes", 1, 0)
model_1 <- glm(condition ~ bilirubin + ldh, data = liver_data, family = 'binomial')
model_1
summary(model_1)
# Stepwise model selection starting from model_1.
step(model_1)
# Exponentiated coefficients of the 2nd and 3rd terms (bilirubin and ldh, per
# the formula order).
exp(coef(model_1)[2])
exp(coef(model_1)[3])
# NOTE(review): install.packages() runs on every replay of this transcript;
# consider installing ROCR once outside the script.
install.packages("ROCR")
library(ROCR)
# ROC curve (true positive rate against false positive rate) and AUC computed
# from the model's fitted values; `$fitted` relies on `$` partial matching.
pred_1 <- prediction(model_1$fitted, liver_data$condition)
plot(performance(pred_1, 'tpr', 'fpr'), main = "Model 1")
performance(pred_1, 'auc')@y.values
# Predicted values on the response scale for three new (bilirubin, ldh) pairs.
liver_data_new <- data.frame(bilirubin = c(0.9, 2.1, 3.4), ldh = c(100, 200, 300))
(predict_glm <- stats::predict(model_1,
liver_data_new,
type = 'response'))
# Linear predictor (intercept + coefficients * covariates) for every
# observation in liver_data.
model_1_hat <- coef(model_1)[1] +
coef(model_1)[2] * liver_data$bilirubin +
coef(model_1)[3] * liver_data$ldh
# Grid over the linear predictor and its logistic transform exp(t) / (1 + exp(t)).
model_1_temp <- seq(min(model_1_hat) - 1, max(model_1_hat) + 2.5, length.out = 100)
condition_temp <- exp(model_1_temp) / (1 + exp(model_1_temp))
# Logistic curve, observed 0/1 outcomes (black) and the three new predictions
# (red) plotted against the linear predictor.
plot(model_1_temp, condition_temp, type = "l", xlab = "X beta", ylab = "condition",
xlim = c(-6, 9), ylim = c(-0.1, 1.1))
points(model_1_hat, liver_data$condition, pch = 16)
points(coef(model_1)[1] +
coef(model_1)[2] * liver_data_new$bilirubin +
coef(model_1)[3] * liver_data_new$ldh,
predict_glm, pch = 16, col = "red")
# ROC curve and AUC repeated.
pred_1 <- prediction(model_1$fitted, liver_data$condition)
plot(performance(pred_1, 'tpr', 'fpr'), main = "Model 1")
performance(pred_1, 'auc')@y.values
summary(model_1)
model_1$coefficients
# NOTE(review): model_1 is a glm fit; its summary presumably has no
# `adj.r.squared` element, so this likely evaluates to NULL - confirm.
summary(model_1)$adj.r.squared
# Simulated sample of 90 values pooled from three random draws of 30 each:
# rexp(30, 5), rnorm(30, 2, 2) and rnorm(30, 10, 1). No seed is set, so the
# values differ between runs.
x1<-rexp(30,5)
x2<-rnorm(30,2,2)
x3<-rnorm(30,10,1)
gen<-c(x1,x2,x3)
# kernel density plots for different bandwidths
# 2 x 2 plotting grid, one panel per bandwidth (0.1, 0.5, 3, 5).
par(mfrow=c(2,2))
plot(density(gen,bw=0.1))
plot(density(gen,bw=0.5))
plot(density(gen,bw=3))
plot(density(gen,bw=5)) # on the third one
# TASK 1
# Principal component analysis of the USArrests data set.
head(USArrests)
# Scatter-plot matrix of all variables.
pairs(USArrests)
# UrbanPop is the most weakly correlated with the other variables
cor.test(USArrests$Murder,USArrests$UrbanPop, method="pearson")
cor.test(USArrests$Rape,USArrests$UrbanPop, method="pearson")
# PCA on Murder, Assault and Rape only, with scaling enabled; the outer
# parentheses print the result while assigning it.
(pca_1 <- prcomp(~ Murder + Assault + Rape, data = USArrests, scale = TRUE))
summary(pca_1)
# First rows of the component scores.
head(pca_1$x)
cat("...")
# Loadings matrix (variables in rows, components in columns).
pca_1$rotation
# Two panels side by side: raw loadings (left) and absolute loadings (right).
par(mfrow = c(1, 2))
# Left panel: one line per component across the three variables; the default
# x axis is suppressed and replaced with the variable names.
matplot(pca_1$rotation, type = 'l', lty = 1, lwd = 2,
xlab = 'zmienne', ylab = 'ładunki', ylim = c(-0.9, 1.05),
xaxt = "n")
axis(1, at = 1:3, labels = rownames(pca_1$rotation))
legend('topleft', legend = c('PC1', 'PC2', 'PC3'), ncol = 3, col = 1:3, lwd = 2)
# Label each point with its loading rounded to 2 decimals; `pos` places the
# label to the right (4), below (1) or left (2) of the point.
text(rep(1, 3), pca_1$rotation[1, ], round(pca_1$rotation[1, ], 2), pos = 4)
text(rep(2, 3), pca_1$rotation[2, ], round(pca_1$rotation[2, ], 2), pos = 1)
text(rep(3, 3), pca_1$rotation[3, ], round(pca_1$rotation[3, ], 2), pos = 2)
# Right panel: the same plot for the absolute values of the loadings.
matplot(abs(pca_1$rotation), type = 'l', lty = 1, lwd = 2,
xlab = 'zmienne', ylab = '|ładunki|', ylim = c(0, 1.05),
xaxt = "n")
axis(1, at = 1:3, labels = rownames(pca_1$rotation))
legend('topleft', legend = c('PC1', 'PC2', 'PC3'), ncol = 3, col = 1:3, lwd = 2)
text(rep(1, 3), abs(pca_1$rotation)[1, ], abs(round(pca_1$rotation[1, ], 2)), pos = 4)
text(rep(2, 3), abs(pca_1$rotation)[2, ], abs(round(pca_1$rotation[2, ], 2)), pos = 1)
text(rep(3, 3), abs(pca_1$rotation)[3, ], abs(round(pca_1$rotation[3, ], 2)), pos = 2)
# Default plot of the prcomp object; it is drawn again below after the layout
# is reset to a single panel.
plot(pca_1)
par(mfrow = c(1, 1))
plot(pca_1)
# Component variances (squared standard deviations) and their mean.
pca_1$sdev^2
mean(pca_1$sdev^2)
biplot(pca_1)
# Minimum spanning tree (ape::mst) over distances between the scaled
# observations, drawn at the first two principal-component scores.
# Column 3 is dropped - presumably UrbanPop, matching the PCA variables.
library(ape)
plot(mst(dist(scale(USArrests[, -3]))), x1 = pca_1$x[, 1], x2 = pca_1$x[, 2])