# Statystyka/testowe/zadania.R

# testowy egzamin

# ładowanie danych csv
# Each data set is downloaded from the course page and parsed with the
# default read.csv() settings (header row, comma separator).
computers <- read.csv(
  file = "http://pp98647.home.amu.edu.pl/wp-content/uploads/2021/06/computers.csv"
)
spotify <- read.csv(
  file = "http://pp98647.home.amu.edu.pl/wp-content/uploads/2021/06/spotify.csv"
)
weight_height <- read.csv(
  file = "http://pp98647.home.amu.edu.pl/wp-content/uploads/2021/06/weight-height.csv"
)


# ZAD 2 - tego trochę nie rozumiem
#' Significance test for the variance in the normal model (chi-square test)
#'
#' Tests the null hypothesis that the variance of a normal sample equals
#' `delta_zero^2`. The test statistic is `T = n * S^2 / delta_zero^2`, where
#' `S^2 = mean((x - mean(x))^2)`; under the null hypothesis `T` follows a
#' chi-square distribution with `n - 1` degrees of freedom.
#'
#' @param x Numeric vector of observations (at least two values).
#' @param istotnosc Significance level. Kept in the signature for backward
#'   compatibility; the p-value does not depend on it, so it is not used in
#'   the computation.
#' @param delta_zero Hypothesised standard deviation under the null
#'   hypothesis (it is squared inside the statistic). NOTE(review): inferred
#'   from the original expression, which divided by
#'   `delta_zero * delta_zero` -- confirm against the task statement.
#' @param alternative Alternative hypothesis: `"two.sided"` (default),
#'   `"less"` or `"greater"`.
#' @return An object of class `"htest"` with the elements `statistic`,
#'   `parameter` (degrees of freedom), `p.value`, `alternative`, `method`
#'   and `data.name`.
w_test <- function(x, istotnosc, delta_zero,
                   alternative = c("two.sided", "less", "greater")) {
  alternative <- match.arg(alternative)
  stopifnot(
    is.numeric(x), length(x) >= 2,
    is.numeric(delta_zero), length(delta_zero) == 1, delta_zero > 0
  )

  n <- length(x)

  # n * S^2 is the sum of squared deviations from the sample mean, so the
  # statistic reduces to that sum divided by the hypothesised variance.
  statistic <- sum((x - mean(x))^2) / delta_zero^2

  # Degrees of freedom of the chi-square reference distribution.
  d <- n - 1

  # Tail probabilities of the observed statistic under the null hypothesis.
  lower_tail <- pchisq(statistic, df = d)
  upper_tail <- pchisq(statistic, df = d, lower.tail = FALSE)
  p_value <- switch(alternative,
    "two.sided" = 2 * min(lower_tail, upper_tail),
    "greater" = upper_tail,
    "less" = lower_tail
  )

  # Assemble the result in the standard "htest" layout so print() works.
  names(statistic) <- "T"
  names(d) <- "num df"
  result <- list(
    statistic = statistic,
    parameter = d,
    p.value = p_value,
    alternative = alternative,
    method = "Test istotności dla wariancji w modelu normalnym",
    data.name = deparse(substitute(x))
  )
  class(result) <- "htest"
  result
}


# ZAD 5
# Full linear model: valence explained by all eight audio features.
model_1 <- lm(
  formula = valence ~ acousticness + danceability + energy +
    instrumentalness + liveness + loudness + speechiness + tempo,
  data = spotify
)

# Coefficient table and fit diagnostics of the full model.
summary(model_1)

# Stepwise model selection starting from the full model.
step(model_1)

# Reduced model: the same predictors without tempo
# (presumably the variable dropped by step() above -- verify on the output).
model_2 <- lm(
  formula = valence ~ acousticness + danceability + energy +
    instrumentalness + liveness + loudness + speechiness,
  data = spotify
)

# Single new observation; tempo is carried along although model_2 ignores it.
new_data <- data.frame(
  acousticness = 2.84e-06,
  danceability = 0.305,
  energy = 0.827,
  instrumentalness = 2.45e-03,
  liveness = 0.3350,
  loudness = -5.789,
  speechiness = 0.1470,
  tempo = 159.882
)

# Point forecast with a prediction interval for the new observation.
stats::predict(object = model_2, newdata = new_data, interval = "prediction")

# Same observation with danceability raised from 0.305 to 0.405.
new_data$danceability <- 0.405

stats::predict(object = model_2, newdata = new_data, interval = "prediction")


# ZAD 6
# Two plotting panels side by side, one Q-Q plot per gender.
par(mfrow = c(1, 2))

# Split the observations by gender.
male <- weight_height[weight_height$Gender == "Male", ]
female <- weight_height[weight_height$Gender == "Female", ]

# Men: normality check (Shapiro-Wilk test and normal Q-Q plot),
# followed by the sample mean and variance of height.
shapiro.test(male$Height)
qqnorm(male$Height)
mean(male$Height)
var(male$Height)

# Women: the same diagnostics.
shapiro.test(female$Height)
qqnorm(female$Height)
mean(female$Height)
var(female$Height)

# One-sided two-sample t-test; the alternative is that the mean height of
# men is greater than the mean height of women. Only the p-value is shown.
t.test(x = male$Height, y = female$Height, alternative = "greater")$p.value
# The original author reports a very small p-value here, i.e. the null
# hypothesis is rejected in favour of men being taller on average, so the
# data do not support the claim that women are taller than men.


# ZAD 7
# Keep only the computers whose screen value equals 14.
selected <- computers[computers$screen == 14, ]

# Number of selected computers for each RAM size.
liczebnosc <- table(selected$ram)

# Percentage share of each RAM size (printed).
prop.table(liczebnosc) * 100

# Summary table: count and percentage per RAM size, one row per RAM value.
# The shares are computed from the frequency table (not from the raw ram
# column) and scaled to percent so they match the table printed above.
ram_procent <- data.frame(cbind(
  liczebnosc = liczebnosc,
  procent = prop.table(liczebnosc) * 100
))