Statystyka/testowe/zadania.R

# testowy egzamin

# Load the three exam datasets (CSV files with a header row) directly from
# the course web server. Each call downloads the file on every run.
computers <- read.csv(
  file = "http://pp98647.home.amu.edu.pl/wp-content/uploads/2021/06/computers.csv"
)
spotify <- read.csv(
  file = "http://pp98647.home.amu.edu.pl/wp-content/uploads/2021/06/spotify.csv"
)
weight_height <- read.csv(
  file = "http://pp98647.home.amu.edu.pl/wp-content/uploads/2021/06/weight-height.csv"
)


# ZAD 2
# Significance test for the variance in the normal model (chi-square test).
#
# Tests H0: sigma = delta_zero using the statistic
#   T = n * S^2 / delta_zero^2,  S^2 = (1 / n) * sum((x - mean(x))^2),
# which under H0 follows a chi-square distribution with n - 1 degrees of
# freedom.
#
# Arguments:
#   x           numeric sample with at least two observations
#   istotnosc   significance level; kept in the signature for compatibility.
#               The p-value does not depend on it - compare the returned
#               p.value against this level to make the decision.
#   delta_zero  hypothesised standard deviation (> 0); it is squared
#               internally to obtain the hypothesised variance
#   alternative 'two.sided' (default), 'less' or 'greater'
#
# Returns an object of class 'htest' with the statistic (T), the degrees of
# freedom, the p-value, the alternative, the method name and the data name.
w_test <- function(x, istotnosc, delta_zero, alternative = c('two.sided', 'less', 'greater')) {
  # capture the expression passed as x for the printed "data:" line
  data_name <- deparse(substitute(x))
  alternative <- match.arg(alternative)
  stopifnot(
    is.numeric(x), length(x) >= 2,
    is.numeric(delta_zero), length(delta_zero) == 1, delta_zero > 0
  )

  # test statistic: n * S^2 reduces to the sum of squared deviations
  n <- length(x)
  statistic <- sum((x - mean(x))^2) / delta_zero^2

  # degrees of freedom of the chi-square reference distribution
  d <- n - 1

  # p-value: start from the upper tail P(chi2_d >= T), then adapt it to the
  # requested alternative
  upper_tail <- pchisq(statistic, df = d, lower.tail = FALSE)
  p_value <- switch(alternative,
                    'two.sided' = 2 * min(upper_tail, 1 - upper_tail),
                    'greater' = upper_tail,
                    'less' = 1 - upper_tail)

  # result in the standard 'htest' layout so that print() formats it
  names(statistic) <- 'T'
  names(d) <- 'num df'
  result <- list(statistic = statistic,
                 parameter = d,
                 p.value = p_value,
                 alternative = alternative,
                 method = 'Test istotności dla wariancji w modelu normalnym',
                 data.name = data_name)
  class(result) <- 'htest'
  result
}


# ZAD 5
# Full linear model: valence explained by all eight audio features.
model_1 <- lm(
  valence ~ acousticness + danceability + energy + instrumentalness +
    liveness + loudness + speechiness + tempo,
  data = spotify
)

summary(model_1)

# Stepwise model selection starting from the full model; the result is only
# printed here, not stored.
step(model_1)

# Reduced model: the same predictors as model_1 without tempo.
model_2 <- lm(
  valence ~ acousticness + danceability + energy + instrumentalness +
    liveness + loudness + speechiness,
  data = spotify
)

# Observation to predict for. tempo is listed as well, although model_2
# does not use it.
new_data <- data.frame(
  acousticness = 2.84e-06,
  danceability = 0.305,
  energy = 0.827,
  instrumentalness = 2.45e-03,
  liveness = 0.3350,
  loudness = -5.789,
  speechiness = 0.1470,
  tempo = 159.882
)

# Point prediction with a prediction interval.
stats::predict(model_2, newdata = new_data, interval = "prediction")

# Same observation with danceability raised from 0.305 to 0.405.
new_data$danceability <- 0.405

stats::predict(model_2, newdata = new_data, interval = "prediction")


# ZAD 6
# Split the plotting device into one row with two panels, so the two Q-Q
# plots below are drawn side by side.
par(mfrow = c(1, 2))

# Rows with Gender == "Male": Shapiro-Wilk normality test and normal Q-Q plot
# of Height, followed by its sample mean and variance.
male <- weight_height[weight_height$Gender == "Male", ]
shapiro.test(male$Height)
qqnorm(male$Height)
mean(male$Height)
var(male$Height)

# The same checks and summaries for rows with Gender == "Female".
female <- weight_height[weight_height$Gender == "Female", ]
shapiro.test(female$Height)
qqnorm(female$Height)
mean(female$Height)
var(female$Height)

# One-sided two-sample t-test; the alternative is that the mean male height
# is greater than the mean female height. Only the p-value is printed.
t.test(male$Height, female$Height, alternative = 'greater')$p.value
# Original note: the p-value is very small, so the hypothesis that women are
# at least as tall as men is implausible - H0 is rejected in favour of men
# being taller on average.


# ZAD 7
# RAM sizes among computers whose screen equals 14: counts and percentage
# shares per RAM value.
# which() drops rows where screen is NA instead of keeping them as all-NA rows.
selected <- computers[which(computers$screen == 14), ]
liczebnosc <- table(selected$ram)
prop.table(liczebnosc) * 100
# Shares must come from the frequency table, not from the raw ram column:
# prop.table() on the raw values yields one number per row (value / sum of
# values), which does not line up with the per-level counts.
ram_procent <- data.frame(
  liczebnosc = as.vector(liczebnosc),
  procent = as.vector(prop.table(liczebnosc)) * 100,
  row.names = names(liczebnosc)
)
lab1-3 2021-06-17 15:50:43 +02:00			`# testowy egzamin`

			`# ładowanie danych csv`
			`computers <- read.csv("http://pp98647.home.amu.edu.pl/wp-content/uploads/2021/06/computers.csv")`
			`spotify <- read.csv("http://pp98647.home.amu.edu.pl/wp-content/uploads/2021/06/spotify.csv")`
			`weight_height <- read.csv("http://pp98647.home.amu.edu.pl/wp-content/uploads/2021/06/weight-height.csv")`


			`# ZAD 2 - tego trochę nie rozumiem`
			`w_test <- function(x, istotnosc, delta_zero, alternative = c('two.sided', 'less', 'greater')) {`

			`# statystyka testowa`
			`ss <- (1 / length(x)) * (var(x) - mean(x))`
			`statistic <- length(x) * ss / delta_zero * delta_zero`

			`# parametr w obszarach krytycznych`
			`d <- length(x) - 1`

			`# poziom istotności`
			`alternative <- match.arg(alternative)`
			`p_value <- istotnosc`
			`p_value <- switch(alternative,`
			`'two.sided' = 2 * min(p_value, 1 - p_value),`
			`'greater' = p_value,`
			`'less' = 1 - p_value)`

			`# rezultat`
			`names(statistic) <- 'T'`
			`names(d) <- 'num df'`
			`result <- list(statistic = statistic,`
			`parameter = d,`
			`p.value = p_value,`
			`alternative = alternative,`
			`method = 'Test istotności dla wariancji w modelu normalnym',`
			`data.name = deparse(substitute(x)))`
			`class(result) <- 'htest'`
			`return(result)`
			`}`


			`# ZAD 5`
			`model_1 <- lm(valence ~ acousticness + danceability + energy + instrumentalness +`
			`liveness + loudness + speechiness + tempo, data = spotify)`

			`summary(model_1)`

			`step(model_1)`

			`model_2 <- lm(valence ~ acousticness + danceability + energy + instrumentalness +`
			`liveness + loudness + speechiness, data = spotify)`

			`new_data <- data.frame(acousticness=2.84e-06, danceability=0.305, energy=0.827,`
			`instrumentalness=2.45e-03, liveness=0.3350, loudness=-5.789,`
			`speechiness=0.1470, tempo=159.882)`

			`stats::predict(model_2, new_data, interval = "prediction")`

			`new_data <- data.frame(acousticness=2.84e-06, danceability=0.405, energy=0.827,`
			`instrumentalness=2.45e-03, liveness=0.3350, loudness=-5.789,`
			`speechiness=0.1470, tempo=159.882)`

			`stats::predict(model_2, new_data, interval = "prediction")`


			`# ZAD 6`
			`par(mfrow = c(1, 2))`

			`male <- weight_height[weight_height$Gender == "Male", ]`
			`shapiro.test(male$Height)`
			`qqnorm(male$Height)`
			`mean(male$Height)`
			`var(male$Height)`

			`female <- weight_height[weight_height$Gender == "Female", ]`
			`shapiro.test(female$Height)`
			`qqnorm(female$Height)`
			`mean(female$Height)`
			`var(female$Height)`

			`t.test(male$Height, female$Height, alternative = 'greater')$p.value`
			`#bardzo mała wartość, czyli hipoteza że kobiety są jest większe niż mężczyźni jest mało prawdopodobne`


			`# ZAD 7`
			`selected <- computers[computers$screen == 14, ]`
			`liczebnosc <- table(selected$ram)`
			`prop.table(liczebnosc)*100`
			`ram_procent <- data.frame(cbind(liczebnosc = table(selected$ram),`
			`procent = prop.table(selected$ram)))`