# Task 1 (ZAD1): linear regression of car price on the Automobile data set.
#
# The script fits the same family of models twice -- once on complete cases
# only (`auto_wna`) and once on data with missing values imputed
# (`auto_sel`) -- and prints the variable-selection results, reduced models
# and predictions for both so they can be compared.

# Load and prepare the data ----

# "?" marks a missing value in the raw file.
auto <- read.csv(
  "http://ls.home.amu.edu.pl/data_sets/Automobile.csv",
  sep = ",",
  header = TRUE,
  na.strings = "?"
)
head(auto)

# Recode the door count as a number: "four" -> 4, any other value -> 2.
# Missing entries stay NA because the comparison itself evaluates to NA.
auto$num.of.doors <- ifelse(auto$num.of.doors == "four", 4, 2)

# Complete-case analysis ----

# Drop every row that has a missing value in any column.
auto_wna <- na.omit(auto)
cat("wymiar nowych danych", "\n")
dim(auto_wna)

auto_wna_sel <- subset(
  auto_wna,
  select = c(horsepower, city.mpg, peak.rpm, curb.weight, num.of.doors, price)
)
pairs(auto_wna_sel)

# Full model with all five candidate predictors.
model_1 <- lm(
  price ~ horsepower + city.mpg + peak.rpm + curb.weight + num.of.doors,
  data = auto_wna
)
model_1
coef(model_1)
confint(model_1)
summary(model_1)
fitted(model_1)
residuals(model_1)

# Backward elimination from the full model: the default penalty k = 2 is AIC,
# k = log(n) is BIC.
step(model_1)
step(model_1, k = log(nrow(auto_wna)))

# Forward selection starting from the intercept-only model, again with the
# AIC and BIC penalties.
model_0 <- lm(price ~ 1, data = auto_wna)
step(model_0, direction = "forward", scope = formula(model_1))
step(
  model_0,
  direction = "forward",
  scope = formula(model_1),
  k = log(nrow(auto_wna))
)

# Successively smaller models; for each, show the coefficient table and the
# adjusted R^2.
model_1_1 <- lm(
  price ~ horsepower + city.mpg + curb.weight + num.of.doors,
  data = auto_wna
)
summary(model_1_1)$coefficients
summary(model_1_1)$adj.r.squared

model_1_2 <- lm(
  price ~ horsepower + curb.weight + num.of.doors,
  data = auto_wna
)
summary(model_1_2)$coefficients
summary(model_1_2)$adj.r.squared

model_1_3 <- lm(price ~ horsepower + curb.weight, data = auto_wna)
summary(model_1_3)$coefficients
summary(model_1_3)$adj.r.squared

# Analysis with imputed missing values ----

# Start again from the full data (rows with NA are kept this time).
auto_sel <- subset(
  auto,
  select = c(horsepower, city.mpg, peak.rpm, curb.weight, num.of.doors, price)
)
summary(auto_sel)

# Install Hmisc only when it is missing, so that re-running the script does
# not reinstall the package every time.
if (!requireNamespace("Hmisc", quietly = TRUE)) {
  install.packages("Hmisc")
}
library(Hmisc)

# Replace NAs by the column mean (median for the door count). as.numeric()
# drops the "impute" class so the columns are plain numeric vectors again.
auto_sel$price <- as.numeric(impute(auto_sel$price, mean))
auto_sel$horsepower <- as.numeric(impute(auto_sel$horsepower, mean))
auto_sel$peak.rpm <- as.numeric(impute(auto_sel$peak.rpm, mean))
auto_sel$num.of.doors <- as.numeric(impute(auto_sel$num.of.doors, median))
summary(auto_sel)

cat("2.", "\n")
pairs(auto_sel)

# Full model on the imputed data.
model_1_i <- lm(
  price ~ horsepower + city.mpg + peak.rpm + curb.weight + num.of.doors,
  data = auto_sel
)
model_1_i
coef(model_1_i)
confint(model_1_i)
summary(model_1_i)
fitted(model_1_i)
residuals(model_1_i)

cat("3.", "\n")
# Same selection procedures as above, now on the imputed data.
step(model_1_i)
step(model_1_i, k = log(nrow(auto_sel)))

model_0_i <- lm(price ~ 1, data = auto_sel)
step(model_0_i, direction = "forward", scope = formula(model_1_i))
step(
  model_0_i,
  direction = "forward",
  scope = formula(model_1_i),
  k = log(nrow(auto_sel))
)

cat("4.", "\n")
# Successively smaller models on the imputed data.
model_1_i_1 <- lm(
  price ~ horsepower + city.mpg + curb.weight + peak.rpm,
  data = auto_sel
)
summary(model_1_i_1)$coefficients
summary(model_1_i_1)$adj.r.squared

model_1_i_2 <- lm(
  price ~ horsepower + city.mpg + curb.weight,
  data = auto_sel
)
summary(model_1_i_2)$coefficients
summary(model_1_i_2)$adj.r.squared

model_1_i_3 <- lm(price ~ horsepower + curb.weight, data = auto_sel)
summary(model_1_i_3)$coefficients
summary(model_1_i_3)$adj.r.squared

# Prediction for a new observation ----

new_data <- data.frame(curb.weight = 2823, horsepower = 154)

# Two-predictor model fitted on each version of the data.
model_2 <- lm(price ~ curb.weight + horsepower, data = auto_wna)
model_2_i <- lm(price ~ curb.weight + horsepower, data = auto_sel)

# Point prediction with a prediction interval from each model.
stats::predict(model_2, new_data, interval = "prediction")
stats::predict(model_2_i, new_data, interval = "prediction")

summary(model_2)$adj.r.squared
summary(model_2_i)$adj.r.squared