# wizualizacja-danych/projekt.R
# Projekt 1: Przygotowanie wizualnej analizy danych z wykorzystaniem podstawowej biblioteki graficznej R i/lub biblioteki ggplot2
# zaladowanie bibliotek
library(Hmisc)
library(dplyr)
library(ggplot2)
library(RColorBrewer)
# Load the data set from the working directory.
german_credit_risk <- read.csv("german_credit_data.csv", header = TRUE)

# Inspect the data: first/last rows, structure and summary statistics.
head(german_credit_risk)
tail(german_credit_risk)
str(german_credit_risk)
summary(german_credit_risk)
describe(german_credit_risk)

# Rename the first column.
colnames(german_credit_risk)[1] <- "index"

# Drop incomplete observations. The previous bare `na.omit(german_credit_risk)`
# call discarded its result, so it only printed the data frame and cleaned
# nothing. Completeness is checked on the columns the plots below actually
# use, so rows are not thrown away because of gaps in columns that are never
# drawn (other columns may therefore still contain NA).
plot_columns <- c(
  "Age", "Sex", "Job", "Purpose", "Duration", "Credit.amount"
)
german_credit_risk <- german_credit_risk[
  complete.cases(german_credit_risk[plot_columns]),
]

# Youngest borrower in the cleaned data.
min(german_credit_risk$Age)
# Violin plot: age distribution for every credit purpose, split by sex.
ggplot(german_credit_risk, aes(x = Purpose, y = Age, fill = Sex)) +
  geom_violin(trim = TRUE, position = position_dodge(1)) +
  # Mark the mean age of each Purpose/Sex group. The markers are dodged by the
  # same width as the violins so that each one sits on its own violin rather
  # than both being drawn at the centre of the category.
  stat_summary(
    fun = mean,
    geom = "point",
    shape = 25,
    size = 2,
    position = position_dodge(1)
  ) +
  labs(title = "Credit purpose by age", x = "Purpose", y = "Age") +
  scale_fill_brewer(palette = "Accent") +
  theme_minimal() +
  theme(legend.position = "bottom")
# Scatter plot of credit amount against credit duration, coloured by sex,
# with a smoothed trend line (confidence band switched off) for each sex.
ggplot(
  data = german_credit_risk,
  mapping = aes(x = Duration, y = Credit.amount, colour = Sex)
) +
  geom_point(size = 1.5) +
  geom_smooth(size = 1.5, se = FALSE) +
  theme_minimal() +
  theme(legend.position = "bottom") +
  ggtitle("Credit amount for credit duration") +
  xlab("Duration") +
  ylab("Amount")
# Stacked bar chart: number of credits per job status, filled by purpose.
ggplot(german_credit_risk, aes(x = factor(Job), fill = Purpose)) +
  geom_bar() +
  # Replace the numeric job codes 0-3 on the axis with readable labels.
  scale_x_discrete(
    breaks = 0:3,
    labels = c(
      "Unskilled, non-resident",
      "Unskilled, resident",
      "Skilled",
      "Highly skilled"
    )
  ) +
  labs(
    title = "Credit count and purpose for different job statuses",
    x = "Job status",
    y = "Credit count"
  ) +
  theme_minimal()
# Box plot showing what size of credit people most often take for each purpose.

# Summary function for `stat_summary(fun.data = ...)`: builds the text
# annotation for one group of values.
#
# Arguments:
#   y        Numeric vector with the values of a single group.
#   label_y  Height at which the annotation is placed. Defaults to the largest
#            credit amount in the global `german_credit_risk` data frame, so
#            all annotations share one height. The default is evaluated lazily,
#            so the global is only needed when `label_y` is not supplied.
#
# Returns a one-row data frame with columns `y` (annotation position) and
# `label` (text holding the number of values and their sum).
stat_box_data <- function(y,
                          label_y = max(german_credit_risk$Credit.amount)) {
  data.frame(
    y = label_y,
    label = paste("count =", length(y), "\n",
                  "sum =", sum(y), "\n")
  )
}
# Box plot of credit amount per purpose. Purposes are ordered by their total
# credit amount, and each box is annotated with the count and sum produced by
# `stat_box_data`. The fill legend is hidden because it repeats the x axis.
german_credit_risk %>%
  mutate(Purpose = reorder(Purpose, Credit.amount, FUN = sum)) %>%
  ggplot(mapping = aes(x = Purpose, y = Credit.amount, fill = Purpose)) +
  geom_boxplot(alpha = 0.7) +
  stat_summary(fun.data = stat_box_data, geom = "text", vjust = 1) +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
  labs(y = "Credit amount") +
  guides(fill = "none") +
  theme_bw()