2021-04-14 00:14:04 +02:00
|
|
|
# Project 1: Visual data analysis prepared with the base R graphics library and/or the ggplot2 library
|
|
|
|
# Load libraries
|
|
|
|
library(Hmisc)
|
|
|
|
library(dplyr)
|
|
|
|
library(ggplot2)
|
|
|
|
library(RColorBrewer)
|
|
|
|
|
|
|
|
# Load the data set; the path is relative to the current working directory.
german_credit_risk <- read.csv(
  file = "proj1/german_credit_data.csv",
  header = TRUE
)
|
2021-04-14 00:14:04 +02:00
|
|
|
|
|
|
|
# Inspect the data: first and last rows, structure, and summary statistics.
head(german_credit_risk)
tail(german_credit_risk)
str(german_credit_risk)
summary(german_credit_risk)
Hmisc::describe(german_credit_risk)
|
|
|
|
|
|
|
|
# Rename the first column to "index".
names(german_credit_risk)[1] <- "index"

# Smallest value in the Age column.
min(german_credit_risk[["Age"]])

# NOTE(review): the result is not assigned, so `german_credit_risk` itself is
# left unchanged by this call.
na.omit(german_credit_risk)
|
|
|
|
|
|
|
|
# Violin plot: age distribution for each credit purpose, split by sex.
ggplot(german_credit_risk, aes(x = Purpose, y = Age, fill = Sex)) +
  geom_violin(trim = TRUE, position = position_dodge(1)) +
  # Dodge the mean markers by the same width as the violins so that each
  # marker is drawn on its own violin instead of at the centre of the pair.
  stat_summary(
    fun = mean,
    geom = "point",
    shape = 25,
    size = 2,
    position = position_dodge(1)
  ) +
  labs(title = "Credit purpose by age", x = "Purpose", y = "Age") +
  scale_fill_brewer(palette = "Accent") +
  theme_minimal() +
  theme(legend.position = "bottom")
|
|
|
|
|
2021-04-14 15:23:03 +02:00
|
|
|
# Ridge plot: credit amount density for each checking account status.
# Rows with missing values are dropped here; the filtered data frame replaces
# `german_credit_risk`, so every later plot uses the filtered data as well.
german_credit_risk <- na.omit(german_credit_risk)

# The ridge geom and theme come from the ggridges package, which is not
# attached at the top of the script, so they are referenced via the namespace.
ggplot(
  german_credit_risk,
  aes(x = Credit.amount, y = Checking.account, fill = Checking.account)
) +
  ggridges::geom_density_ridges_gradient(
    scale = 8,
    show.legend = TRUE,
    rel_min_height = 0.00
  ) +
  ggridges::theme_ridges() +
  scale_fill_brewer(palette = 4) +
  scale_y_discrete(expand = c(0.01, 0)) +
  scale_x_continuous(expand = c(0.01, 0)) +
  labs(
    x = "Credit amount [DM]",
    y = "Checking account",
    fill = "Checking account status"
  ) +
  ggtitle("Credit amount density estimation by checking account status ") +
  # Centre the plot title.
  theme(plot.title = element_text(hjust = 0.5))
|
|
|
|
|
2021-04-14 01:07:46 +02:00
|
|
|
|
|
|
|
# Scatter plot: credit amount against credit duration, coloured by sex, with a
# smoothed trend line per sex and no confidence band.
ggplot(
  data = german_credit_risk,
  mapping = aes(x = Duration, y = Credit.amount, color = Sex)
) +
  geom_point(size = 1.5) +
  geom_smooth(se = FALSE, size = 1.5) +
  ggtitle("Credit amount for credit duration") +
  xlab("Duration") +
  ylab("Amount") +
  theme_minimal() +
  theme(legend.position = "bottom")
|
|
|
|
|
|
|
|
# Bar chart: number of credits for each job status, filled by credit purpose.
ggplot(
  data = german_credit_risk,
  mapping = aes(x = factor(Job), fill = Purpose)
) +
  geom_bar() +
  # Replace the numeric job codes 0-3 on the axis with readable labels.
  scale_x_discrete(
    breaks = 0:3,
    labels = c(
      "Unskilled, non-resident",
      "Unskilled, resident",
      "Skilled",
      "Highly skilled"
    )
  ) +
  ggtitle("Credit count and purpose for different job statuses") +
  xlab("Job status") +
  ylab("Credit count") +
  theme_minimal()
|
2021-04-14 14:44:19 +02:00
|
|
|
|
|
|
|
# Box plot showing the size of credits people most often take for each purpose.

#' Build the text annotation for one group of the credit amount box plot.
#'
#' Intended to be passed as the `fun.data` argument of
#' `ggplot2::stat_summary()`, which calls it once per group.
#'
#' @param y Numeric vector with the values of a single group.
#' @param label_y Vertical position of the annotation. Defaults to the largest
#'   `Credit.amount` in the global `german_credit_risk` data frame; the
#'   default is only evaluated when no value is supplied.
#' @return A data frame with columns `y` (the label position) and `label`
#'   (the count and the sum of `y`, one per line).
stat_box_data <- function(y,
                          label_y = max(german_credit_risk$Credit.amount)) {
  data.frame(
    y = label_y,
    label = paste("count =", length(y), "\n", "sum =", sum(y), "\n")
  )
}
|
|
|
|
|
|
|
|
# One box per purpose, with purposes ordered by their total credit amount;
# each box is annotated with the label produced by `stat_box_data()`.
german_credit_risk %>%
  mutate(Purpose = reorder(Purpose, Credit.amount, sum)) %>%
  ggplot(mapping = aes(x = Purpose, y = Credit.amount, fill = Purpose)) +
  geom_boxplot(alpha = 0.7) +
  # Hide the fill legend.
  guides(fill = "none") +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
  stat_summary(fun.data = stat_box_data, geom = "text", vjust = 1) +
  labs(y = "Credit amount") +
  theme_bw()
|
2021-04-21 17:37:58 +02:00
|
|
|
|
|
|
|
# Histogram of the number of credits taken by age, filled by credit purpose,
# with a density line scaled to counts.
german_credit_risk %>%
  ggplot(aes(x = Age, fill = Purpose)) +
  geom_histogram(color = "white", binwidth = 1) +
  scale_x_continuous(name = "Age", breaks = scales::pretty_breaks(n = 10)) +
  ylab("Number of credits") +
  # `inherit.aes = FALSE` keeps the `fill` mapping out of this layer, so the
  # density is computed over all purposes together. `after_stat(count)`
  # replaces the deprecated `..count..` notation.
  stat_density(
    aes(x = Age, y = after_stat(count)),
    geom = "line",
    inherit.aes = FALSE,
    size = 1.10,
    color = "#555555",
    adjust = 1
  )
|
|
|
|
|