86 lines
3.0 KiB
R
86 lines
3.0 KiB
R
require(odbc)
|
|
require(dbplyr)
|
|
require(dplyr)
|
|
require(tidyr)
|
|
require(ggplot2)
|
|
|
|
con <- DBI::dbConnect(
|
|
odbc::odbc(),
|
|
.connection_string = 'driver={SQL Server};server=mssql-2016.labs.wmi.amu.edu.pl;database=bigdata_s26450;trusted_connection=true')
|
|
|
|
countries <- tbl(con, "dim_countries")
|
|
drugs <- tbl(con, "dim_drugs")
|
|
time <- tbl(con, "dim_time")
|
|
status <- tbl(con, "dim_claim_statuses")
|
|
|
|
# Zaakceptowane i odrzuconce refundacje w latach
|
|
tbl(con, "FT_Refund") %>%
|
|
inner_join(time, by = c("response_time_fk" = "time_sk")) %>%
|
|
inner_join(status, by = c("claim_status_fk" = "claim_status_sk")) %>%
|
|
filter(year >= 2008 && year <= 2016) %>%
|
|
group_by(year, status_description) %>%
|
|
summarize(how_many = sum(cnt)) %>%
|
|
ungroup() %>%
|
|
ggplot(aes(year, how_many, fill = status_description)) +
|
|
geom_bar(stat = "identity") +
|
|
scale_x_continuous(breaks = 2009:2016) +
|
|
facet_grid(. ~ status_description)
|
|
|
|
tbl(con, "FT_Refund") %>%
|
|
inner_join(time, by = c("response_time_fk" = "time_sk")) %>%
|
|
inner_join(status, by = c("claim_status_fk" = "claim_status_sk")) %>%
|
|
filter(year >= 2008 && year <= 2016) %>%
|
|
group_by(year, status_description) %>%
|
|
summarize(how_many = sum(cnt)) %>%
|
|
ungroup() %>%
|
|
ggplot(aes(year, how_many, fill = status_description)) +
|
|
geom_bar(stat = "identity", position = "dodge") +
|
|
geom_text(aes(label = how_many)) +
|
|
scale_x_continuous(breaks = 2009:2016) +
|
|
ylab("count")
|
|
|
|
|
|
tbl(con, "FT_Refund") %>%
|
|
inner_join(countries, by = c("country_fk" = "country_sk")) %>%
|
|
inner_join(drugs, by = c("drug_fk" = "drug_sk")) %>%
|
|
head(n = 500) %>%
|
|
ggplot(aes(price, reimbursement_amountPercent, color = country_name)) +
|
|
geom_point() +
|
|
xlab("Cena") +
|
|
ylab("% zniki")
|
|
|
|
tbl(con, "FT_Refund") %>%
|
|
inner_join(time, by = c("response_time_fk" = "time_sk")) %>%
|
|
inner_join(drugs, by = c("drug_fk" = "drug_sk")) %>%
|
|
filter(year %in% c(2009, 2010)) %>%
|
|
mutate(sale = ifelse(reimbursement_amountPercent > 30, "on sale", "not on sale")) %>%
|
|
ggplot(aes(month, price, fill = drug_product_family_name)) +
|
|
geom_bar(stat = "identity") +
|
|
scale_x_continuous("month", breaks = 1:12) +
|
|
facet_grid(sale ~ year)
|
|
|
|
|
|
registration_decades <- tbl(con, "FT_Registration") %>%
|
|
inner_join(time, by = c("submission_date_fk" = "time_sk")) %>%
|
|
inner_join(status, by = c("claim_status_fk" = "claim_status_sk")) %>%
|
|
mutate(decade = floor(year / 10) * 10, got_answer = status_code_bk != "W") %>%
|
|
filter(decade < 2020, decade >= 1980)
|
|
|
|
|
|
with_grouping_by_answer <- registration_decades %>%
|
|
group_by(decade) %>%
|
|
summarize(how_many = sum(cnt))
|
|
|
|
registration_decades %>%
|
|
group_by(decade, got_answer) %>%
|
|
summarize(how_many = sum(cnt)) %>%
|
|
filter(how_many > 0) %>%
|
|
ggplot(aes(decade, how_many, fill = got_answer)) +
|
|
geom_bar(stat = "identity") +
|
|
geom_label(aes(label = how_many)) +
|
|
geom_label(data = with_grouping_by_answer, aes(y = how_many, label = how_many, fill = NULL))
|
|
|
|
|
|
|
|
|