hurtownia/raportowanie.r
Mariusz Sielski d6c1eb612a ddd
2019-01-27 14:04:28 +01:00

86 lines
3.0 KiB
R

require(odbc)
require(dbplyr)
require(dplyr)
require(tidyr)
require(ggplot2)
con <- DBI::dbConnect(
odbc::odbc(),
.connection_string = 'driver={SQL Server};server=mssql-2016.labs.wmi.amu.edu.pl;database=bigdata_s26450;trusted_connection=true')
countries <- tbl(con, "dim_countries")
drugs <- tbl(con, "dim_drugs")
time <- tbl(con, "dim_time")
status <- tbl(con, "dim_claim_statuses")
# Zaakceptowane i odrzuconce refundacje w latach
tbl(con, "FT_Refund") %>%
inner_join(time, by = c("response_time_fk" = "time_sk")) %>%
inner_join(status, by = c("claim_status_fk" = "claim_status_sk")) %>%
filter(year >= 2008 && year <= 2016) %>%
group_by(year, status_description) %>%
summarize(how_many = sum(cnt)) %>%
ungroup() %>%
ggplot(aes(year, how_many, fill = status_description)) +
geom_bar(stat = "identity") +
scale_x_continuous(breaks = 2009:2016) +
facet_grid(. ~ status_description)
tbl(con, "FT_Refund") %>%
inner_join(time, by = c("response_time_fk" = "time_sk")) %>%
inner_join(status, by = c("claim_status_fk" = "claim_status_sk")) %>%
filter(year >= 2008 && year <= 2016) %>%
group_by(year, status_description) %>%
summarize(how_many = sum(cnt)) %>%
ungroup() %>%
ggplot(aes(year, how_many, fill = status_description)) +
geom_bar(stat = "identity", position = "dodge") +
geom_text(aes(label = how_many)) +
scale_x_continuous(breaks = 2009:2016) +
ylab("count")
tbl(con, "FT_Refund") %>%
inner_join(countries, by = c("country_fk" = "country_sk")) %>%
inner_join(drugs, by = c("drug_fk" = "drug_sk")) %>%
head(n = 500) %>%
ggplot(aes(price, reimbursement_amountPercent, color = country_name)) +
geom_point() +
xlab("Cena") +
ylab("% zniki")
tbl(con, "FT_Refund") %>%
inner_join(time, by = c("response_time_fk" = "time_sk")) %>%
inner_join(drugs, by = c("drug_fk" = "drug_sk")) %>%
filter(year %in% c(2009, 2010)) %>%
mutate(sale = ifelse(reimbursement_amountPercent > 30, "on sale", "not on sale")) %>%
ggplot(aes(month, price, fill = drug_product_family_name)) +
geom_bar(stat = "identity") +
scale_x_continuous("month", breaks = 1:12) +
facet_grid(sale ~ year)
registration_decades <- tbl(con, "FT_Registration") %>%
inner_join(time, by = c("submission_date_fk" = "time_sk")) %>%
inner_join(status, by = c("claim_status_fk" = "claim_status_sk")) %>%
mutate(decade = floor(year / 10) * 10, got_answer = status_code_bk != "W") %>%
filter(decade < 2020, decade >= 1980)
with_grouping_by_answer <- registration_decades %>%
group_by(decade) %>%
summarize(how_many = sum(cnt))
registration_decades %>%
group_by(decade, got_answer) %>%
summarize(how_many = sum(cnt)) %>%
filter(how_many > 0) %>%
ggplot(aes(decade, how_many, fill = got_answer)) +
geom_bar(stat = "identity") +
geom_label(aes(label = how_many)) +
geom_label(data = with_grouping_by_answer, aes(y = how_many, label = how_many, fill = NULL))