require(odbc) require(dbplyr) require(dplyr) require(tidyr) require(ggplot2) con <- DBI::dbConnect( odbc::odbc(), .connection_string = 'driver={SQL Server};server=mssql-2016.labs.wmi.amu.edu.pl;database=bigdata_s26450;trusted_connection=true') countries <- tbl(con, "dim_countries") drugs <- tbl(con, "dim_drugs") time <- tbl(con, "dim_time") status <- tbl(con, "dim_claim_statuses") # Zaakceptowane i odrzuconce refundacje w latach tbl(con, "FT_Refund") %>% inner_join(time, by = c("response_time_fk" = "time_sk")) %>% inner_join(status, by = c("claim_status_fk" = "claim_status_sk")) %>% filter(year >= 2008 && year <= 2016) %>% group_by(year, status_description) %>% summarize(how_many = sum(cnt)) %>% ungroup() %>% ggplot(aes(year, how_many, fill = status_description)) + geom_bar(stat = "identity") + scale_x_continuous(breaks = 2009:2016) + facet_grid(. ~ status_description) tbl(con, "FT_Refund") %>% inner_join(time, by = c("response_time_fk" = "time_sk")) %>% inner_join(status, by = c("claim_status_fk" = "claim_status_sk")) %>% filter(year >= 2008 && year <= 2016) %>% group_by(year, status_description) %>% summarize(how_many = sum(cnt)) %>% ungroup() %>% ggplot(aes(year, how_many, fill = status_description)) + geom_bar(stat = "identity", position = "dodge") + geom_text(aes(label = how_many)) + scale_x_continuous(breaks = 2009:2016) + ylab("count") tbl(con, "FT_Refund") %>% inner_join(countries, by = c("country_fk" = "country_sk")) %>% inner_join(drugs, by = c("drug_fk" = "drug_sk")) %>% head(n = 500) %>% ggplot(aes(price, reimbursement_amountPercent, color = country_name)) + geom_point() + xlab("Cena") + ylab("% zniki") tbl(con, "FT_Refund") %>% inner_join(time, by = c("response_time_fk" = "time_sk")) %>% inner_join(drugs, by = c("drug_fk" = "drug_sk")) %>% filter(year %in% c(2009, 2010)) %>% mutate(sale = ifelse(reimbursement_amountPercent > 30, "on sale", "not on sale")) %>% ggplot(aes(month, price, fill = drug_product_family_name)) + geom_bar(stat = "identity") + scale_x_continuous("month", breaks = 1:12) + facet_grid(sale ~ year) registration_decades <- tbl(con, "FT_Registration") %>% inner_join(time, by = c("submission_date_fk" = "time_sk")) %>% inner_join(status, by = c("claim_status_fk" = "claim_status_sk")) %>% mutate(decade = floor(year / 10) * 10, got_answer = status_code_bk != "W") %>% filter(decade < 2020, decade >= 1980) with_grouping_by_answer <- registration_decades %>% group_by(decade) %>% summarize(how_many = sum(cnt)) registration_decades %>% group_by(decade, got_answer) %>% summarize(how_many = sum(cnt)) %>% filter(how_many > 0) %>% ggplot(aes(decade, how_many, fill = got_answer)) + geom_bar(stat = "identity") + geom_label(aes(label = how_many)) + geom_label(data = with_grouping_by_answer, aes(y = how_many, label = how_many, fill = NULL))