# dataset: https://www.kaggle.com/aiaiaidavid/airplane-crash-fatalities-since-1908-dv-03032020 # setup srodowiska #### setwd('/Users/patrycjalazna/Desktop/wizualizacja-danych/projekt_3/') # importy #### library(shiny) library(shinyTime) library(shinyjs) library(highcharter) library(splusTimeDate) library(tidyverse) library(lubridate) library(gridExtra) library(usmap) library(tidytext) library(tm) library(SnowballC) library(wordcloud) library(dplyr) library(streamgraph) library(plotly) library(DT) # zaladowanie datasetu #### airplane_crashes <- read.csv('airplane_crashes.csv') # podsumowanie #### summary <- summary(airplane_crashes) colnames(airplane_crashes) # preprocessing #### # zmiana nazwy kolumn colnames(airplane_crashes) <- c('Date', 'Time', 'Location', 'Operator', 'Flight', 'Route', 'Type', 'Registration', 'cn/ln', 'Total_Onboard', 'Passengers_Onboard', 'Crew_Onboard', 'Total_Fatalities', 'Passengers_Fatalities', 'Crew_Fatalities', 'Ground', 'Summary') # konwersja na prawidlowy format Date airplane_crashes$Date <- mdy(airplane_crashes$Date) airplane_crashes$Time <- hm(airplane_crashes$Time) # ekstrakcja informacji z datasetu, zapis w poprawnym formacie # ekstrakcja informacji z kolumny Date airplane_crashes$Year <- year(airplane_crashes$Date) airplane_crashes$Month <- as.factor(month(airplane_crashes$Date)) airplane_crashes$Day <- as.factor(day(airplane_crashes$Date)) airplane_crashes$Weekday <- as.factor(wday(airplane_crashes$Date)) airplane_crashes$Is_Leap_Year <- leap_year(airplane_crashes$Date) airplane_crashes$Decade <- year(floor_date(airplane_crashes$Date, years(10))) airplane_crashes$Date <- format(as.Date(airplane_crashes$Date, '%m/%d/%Y'), '%d/%m/%Y') # ekstrakcja godziny, minuty i AM/PM z kolumny Time airplane_crashes$Hour <- as.integer(hour(airplane_crashes$Time)) airplane_crashes$Minute <- as.factor(minute(airplane_crashes$Time)) airplane_crashes$AM_PM <- if_else(am(airplane_crashes$Time), 'AM', 'PM') airplane_crashes$btwn_6PM_6AM <- if_else # ShinyApp #### ui <- fluidPage( id = "main_content", navbarPage("Airplane crashes from 1908 to 2020", # tabPanel("General overview", # sidebarLayout( # sidebarPanel( # # wybór daty # dateRangeInput('dates', # 'Date range:', # min(airplane_crashes$Date), # max(airplane_crashes$Date)), # # wybor godziny # timeInput("time", "Time (local):", seconds = FALSE) # ), # mainPanel( # # wykresiki # plotlyOutput("weekdayCrashes"), # fixedRow( # column(3, plotlyOutput("hourCrashes")), # column(3, plotlyOutput("monthCrashes")) # ) # ) # ) # ), tabPanel("General overview", fixedRow( column(3, dateRangeInput('dates', 'Date range:', min(airplane_crashes$Date), max(airplane_crashes$Date)), # wybor godziny timeInput("time", "Time (local):", seconds = FALSE) ), column(6, plotOutput("crashes_each_year")) ), fixedRow( column(3, plotlyOutput("hourCrashes")), column(3, plotlyOutput("weekdayCrashes")), column(3, plotlyOutput("monthCrashes")) )), tabPanel("Streamgraph", mainPanel( streamgraphOutput("operatorCrashes") ) ), tabPanel("Map", mainPanel( # mapy plotlyOutput("mapPlot") #plotOutput("mapPlot") ) ), tabPanel("Data", sidebarPanel( dateRangeInput('dataDates', 'Date range:', min(airplane_crashes$Date), max(airplane_crashes$Date)), downloadButton('downloadData', 'Download') ), mainPanel( DT::dataTableOutput('allData') )) ) ) server <- function(input, output, session) { # godzina observe(print(strftime(input$time, "%R"))) output$crashes_each_year <- renderPlot({ year_wise <- airplane_crashes %>% count(Year) year_wise <- year_wise %>% filter(Year > year(input$dates[1])) %>% filter(Year < year(input$dates[2])) ggplot(year_wise, aes(x = Year, y = n)) + geom_col(fill = '#0f4c75', col = 'white') + labs(title = 'No. of Airplane crashes since 1908 each year', x = '', y = '') + scale_x_continuous(breaks = seq(1908, 2020, 4)) }) output$weekdayCrashes <- renderPlotly({ week_day <- airplane_crashes %>% count(Weekday) week_day$Weekday <- as.character(week_day$Weekday) week_day$Weekday[week_day$Weekday == 1] <- "Monday" week_day$Weekday[week_day$Weekday == 2] <- "Tuesday" week_day$Weekday[week_day$Weekday == 3] <- "Wednesday" week_day$Weekday[week_day$Weekday == 4] <- "Thursday" week_day$Weekday[week_day$Weekday == 5] <- "Friday" week_day$Weekday[week_day$Weekday == 6] <- "Saturday" week_day$Weekday[week_day$Weekday == 7] <- "Sunday" # zmiana kolejnosci dni; dni nie powinny sie wyswietlac alfabetycznie week_day$Weekday <- factor(week_day$Weekday, levels= c('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday')) # plot_ly( # x = week_day$Weekday, y = week_day$n, # name = 'No. of crashes per week day', type = "bar" # ) %>% # layout(title = 'Week Day Crashes', # xaxis = list(title = "Weekdays", # zeroline = FALSE), # yaxis = list(title = "No. of crashes", # zeroline = FALSE)) df <- week_day df <- df %>% arrange(Weekday) fig <- df %>% plot_ly(labels = ~Weekday, values = ~n, sort=FALSE) fig <- fig %>% add_pie(hole = 0.4) fig <- fig %>% layout(title = "No. of crashes per weekday", xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE), yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE)) fig # # df <- df %>% arrange(Hour) # df$Hour <- as.character(df$Hour) # df$Hour <- paste(df$Hour, ":00", sep = "") # fig <- df %>% plot_ly(labels = ~Hour, values = ~n, sort=FALSE) # fig <- fig %>% add_pie(hole = 0.4) # fig <- fig %>% layout(title = "No. of crashes per hour", xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE), # yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE)) # # fig }) output$hourCrashes <- renderPlotly({ crashes_per_hours <- airplane_crashes %>% count(Hour) crashes_per_hours <- crashes_per_hours[-c(1,26,27,28), ] #dodanie pozycji labeli # crashes_per_hours <- crashes_per_hours %>% # arrange(Hour) %>% # mutate(lab.ypos = cumsum(n) - 0.5*n) # # ggplot(crashes_per_hours, aes(x = "", y = n, fill = Hour)) + # geom_bar(stat="identity", width=1, color="white") + # coord_polar("y", start=0) + # geom_text(aes(y = lab.ypos, label = Hour), color = "white")+ # theme_void() + # theme(legend.position="none") df <- crashes_per_hours crashes_per_months$Month[crashes_per_months$Month == 1] <- "January" df <- df %>% arrange(Hour) df$Hour <- as.character(df$Hour) df$Hour <- paste(df$Hour, ":00", sep = "") fig <- df %>% plot_ly(labels = ~Hour, values = ~n, sort=FALSE) fig <- fig %>% add_pie(hole = 0.4) fig <- fig %>% layout(title = "No. of crashes per hour", xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE), yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE)) fig }) output$monthCrashes <- renderPlotly({ crashes_per_months <- airplane_crashes %>% count(Month) crashes_per_months$Month <- as.character(crashes_per_months$Month) crashes_per_months$Month[crashes_per_months$Month == 1] <- "January" crashes_per_months$Month[crashes_per_months$Month == 2] <- "February" crashes_per_months$Month[crashes_per_months$Month == 3] <- "March" crashes_per_months$Month[crashes_per_months$Month == 4] <- "April" crashes_per_months$Month[crashes_per_months$Month == 5] <- "May" crashes_per_months$Month[crashes_per_months$Month == 6] <- "June" crashes_per_months$Month[crashes_per_months$Month == 7] <- "July" crashes_per_months$Month[crashes_per_months$Month == 8] <- "August" crashes_per_months$Month[crashes_per_months$Month == 9] <- "September" crashes_per_months$Month[crashes_per_months$Month == 10] <- "October" crashes_per_months$Month[crashes_per_months$Month == 11] <- "November" crashes_per_months$Month[crashes_per_months$Month == 12] <- "December" df <- crashes_per_months df <- df %>% arrange(match(Month, c("January", "February","March", "April", "May", "June", "July", "August", "September", "October", "November", "December"))) fig <- df %>% plot_ly(labels = ~Month, values = ~n, sort=FALSE) fig <- fig %>% add_pie(hole = 0.4) fig <- fig %>% layout(title = "No. of crashes per month", xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE), yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE)) fig }) output$operatorCrashes <- renderStreamgraph({ main_type_wise_year <- airplane_crashes %>% select(Year, Type) %>% # replace model number by empty strings mutate(main_type = str_replace_all(Type, "[A-Za-z]*-?\\d+-?[A-Za-z]*.*", "")) %>% # skip empty strings row filter(main_type > 'A') %>% group_by(Year, main_type) %>% summarize(n = n()) %>% group_by(Year) %>% top_n(5, n) # i teraz z tego streamgraph main_type_wise_year <- main_type_wise_year %>% arrange(-n) streamgraph <- main_type_wise_year %>% group_by(Year, main_type) %>% tally(wt=n) %>% streamgraph("main_type", "n", "Year", interpolate="cardinal") %>% sg_fill_brewer("PuOr") %>% sg_legend(TRUE) streamgraph }) output$mapPlot <- renderPlotly({ states_list <- c('Alabama','Alaska','Alaksa','Arizona','Arkansas',"California", "Colorado", "Connecticut","Delaware","Florida","Georgia", "Hawaii","Idaho","Illinois", "Indiana","Iowa","Kansas", "Kentucky","Louisiana","Maine","Maryland", "Massachusetts", "Massachusett", "Michigan","Minnesota","Mississippi","Missouri", "Montana", "Nebraska","Nevada","New Hampshire","New Jersey", "New Mexico","New York", "North Carolina","North Dakota","Ohio", "Oklahoma", "Oklohoma", "Oregon","Pennsylvania", "Rhode Island", "South Carolina", "South Dakota",'Tennesee',"Tennessee","Texas","Utah", "Vermont", 'Virginia',"Washington D.C.", "Washington, D.C.", "Washington", "West Virginia","Wisconsin","Wyoming", "AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DC", "DE", "FL", "GA", "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD", "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ", "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC", "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY") location <- airplane_crashes %>% select(Location) for(state in states_list) { location <- location %>% mutate(Location = str_replace_all(Location, state, paste(state, ', USA', sep = ''))) %>% mutate(Location = str_replace_all(Location, 'USA.*, ', '')) %>% mutate(Location = str_replace(Location, 'West Virginia, USA,', '')) %>% mutate(Location = str_replace(Location, 'Afghanstan', 'Afghanistan')) %>% mutate(Location = str_replace(Location, 'Airzona|Arazona', 'Arizona')) %>% mutate(Location = str_replace(Location, 'Alakska', 'Alaska')) %>% mutate(Location = str_replace(Location, 'Cailifornia|Calilfornia', 'California')) %>% mutate(Location = str_replace(Location, 'D.*Congo', 'DR Congo')) %>% mutate(Location = str_replace(Location, 'Domincan Republic', 'Dominican Republic')) %>% mutate(Location = str_replace(Location, 'Hati', 'Haiti')) %>% mutate(Location = str_replace(Location, ' International Airport', '')) %>% mutate(Location = str_replace(Location, 'Morrocco|Morroco', 'Morocco')) %>% mutate(Location = str_replace(Location, 'Phillipines', 'Phillipines')) %>% mutate(Location = str_replace(Location, 'Burma', 'Myanmar')) %>% mutate(Location = str_replace(Location, '([Ss]outhern|[Nn]orthern|[Ww]estern|[Ee]astern) ', ''))} country_state <- location %>% select(Location) %>% filter(!str_detect(Location, '[Oo]cean|[Ss]ea|[Cc]hannel|Gulf of')) %>% mutate(Location = str_replace(Location, '(Near|Off|Over) ', '')) %>% mutate(Location = str_replace(Location, 'USA, Australia', 'Australia')) %>% mutate(State_Province = str_replace(Location, '(.*, )?(.*), (.*)', '\\2')) %>% mutate(Country = str_replace(Location, '.*,\\s*', '')) cntry <- country_state %>% group_by(Country) %>% summarize(n = n()) %>% arrange(desc(n)) # podzial na kategorie wg liczby wypadkow w danym kraju cntry <- cntry %>% mutate(m = case_when( n > 200 ~ "200 +", n < 200 & n >= 100 ~ "199 - 100", n < 100 & n >= 50 ~ "99 - 50", n < 50 & n >= 10 ~ "49 - 10", n < 10 ~ "< 10")) %>% mutate(m = factor(m, levels = c("< 10", "49 - 10", "99 - 50", "199 - 100", "200 +"))) world_map <- map_data("world") map_data <- cntry %>% full_join(world_map, by = c('Country' = 'region')) map_pal = c('#E6CE39', '#F09B3C', '#D94B41', '#CD3CF0', '#555CEB') ggplot_obj <- ggplot(map_data, aes(x = long, y = lat, group = group, fill = m)) + geom_polygon(colour = "white") + labs(title = 'No. of crashes in each country', x = '', y = '', fill = '') + scale_fill_manual(values = map_pal, na.value = 'whitesmoke') + theme(legend.position='right', legend.justification = "top") + guides(fill = guide_legend(reverse = TRUE)) ggplotly( p = ggplot_obj, tooltip = "all", dynamicTicks = FALSE, layerData = 1, originalData = TRUE ) }) output$downloadData <- downloadHandler( filename = function() { paste('airplane_crashes', '.csv', sep='') }, content = function(file) { write.csv(airplane_crashes, file) } ) output$allData <- DT::renderDataTable({ airplane_crashes %>% # filter(Year == input$dataDates) %>% DT::datatable() }) } shinyApp(ui = ui, server = server) # tutaj sb rysuje wykresiki demo, zeby nie odpalac shiny za kazdym razem #### # number of fatal accidents by operator - top 5 ## # extract and group by manufacturer # main_type_wise_year <- airplane_crashes %>% # select(Year, Type) %>% # # replace model number by empty strings # mutate(main_type = str_replace_all(Type, "[A-Za-z]*-?\\d+-?[A-Za-z]*.*", "")) %>% # # skip empty strings row # filter(main_type > 'A') %>% # group_by(Year, main_type) %>% # summarize(n = n()) %>% # group_by(Year) %>% # top_n(5, n) # #%>% mutate(main_type = reorder_within(-n, main_type, Year)) # # # i teraz z tego streamgraph # main_type_wise_year <- main_type_wise_year %>% arrange(-n) # main_type_wise_year %>% # group_by(Year, main_type) %>% # tally(wt=n) %>% # streamgraph("main_type", "n", "Year", interpolate="cardinal") %>% # sg_fill_brewer("PuOr") %>% # sg_legend(TRUE) #### # airplane_operators <- airplane_crashes %>% count(Operator, sort = TRUE) # airplane_operators <- head(airplane_operators, 5) # ggplot(airplane_operators, aes(x = Operator, y = n)) + # geom_point(color = "#765432", alpha = 0.8) + # labs(title = "No. of fatal accidents by operator", x = '', y = '') # # airplane_operators %>% # streamgraph(key="Operator", value=, date="Year", offset="zero") %>% # sg_fill_brewer("BuPu") # wykres ile wypadkow kazdego operatora w kazdym roku