2021-06-23 14:40:48 +02:00
|
|
|
# dataset: https://www.kaggle.com/aiaiaidavid/airplane-crash-fatalities-since-1908-dv-03032020
|
2021-06-22 19:53:23 +02:00
|
|
|
# setup srodowiska ####
|
2021-06-23 14:40:48 +02:00
|
|
|
setwd('/Users/patrycjalazna/Desktop/wizualizacja-danych/projekt_3/')
|
2021-06-22 19:53:23 +02:00
|
|
|
|
|
|
|
# importy ####
|
|
|
|
library(shiny)
|
|
|
|
library(shinyTime)
|
|
|
|
library(shinyjs)
|
|
|
|
library(highcharter)
|
|
|
|
library(splusTimeDate)
|
|
|
|
library(tidyverse)
|
|
|
|
library(lubridate)
|
|
|
|
library(gridExtra)
|
|
|
|
library(usmap)
|
|
|
|
library(tidytext)
|
|
|
|
library(tm)
|
|
|
|
library(SnowballC)
|
|
|
|
library(wordcloud)
|
|
|
|
library(dplyr)
|
|
|
|
library(streamgraph)
|
|
|
|
library(plotly)
|
|
|
|
library(DT)
|
|
|
|
|
|
|
|
|
|
|
|
# zaladowanie datasetu ####
|
|
|
|
airplane_crashes <- read.csv('airplane_crashes.csv')
|
|
|
|
|
|
|
|
# podsumowanie ####
|
|
|
|
summary <- summary(airplane_crashes)
|
|
|
|
colnames(airplane_crashes)
|
|
|
|
|
|
|
|
# preprocessing ####
|
|
|
|
# zmiana nazwy kolumn
|
|
|
|
colnames(airplane_crashes) <- c('Date', 'Time', 'Location', 'Operator', 'Flight', 'Route', 'Type',
|
|
|
|
'Registration', 'cn/ln', 'Total_Onboard', 'Passengers_Onboard',
|
|
|
|
'Crew_Onboard', 'Total_Fatalities', 'Passengers_Fatalities',
|
|
|
|
'Crew_Fatalities', 'Ground', 'Summary')
|
|
|
|
|
|
|
|
# konwersja na prawidlowy format Date
|
|
|
|
airplane_crashes$Date <- mdy(airplane_crashes$Date)
|
|
|
|
airplane_crashes$Time <- hm(airplane_crashes$Time)
|
|
|
|
|
|
|
|
# ekstrakcja informacji z datasetu, zapis w poprawnym formacie
|
|
|
|
# ekstrakcja informacji z kolumny Date
|
|
|
|
airplane_crashes$Year <- year(airplane_crashes$Date)
|
|
|
|
airplane_crashes$Month <- as.factor(month(airplane_crashes$Date))
|
|
|
|
airplane_crashes$Day <- as.factor(day(airplane_crashes$Date))
|
|
|
|
airplane_crashes$Weekday <- as.factor(wday(airplane_crashes$Date))
|
|
|
|
airplane_crashes$Is_Leap_Year <- leap_year(airplane_crashes$Date)
|
|
|
|
airplane_crashes$Decade <- year(floor_date(airplane_crashes$Date, years(10)))
|
|
|
|
airplane_crashes$Date <- format(as.Date(airplane_crashes$Date, '%m/%d/%Y'), '%d/%m/%Y')
|
|
|
|
|
|
|
|
# ekstrakcja godziny, minuty i AM/PM z kolumny Time
|
|
|
|
airplane_crashes$Hour <- as.integer(hour(airplane_crashes$Time))
|
|
|
|
airplane_crashes$Minute <- as.factor(minute(airplane_crashes$Time))
|
|
|
|
airplane_crashes$AM_PM <- if_else(am(airplane_crashes$Time), 'AM', 'PM')
|
|
|
|
airplane_crashes$btwn_6PM_6AM <- if_else
|
|
|
|
|
|
|
|
|
|
|
|
# ShinyApp ####
|
2021-06-23 14:40:48 +02:00
|
|
|
|
2021-06-22 19:53:23 +02:00
|
|
|
|
|
|
|
ui <- fluidPage(
|
2021-06-23 14:40:48 +02:00
|
|
|
id = "main_content",
|
|
|
|
navbarPage("Airplane crashes from 1908 to 2020",
|
|
|
|
tabPanel("General overview",
|
|
|
|
sidebarLayout(
|
|
|
|
sidebarPanel(
|
|
|
|
# wybór daty
|
|
|
|
dateRangeInput('dates',
|
|
|
|
'Date range:',
|
|
|
|
min(airplane_crashes$Date),
|
|
|
|
max(airplane_crashes$Date)),
|
|
|
|
# wybor godziny
|
|
|
|
timeInput("time", "Time (local):", seconds = FALSE)
|
|
|
|
),
|
|
|
|
mainPanel(
|
|
|
|
# wykresiki
|
2021-06-23 15:49:55 +02:00
|
|
|
plotlyOutput("weekdayCrashes"),
|
|
|
|
plotlyOutput("hourCrashes")
|
2021-06-23 14:40:48 +02:00
|
|
|
)
|
|
|
|
)
|
|
|
|
),
|
|
|
|
tabPanel("Streamgraph",
|
|
|
|
mainPanel(
|
|
|
|
streamgraphOutput("operatorCrashes")
|
|
|
|
)
|
|
|
|
),
|
|
|
|
|
|
|
|
tabPanel("Map",
|
|
|
|
mainPanel(
|
|
|
|
# mapy
|
|
|
|
plotlyOutput("mapPlot")
|
|
|
|
#plotOutput("mapPlot")
|
|
|
|
)
|
|
|
|
),
|
|
|
|
tabPanel("Data",
|
|
|
|
sidebarPanel(
|
|
|
|
dateRangeInput('dataDates',
|
|
|
|
'Date range:',
|
|
|
|
min(airplane_crashes$Date),
|
|
|
|
max(airplane_crashes$Date))
|
|
|
|
),
|
|
|
|
mainPanel(
|
|
|
|
DT::dataTableOutput('allData')
|
|
|
|
))
|
2021-06-22 19:53:23 +02:00
|
|
|
)
|
|
|
|
)
|
|
|
|
|
2021-06-23 14:40:48 +02:00
|
|
|
|
2021-06-22 19:53:23 +02:00
|
|
|
server <- function(input, output, session) {
|
|
|
|
# godzina
|
|
|
|
observe(print(strftime(input$time, "%R")))
|
|
|
|
|
|
|
|
output$weekdayCrashes <- renderPlotly({
|
2021-06-22 20:00:43 +02:00
|
|
|
|
2021-06-22 19:53:23 +02:00
|
|
|
week_day <- airplane_crashes %>% count(Weekday)
|
|
|
|
|
2021-06-22 20:00:43 +02:00
|
|
|
week_day$Weekday <- as.character(week_day$Weekday)
|
|
|
|
|
|
|
|
week_day$Weekday[week_day$Weekday == 1] <- "Monday"
|
|
|
|
week_day$Weekday[week_day$Weekday == 2] <- "Tuesday"
|
|
|
|
week_day$Weekday[week_day$Weekday == 3] <- "Wednesday"
|
|
|
|
week_day$Weekday[week_day$Weekday == 4] <- "Thursday"
|
|
|
|
week_day$Weekday[week_day$Weekday == 5] <- "Friday"
|
|
|
|
week_day$Weekday[week_day$Weekday == 6] <- "Saturday"
|
|
|
|
week_day$Weekday[week_day$Weekday == 7] <- "Sunday"
|
|
|
|
|
|
|
|
# zmiana kolejnosci dni; dni nie powinny sie wyswietlac alfabetycznie
|
|
|
|
week_day$Weekday <- factor(week_day$Weekday, levels= c('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'))
|
|
|
|
|
2021-06-22 19:53:23 +02:00
|
|
|
plot_ly(
|
|
|
|
x = week_day$Weekday, y = week_day$n,
|
2021-06-23 15:49:55 +02:00
|
|
|
name = 'No. of crashes per week day', type = "bar"
|
2021-06-22 19:53:23 +02:00
|
|
|
) %>%
|
|
|
|
layout(title = 'Week Day Crashes',
|
|
|
|
xaxis = list(title = "Weekdays",
|
|
|
|
zeroline = FALSE),
|
|
|
|
yaxis = list(title = "No. of crashes",
|
|
|
|
zeroline = FALSE))
|
|
|
|
|
|
|
|
})
|
|
|
|
|
2021-06-23 15:49:55 +02:00
|
|
|
output$hourCrashes <- renderPlotly({
|
|
|
|
|
|
|
|
crashes_per_hours <- airplane_crashes %>% count(Hour)
|
|
|
|
crashes_per_hours <- crashes_per_hours[-c(1,26,27,28), ]
|
|
|
|
|
|
|
|
|
|
|
|
#dodanie pozycji labeli
|
|
|
|
# crashes_per_hours <- crashes_per_hours %>%
|
|
|
|
# arrange(Hour) %>%
|
|
|
|
# mutate(lab.ypos = cumsum(n) - 0.5*n)
|
|
|
|
#
|
|
|
|
# ggplot(crashes_per_hours, aes(x = "", y = n, fill = Hour)) +
|
|
|
|
# geom_bar(stat="identity", width=1, color="white") +
|
|
|
|
# coord_polar("y", start=0) +
|
|
|
|
# geom_text(aes(y = lab.ypos, label = Hour), color = "white")+
|
|
|
|
# theme_void() +
|
|
|
|
# theme(legend.position="none")
|
|
|
|
|
|
|
|
df <- crashes_per_hours
|
|
|
|
#df <- paste(df$Hour, ":00")
|
|
|
|
df <- df %>% arrange(Hour)
|
|
|
|
fig <- df %>% plot_ly(labels = ~Hour, values = ~n, sort=FALSE)
|
|
|
|
fig <- fig %>% add_pie(hole = 0.4)
|
|
|
|
fig <- fig %>% layout(title = "No. of crashes per hour", xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
|
|
|
|
yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE))
|
|
|
|
|
|
|
|
fig
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2021-06-23 14:40:48 +02:00
|
|
|
|
|
|
|
output$operatorCrashes <- renderStreamgraph({
|
|
|
|
|
|
|
|
main_type_wise_year <- airplane_crashes %>%
|
|
|
|
select(Year, Type) %>%
|
|
|
|
# replace model number by empty strings
|
|
|
|
mutate(main_type = str_replace_all(Type, "[A-Za-z]*-?\\d+-?[A-Za-z]*.*", "")) %>%
|
|
|
|
# skip empty strings row
|
|
|
|
filter(main_type > 'A') %>%
|
|
|
|
group_by(Year, main_type) %>%
|
|
|
|
summarize(n = n()) %>%
|
|
|
|
group_by(Year) %>%
|
|
|
|
top_n(5, n)
|
|
|
|
|
|
|
|
# i teraz z tego streamgraph
|
|
|
|
main_type_wise_year <- main_type_wise_year %>% arrange(-n)
|
|
|
|
streamgraph <- main_type_wise_year %>%
|
|
|
|
group_by(Year, main_type) %>%
|
|
|
|
tally(wt=n) %>%
|
|
|
|
streamgraph("main_type", "n", "Year", interpolate="cardinal") %>%
|
|
|
|
sg_fill_brewer("PuOr") %>%
|
|
|
|
sg_legend(TRUE)
|
|
|
|
|
|
|
|
streamgraph
|
|
|
|
})
|
|
|
|
|
2021-06-22 19:53:23 +02:00
|
|
|
output$mapPlot <- renderPlotly({
|
|
|
|
|
|
|
|
states_list <- c('Alabama','Alaska','Alaksa','Arizona','Arkansas',"California",
|
|
|
|
"Colorado", "Connecticut","Delaware","Florida","Georgia",
|
|
|
|
"Hawaii","Idaho","Illinois", "Indiana","Iowa","Kansas",
|
|
|
|
"Kentucky","Louisiana","Maine","Maryland", "Massachusetts",
|
|
|
|
"Massachusett", "Michigan","Minnesota","Mississippi","Missouri",
|
|
|
|
"Montana", "Nebraska","Nevada","New Hampshire","New Jersey",
|
|
|
|
"New Mexico","New York", "North Carolina","North Dakota","Ohio",
|
|
|
|
"Oklahoma", "Oklohoma", "Oregon","Pennsylvania", "Rhode Island",
|
|
|
|
"South Carolina",
|
|
|
|
"South Dakota",'Tennesee',"Tennessee","Texas","Utah", "Vermont",
|
|
|
|
'Virginia',"Washington D.C.", "Washington, D.C.", "Washington",
|
|
|
|
"West Virginia","Wisconsin","Wyoming",
|
|
|
|
"AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DC", "DE", "FL", "GA",
|
|
|
|
"HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD",
|
|
|
|
"MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ",
|
|
|
|
"NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC",
|
|
|
|
"SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY")
|
|
|
|
|
|
|
|
location <- airplane_crashes %>%
|
|
|
|
select(Location)
|
|
|
|
|
|
|
|
for(state in states_list) {
|
|
|
|
location <- location %>%
|
|
|
|
mutate(Location = str_replace_all(Location, state, paste(state, ', USA', sep = ''))) %>%
|
|
|
|
mutate(Location = str_replace_all(Location, 'USA.*, ', '')) %>%
|
|
|
|
mutate(Location = str_replace(Location, 'West Virginia, USA,', '')) %>%
|
|
|
|
mutate(Location = str_replace(Location, 'Afghanstan', 'Afghanistan')) %>%
|
|
|
|
mutate(Location = str_replace(Location, 'Airzona|Arazona', 'Arizona')) %>%
|
|
|
|
mutate(Location = str_replace(Location, 'Alakska', 'Alaska')) %>%
|
|
|
|
mutate(Location = str_replace(Location, 'Cailifornia|Calilfornia', 'California')) %>%
|
|
|
|
mutate(Location = str_replace(Location, 'D.*Congo', 'DR Congo')) %>%
|
|
|
|
mutate(Location = str_replace(Location, 'Domincan Republic', 'Dominican Republic')) %>%
|
|
|
|
mutate(Location = str_replace(Location, 'Hati', 'Haiti')) %>%
|
|
|
|
mutate(Location = str_replace(Location, ' International Airport', '')) %>%
|
|
|
|
mutate(Location = str_replace(Location, 'Morrocco|Morroco', 'Morocco')) %>%
|
|
|
|
mutate(Location = str_replace(Location, 'Phillipines', 'Phillipines')) %>%
|
|
|
|
mutate(Location = str_replace(Location, 'Burma', 'Myanmar')) %>%
|
|
|
|
mutate(Location = str_replace(Location, '([Ss]outhern|[Nn]orthern|[Ww]estern|[Ee]astern) ', ''))}
|
|
|
|
|
|
|
|
country_state <- location %>%
|
|
|
|
select(Location) %>%
|
|
|
|
filter(!str_detect(Location, '[Oo]cean|[Ss]ea|[Cc]hannel|Gulf of')) %>%
|
|
|
|
mutate(Location = str_replace(Location, '(Near|Off|Over) ', '')) %>%
|
|
|
|
mutate(Location = str_replace(Location, 'USA, Australia', 'Australia')) %>%
|
|
|
|
mutate(State_Province = str_replace(Location, '(.*, )?(.*), (.*)', '\\2')) %>%
|
|
|
|
mutate(Country = str_replace(Location, '.*,\\s*', ''))
|
|
|
|
|
|
|
|
cntry <- country_state %>%
|
|
|
|
group_by(Country) %>%
|
|
|
|
summarize(n = n()) %>%
|
|
|
|
arrange(desc(n))
|
|
|
|
|
|
|
|
# podzial na kategorie wg liczby wypadkow w danym kraju
|
|
|
|
cntry <- cntry %>%
|
|
|
|
mutate(m = case_when(
|
|
|
|
n > 200 ~ "200 +",
|
|
|
|
n < 200 & n >= 100 ~ "199 - 100",
|
|
|
|
n < 100 & n >= 50 ~ "99 - 50",
|
|
|
|
n < 50 & n >= 10 ~ "49 - 10",
|
|
|
|
n < 10 ~ "< 10")) %>%
|
|
|
|
mutate(m = factor(m, levels = c("< 10", "49 - 10", "99 - 50", "199 - 100", "200 +")))
|
|
|
|
|
|
|
|
|
|
|
|
world_map <- map_data("world")
|
|
|
|
|
|
|
|
map_data <- cntry %>%
|
|
|
|
full_join(world_map, by = c('Country' = 'region'))
|
|
|
|
|
2021-06-23 15:49:55 +02:00
|
|
|
map_pal = c('#E6CE39', '#F09B3C', '#D94B41', '#CD3CF0', '#555CEB')
|
2021-06-22 19:53:23 +02:00
|
|
|
|
|
|
|
ggplot_obj <-
|
|
|
|
ggplot(map_data, aes(x = long, y = lat, group = group, fill = m)) +
|
|
|
|
geom_polygon(colour = "white") +
|
|
|
|
labs(title = 'No. of crashes in each country', x = '', y = '', fill = '') +
|
|
|
|
scale_fill_manual(values = map_pal, na.value = 'whitesmoke') +
|
|
|
|
theme(legend.position='right', legend.justification = "top") +
|
|
|
|
guides(fill = guide_legend(reverse = TRUE))
|
2021-06-23 14:40:48 +02:00
|
|
|
|
2021-06-22 19:53:23 +02:00
|
|
|
ggplotly(
|
|
|
|
p = ggplot_obj,
|
|
|
|
tooltip = "all",
|
|
|
|
dynamicTicks = FALSE,
|
|
|
|
layerData = 1,
|
|
|
|
originalData = TRUE
|
|
|
|
)
|
|
|
|
})
|
|
|
|
|
|
|
|
# poprawic szerkosc wierszy
|
|
|
|
output$allData <- DT::renderDataTable({
|
|
|
|
airplane_crashes %>%
|
2021-06-23 14:40:48 +02:00
|
|
|
# filter(Year == input$dataDates) %>%
|
2021-06-22 19:53:23 +02:00
|
|
|
DT::datatable()
|
|
|
|
})
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
shinyApp(ui = ui, server = server)
|
|
|
|
|
2021-06-23 14:40:48 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# tutaj sb rysuje wykresiki demo, zeby nie odpalac shiny za kazdym razem ####
|
|
|
|
|
|
|
|
# number of fatal accidents by operator - top 5
|
|
|
|
|
|
|
|
##
|
|
|
|
# extract and group by manufacturer
|
|
|
|
# main_type_wise_year <- airplane_crashes %>%
|
|
|
|
# select(Year, Type) %>%
|
|
|
|
# # replace model number by empty strings
|
|
|
|
# mutate(main_type = str_replace_all(Type, "[A-Za-z]*-?\\d+-?[A-Za-z]*.*", "")) %>%
|
|
|
|
# # skip empty strings row
|
|
|
|
# filter(main_type > 'A') %>%
|
|
|
|
# group_by(Year, main_type) %>%
|
|
|
|
# summarize(n = n()) %>%
|
|
|
|
# group_by(Year) %>%
|
|
|
|
# top_n(5, n)
|
|
|
|
# #%>% mutate(main_type = reorder_within(-n, main_type, Year))
|
|
|
|
#
|
|
|
|
# # i teraz z tego streamgraph
|
|
|
|
# main_type_wise_year <- main_type_wise_year %>% arrange(-n)
|
|
|
|
# main_type_wise_year %>%
|
|
|
|
# group_by(Year, main_type) %>%
|
|
|
|
# tally(wt=n) %>%
|
|
|
|
# streamgraph("main_type", "n", "Year", interpolate="cardinal") %>%
|
|
|
|
# sg_fill_brewer("PuOr") %>%
|
|
|
|
# sg_legend(TRUE)
|
|
|
|
|
|
|
|
####
|
|
|
|
# airplane_operators <- airplane_crashes %>% count(Operator, sort = TRUE)
|
|
|
|
# airplane_operators <- head(airplane_operators, 5)
|
|
|
|
# ggplot(airplane_operators, aes(x = Operator, y = n)) +
|
|
|
|
# geom_point(color = "#765432", alpha = 0.8) +
|
|
|
|
# labs(title = "No. of fatal accidents by operator", x = '', y = '')
|
|
|
|
#
|
|
|
|
# airplane_operators %>%
|
|
|
|
# streamgraph(key="Operator", value=, date="Year", offset="zero") %>%
|
|
|
|
# sg_fill_brewer("BuPu")
|
|
|
|
|
|
|
|
# wykres ile wypadkow kazdego operatora w kazdym roku
|