added dataset + shiny app with map, barplot and data table

This commit is contained in:
patrycjalazna 2021-06-22 19:53:23 +02:00
parent 2350e8fdd5
commit cd5275b075
2 changed files with 5216 additions and 0 deletions

4976
proj3/airplane_crashes.csv Executable file

File diff suppressed because it is too large Load Diff

240
proj3/projekt.R Normal file
View File

@ -0,0 +1,240 @@
# environment setup ####
# Switch to the author's project folder only when it actually exists, so the
# script does not abort on machines where that absolute path is missing. In
# that case the script has to be started from the folder that contains
# 'airplane_crashes.csv'.
project_dir <- '/Users/patrycjalazna/Desktop/wizualizacja-danych/projekt_3/'
if (dir.exists(project_dir)) {
  setwd(project_dir)
}
# Install shinyjs only when it is missing, instead of re-downloading it on
# every run of the script.
if (!requireNamespace('shinyjs', quietly = TRUE)) {
  install.packages('shinyjs')
}
# imports ####
library(shiny)
library(shinyTime)
library(shinyjs)
library(highcharter)
library(splusTimeDate)
library(tidyverse)
library(lubridate)
library(gridExtra)
library(usmap)
library(tidytext)
library(tm)
library(SnowballC)
library(wordcloud)
library(dplyr)
library(streamgraph)
library(plotly)
library(DT)
# load the dataset ####
airplane_crashes <- read.csv('airplane_crashes.csv')

# summary ####
summary <- summary(airplane_crashes)
colnames(airplane_crashes)

# preprocessing ####
# rename the columns
colnames(airplane_crashes) <- c(
  'Date', 'Time', 'Location', 'Operator', 'Flight', 'Route', 'Type',
  'Registration', 'cn/ln', 'Total_Onboard', 'Passengers_Onboard',
  'Crew_Onboard', 'Total_Fatalities', 'Passengers_Fatalities',
  'Crew_Fatalities', 'Ground', 'Summary'
)

# parse the raw text columns: Date as month/day/year, Time as hours:minutes
airplane_crashes$Date <- mdy(airplane_crashes$Date)
airplane_crashes$Time <- hm(airplane_crashes$Time)

# derive calendar features while Date is still a Date object
airplane_crashes$Year <- year(airplane_crashes$Date)
airplane_crashes$Month <- as.factor(month(airplane_crashes$Date))
airplane_crashes$Day <- as.factor(day(airplane_crashes$Date))
# numeric weekday code as returned by lubridate::wday()
airplane_crashes$Weekday <- as.factor(wday(airplane_crashes$Date))
airplane_crashes$Is_Leap_Year <- leap_year(airplane_crashes$Date)
airplane_crashes$Decade <- year(floor_date(airplane_crashes$Date, years(10)))

# From here on Date is stored as day/month/year text. The column is already
# a Date at this point, so it can be formatted directly.
airplane_crashes$Date <- format(airplane_crashes$Date, '%d/%m/%Y')

# derive hour, minute and AM/PM from the Time column
airplane_crashes$Hour <- as.integer(hour(airplane_crashes$Time))
airplane_crashes$Minute <- as.factor(minute(airplane_crashes$Time))
airplane_crashes$AM_PM <- if_else(am(airplane_crashes$Time), 'AM', 'PM')

# TRUE when the crash happened between 18:00 and 05:59, NA when the time is
# unknown. The original line assigned the bare `if_else` function to the
# column, which fails on a data frame.
# NOTE(review): the 18:00-06:00 window is inferred from the column name --
# confirm the intended definition.
airplane_crashes$btwn_6PM_6AM <-
  airplane_crashes$Hour >= 18L | airplane_crashes$Hour < 6L
# ShinyApp ####
# Waits two seconds, then hides the "loading_page" element and reveals the
# "main_content" element of the UI.
# hide()/show() are namespace-qualified because many packages are attached
# after shinyjs in this script and `show` is also the name of the S4 generic
# in `methods`; the qualified calls cannot be masked.
load_data <- function() {
  Sys.sleep(2)
  shinyjs::hide("loading_page")
  shinyjs::show("main_content")
}
# Date range used to pre-fill the date pickers.
# The preprocessing step stores `Date` as day/month/year text, so min()/max()
# on the raw column would compare strings alphabetically and hand
# dateRangeInput() a format it does not understand. Parse the text back into
# Date objects first and ignore rows whose date could not be parsed.
crash_dates <- as.Date(airplane_crashes$Date, format = '%d/%m/%Y')
first_crash_date <- min(crash_dates, na.rm = TRUE)
last_crash_date <- max(crash_dates, na.rm = TRUE)

# Page layout: a "Loading..." placeholder plus the initially hidden main
# content, which is a navbar with three tabs (overview plot, map, raw data).
ui <- fluidPage(
  useShinyjs(),
  div(
    id = "loading_page",
    h1("Loading...")
  ),
  hidden(
    div(
      id = "main_content",
      navbarPage(
        "Airplane crashes from 1908 to 2020",
        tabPanel(
          "General overview",
          sidebarLayout(
            sidebarPanel(
              # date selection
              dateRangeInput(
                'dates',
                'Date range:',
                start = first_crash_date,
                end = last_crash_date
              ),
              # time selection
              timeInput("time", "Time (local):", seconds = FALSE)
            ),
            mainPanel(
              # plots
              plotlyOutput("weekdayCrashes")
            )
          )
        ),
        tabPanel(
          "Map",
          mainPanel(
            # maps
            plotlyOutput("mapPlot")
          )
        ),
        tabPanel(
          "Data",
          # wrapped in sidebarLayout() like the first tab so both panels share
          # one row
          sidebarLayout(
            sidebarPanel(
              dateRangeInput(
                'dataDates',
                'Date range:',
                start = first_crash_date,
                end = last_crash_date
              )
            ),
            mainPanel(
              DT::dataTableOutput('allData')
            )
          )
        )
      )
    )
  )
)
# Weekday names indexed by the numeric code stored in airplane_crashes$Weekday.
# lubridate::wday() numbers the days 1 = Sunday ... 7 = Saturday unless the
# "lubridate.week.start" option was changed; the default is assumed here.
weekday_labels <- c('Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday',
                    'Friday', 'Saturday')

# US state names, misspellings found in the data and two-letter postal codes.
# Each entry is used as a regular expression by normalize_crash_locations().
# NOTE(review): the two-letter codes match anywhere in the text, not only as
# whole words -- confirm this does not tag non-US locations.
us_state_patterns <- c(
  'Alabama', 'Alaska', 'Alaksa', 'Arizona', 'Arkansas', "California",
  "Colorado", "Connecticut", "Delaware", "Florida", "Georgia",
  "Hawaii", "Idaho", "Illinois", "Indiana", "Iowa", "Kansas",
  "Kentucky", "Louisiana", "Maine", "Maryland", "Massachusetts",
  "Massachusett", "Michigan", "Minnesota", "Mississippi", "Missouri",
  "Montana", "Nebraska", "Nevada", "New Hampshire", "New Jersey",
  "New Mexico", "New York", "North Carolina", "North Dakota", "Ohio",
  "Oklahoma", "Oklohoma", "Oregon", "Pennsylvania", "Rhode Island",
  "South Carolina",
  "South Dakota", 'Tennesee', "Tennessee", "Texas", "Utah", "Vermont",
  'Virginia', "Washington D.C.", "Washington, D.C.", "Washington",
  "West Virginia", "Wisconsin", "Wyoming",
  "AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DC", "DE", "FL", "GA",
  "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD",
  "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ",
  "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC",
  "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"
)

# Cleans raw crash locations so that the last comma-separated part is a
# country name.
#   locations: character vector of raw location strings.
#   returns:   character vector of the same length.
normalize_crash_locations <- function(locations) {
  # Spelling and naming fixes do not depend on the state being processed, so
  # they run once here instead of once per state. They have to come before the
  # state loop because some of them produce state names ('Arizona', 'Alaska',
  # 'California') that the loop must still tag with ', USA'.
  locations <- locations %>%
    str_replace_all('Afghanstan', 'Afghanistan') %>%
    str_replace_all('Airzona|Arazona', 'Arizona') %>%
    str_replace_all('Alakska', 'Alaska') %>%
    str_replace_all('Cailifornia|Calilfornia', 'California') %>%
    str_replace_all('D.*Congo', 'DR Congo') %>%
    str_replace_all('Domincan Republic', 'Dominican Republic') %>%
    str_replace_all('Hati', 'Haiti') %>%
    str_replace_all(' International Airport', '') %>%
    str_replace_all('Morrocco|Morroco', 'Morocco') %>%
    # the original replaced 'Phillipines' with itself, which changed nothing
    str_replace_all('Phillipines', 'Philippines') %>%
    str_replace_all('Burma', 'Myanmar') %>%
    str_replace_all('([Ss]outhern|[Nn]orthern|[Ww]estern|[Ee]astern) ', '')

  for (state in us_state_patterns) {
    locations <- locations %>%
      # tag the state with its country ...
      str_replace_all(state, paste0(state, ', USA')) %>%
      # ... and collapse duplicated tags left by overlapping patterns
      # (e.g. 'Virginia' followed by 'West Virginia')
      str_replace_all('USA.*, ', '') %>%
      str_replace('West Virginia, USA,', '')
  }
  locations
}

# Server logic: reveals the UI after the loading phase and renders the weekday
# bar chart, the world map of crashes per country and the raw data table.
server <- function(input, output, session) {
  load_data()

  # time: print the currently selected time to the console
  observe(print(strftime(input$time, "%R")))

  output$weekdayCrashes <- renderPlotly({
    week_day <- airplane_crashes %>% count(Weekday)
    # Weekday is a factor of numeric codes. Writing a day name into it directly
    # yields NA because the name is not an existing level, so translate every
    # code through the lookup vector and rebuild the factor in calendar order.
    day_codes <- as.integer(as.character(week_day$Weekday))
    week_day$Weekday <- factor(
      weekday_labels[day_codes],
      levels = weekday_labels
    )
    plot_ly(
      x = week_day$Weekday, y = week_day$n,
      name = 'Week Day Crashes', type = "bar"
    ) %>%
      layout(
        title = 'Week Day Crashes',
        xaxis = list(title = "Weekdays", zeroline = FALSE),
        yaxis = list(title = "No. of crashes", zeroline = FALSE)
      )
  })

  output$mapPlot <- renderPlotly({
    # NOTE(review): '[Ss]ea' also matches inside longer words such as city
    # names; confirm whether those rows are meant to be dropped.
    country_state <- airplane_crashes %>%
      select(Location) %>%
      mutate(Location = normalize_crash_locations(Location)) %>%
      filter(!str_detect(Location, '[Oo]cean|[Ss]ea|[Cc]hannel|Gulf of')) %>%
      mutate(Location = str_replace(Location, '(Near|Off|Over) ', '')) %>%
      mutate(Location = str_replace(Location, 'USA, Australia', 'Australia')) %>%
      mutate(State_Province = str_replace(Location, '(.*, )?(.*), (.*)', '\\2')) %>%
      mutate(Country = str_replace(Location, '.*,\\s*', ''))

    # number of crashes per country, most affected first
    cntry <- country_state %>%
      count(Country, sort = TRUE)

    # Split into categories by the number of crashes in the country. The
    # conditions are checked top to bottom, so each one only needs its lower
    # bound; the original left exactly 200 crashes without a category.
    cntry <- cntry %>%
      mutate(m = case_when(
        n >= 200 ~ "200 +",
        n >= 100 ~ "199 - 100",
        n >= 50 ~ "99 - 50",
        n >= 10 ~ "49 - 10",
        TRUE ~ "< 10"
      )) %>%
      mutate(m = factor(
        m,
        levels = c("< 10", "49 - 10", "99 - 50", "199 - 100", "200 +")
      ))

    world_map <- map_data("world")
    # named crash_map so the local does not shadow the map_data() function
    crash_map <- cntry %>%
      full_join(world_map, by = c('Country' = 'region'))
    map_pal <- c("#E0DFD5", "#E4B363", "#E97F02", '#EF6461', '#313638')

    ggplot_obj <-
      ggplot(crash_map, aes(x = long, y = lat, group = group, fill = m)) +
      geom_polygon(colour = "white") +
      labs(title = 'No. of crashes in each country', x = '', y = '', fill = '') +
      scale_fill_manual(values = map_pal, na.value = 'whitesmoke') +
      theme(legend.position = 'right', legend.justification = "top") +
      guides(fill = guide_legend(reverse = TRUE))

    ggplotly(
      p = ggplot_obj,
      tooltip = "all",
      dynamicTicks = FALSE,
      layerData = 1,
      originalData = TRUE
    )
  })

  # TODO: fix the row width of the table
  # TODO: the 'dataDates' input is not applied to the table yet
  output$allData <- DT::renderDataTable({
    airplane_crashes %>%
      DT::datatable()
  })
}
# Build the Shiny application object from the server logic and UI definition.
shinyApp(server = server, ui = ui)