data cleaning fix

This commit is contained in:
LukaszChrostowski 2023-12-27 09:33:36 +01:00
parent 3cec9b5ebb
commit 695d8461a7
3 changed files with 38351 additions and 38347 deletions

View File

@@ -409,4 +409,4 @@
}
}
}

File diff suppressed because it is too large. Load Diff

View File

@@ -181,7 +181,9 @@ get_shots2 <- function(json_file) {
# %>%
# stop("123")
wider_df <- dff %>%
pivot_wider(names_from = position_teammate, values_from = c(x, y), names_sep = "_player_")
pivot_wider(names_from = position_teammate, values_from = c(x, y), names_sep = "_player_") %>%
mutate(across(everything(), as.numeric))
wider_df
# wider_df <- apply(wider_df, MARGIN = 2, unlist)
}, data$shot$freeze_frame)
},
@@ -328,6 +330,7 @@ get_shots2 <- function(json_file) {
file_names <- list.files(path = "data/la_liga_events/", pattern = "*.json")
data_list <- lapply(paste("data/la_liga_events/", file_names, sep = ""), get_shots2)
combined_data <- bind_rows(data_list)
skimr::skim(combined_data)
# # sample data
# data <- fromJSON("data/la_liga_events/ (1000).json") %>% filter(type$name == "Shot") %>% dplyr::select(c(minute, position, location, shot))
@@ -337,9 +340,10 @@ data3_final <- combined_data %>% select(-c(shot_outcome_name,
shot_saved_to_post,
kick_off)) %>%
mutate(shot_kick_off = ifelse(is.na(shot_kick_off), FALSE, shot_kick_off))
pattern <- "^(x_player_|y_player_)([0-9]|Goalkeeper)+$"
pattern <- "^(x_player_|y_player_).*$"
cols <- names(data3_final)[grepl(pattern, names(data3_final))]
data3_final <- data3_final %>% unnest(all_of(cols))
write_csv(data3_final, file = "data/final_data.csv")
data_final <- data3_final %>% unnest(all_of(cols))
skimr::skim(data_final)
write_csv(data_final, file = "data/final_data.csv")
# df_test <- read.csv("data/final_data.csv", nrows = 100)
##################### The fourth dataset ##############################