# NOTE(review): everything down to the first closing brace is the tail of a
# get_shots2() definition whose head lies above this chunk. Only its line
# structure is restored here (the collapsed layout let each `#` comment
# swallow the code after it); its statements are otherwise unchanged.
error = function(e) {
  # handle the error
  print(paste("An error occurred:", e$message))
})
  data$shot <- data$shot %>%
    select(-freeze_frame, -statsbomb_xg, -key_pass_id)
  data$shot$body_part <- data$shot$body_part %>% select(-id)
  data$shot$technique <- data$shot$technique %>% select(-id)
  data$shot$type <- data$shot$type %>% select(-id)
  data$position <- data$position %>% select(-id)
  data$shot <- data$shot %>% select(-end_location)
  tryCatch({
    # TODO reduce error cases
    if ("one_on_one" %in% colnames(data$shot)) {
      data[is.na(data$shot$one_on_one), ]$shot$one_on_one <- FALSE
    } else {
      data$shot$one_on_one <- FALSE
    }
    if ("first_time" %in% colnames(data$shot)) {
      data[is.na(data$shot$first_time), ]$shot$first_time <- FALSE
    } else {
      data$shot$first_time <- FALSE
    }
    if ("aerial_won" %in% colnames(data$shot)) {
      data[is.na(data$shot$aerial_won), ]$shot$aerial_won <- FALSE
    } else {
      data$shot$aerial_won <- FALSE
    }
    if ("saved_to_post" %in% colnames(data$shot)) {
      data[is.na(data$shot$saved_to_post), ]$shot$saved_to_post <- FALSE
    } else {
      data$shot$saved_to_post <- FALSE
    }
    if ("deflected" %in% colnames(data$shot)) {
      data[is.na(data$shot$deflected), ]$shot$deflected <- FALSE
    } else {
      data$shot$deflected <- FALSE
    }
    if ("saved_off_target" %in% colnames(data$shot)) {
      data[is.na(data$shot$saved_off_target), ]$shot$saved_off_target <- FALSE
    } else {
      data$shot$saved_off_target <- FALSE
    }
    if ("open_goal" %in% colnames(data$shot)) {
      data[is.na(data$shot$open_goal), ]$shot$open_goal <- FALSE
    } else {
      data$shot$open_goal <- FALSE
    }
    if ("follows_dribble" %in% colnames(data$shot)) {
      data[is.na(data$shot$follows_dribble), ]$shot$follows_dribble <- FALSE
    } else {
      data$shot$follows_dribble <- FALSE
    }
    if ("redirect" %in% colnames(data$shot)) {
      data[is.na(data$shot$redirect), ]$shot$redirect <- FALSE
    } else {
      data$shot$redirect <- FALSE
    }
    if ("kick_off" %in% colnames(data$kick_off)) {
      data[is.na(data$kick_off$kick_off), ]$kick_off$kick_off <- FALSE
    } else {
      data$kick_off$kick_off <- FALSE
    }
  }, error = function(e) {
    # handle the error
    print(paste("An error occurred:", e$message))
  })
  data <- data %>%
    mutate(
      angle = atan(7.32 * x1 / (x1^2 + y1^2 - (7.32/2)^2)),
      is_goal = ifelse(shot$outcome$id == 97, 1, 0)
    ) %>%
    select(-location)
  data$shot$outcome <- data$shot$outcome %>% select(-id)
  data <- data %>% unnest(shot, names_sep = "_")
  data$angle <- ifelse(data$angle < 0, base::pi + data$angle, data$angle)
  data
}

data_test <- get_shots2("data/la_liga_events/303377.json")
View(data_test)

# Shared helpers ----
# The get_shots2() versions below repeat the same preparation steps; the
# helpers hold that logic once so every version gets the same fixes.

# Boolean shot attributes that may be missing from a file, or NA for some
# shots, and are normalised to FALSE.
shot_flag_names <- c(
  "one_on_one", "first_time", "aerial_won", "saved_to_post", "deflected",
  "saved_off_target", "open_goal", "follows_dribble", "redirect"
)

# Drop the named columns from a data frame, ignoring names that are absent.
#
# df:   a data frame (NULL is passed through unchanged).
# cols: character vector of column names to remove.
# Returns `df` without those columns.
drop_columns <- function(df, cols) {
  if (is.null(df)) {
    return(df)
  }
  df[setdiff(names(df), cols)]
}

# Make sure every flag column exists and holds no NA.
#
# shot:  the nested `shot` data frame.
# flags: character vector of column names.
# Returns `shot` where each flag column has NA replaced by FALSE; columns
# that were absent are appended as all-FALSE. Unlike the row-subset
# assignment used previously, this does not fail when a column has no NA.
fill_missing_flags <- function(shot, flags) {
  for (flag in flags) {
    values <- shot[[flag]]
    if (is.null(values)) {
      values <- rep(FALSE, nrow(shot))
    } else {
      values[is.na(values)] <- FALSE
    }
    shot[[flag]] <- values
  }
  shot
}

# Count, per shot, the freeze-frame players whose first location coordinate
# is greater than the shot's own x1.
#
# freeze_frames: list with one element per shot (a data frame with
#                `teammate` and `location` columns, or NULL); may itself be
#                NULL when the file carries no freeze frames.
# x1:            numeric vector of shot x coordinates.
# teammates:     FALSE counts players with teammate == FALSE, TRUE counts
#                players with teammate == TRUE.
# Returns a double vector the same length as `x1`. Shots without a usable
# freeze frame count as 0, as before.
count_players_ahead <- function(freeze_frames, x1, teammates = FALSE) {
  vapply(seq_along(x1), function(i) {
    frame <- if (is.null(freeze_frames)) NULL else freeze_frames[[i]]
    if (!("teammate" %in% names(frame))) {
      return(0)
    }
    player_x <- vapply(
      frame$location,
      function(loc) if (length(loc) > 0) as.numeric(loc[1]) else NA_real_,
      numeric(1)
    )
    on_side <- if (teammates) frame$teammate else !frame$teammate
    as.numeric(sum(on_side & player_x > x1[[i]], na.rm = TRUE))
  }, numeric(1))
}

# Read one events JSON file and build the shot table up to, but not
# including, the unnesting of the nested `shot` / `position` columns.
#
# json_file:    path to the events JSON file.
# split_counts: FALSE adds `number_of_players` (non-teammates only); TRUE
#               adds `number_of_players_opponents` and
#               `number_of_players_teammates`.
# Returns a data frame with minute, position, shot, x1, y1, the count
# column(s), kick_off, angle and is_goal.
prepare_shots <- function(json_file, split_counts = FALSE) {
  data <- fromJSON(json_file) %>%
    filter(type$name == "Shot") %>%
    dplyr::select(c(minute, position, location, shot))

  # Type-stable extraction of the two location coordinates.
  data$x1 <- vapply(
    data$location, function(loc) as.numeric(loc[1]), numeric(1)
  )
  data$y1 <- vapply(
    data$location, function(loc) as.numeric(loc[2]), numeric(1)
  )

  frames <- data$shot$freeze_frame
  if (split_counts) {
    data$number_of_players_opponents <-
      count_players_ahead(frames, data$x1, teammates = FALSE)
    data$number_of_players_teammates <-
      count_players_ahead(frames, data$x1, teammates = TRUE)
  } else {
    data$number_of_players <-
      count_players_ahead(frames, data$x1, teammates = FALSE)
  }

  # Not every file has all of these keys, so absent ones are skipped
  # instead of aborting the whole function.
  shot <- drop_columns(
    data$shot,
    c("freeze_frame", "statsbomb_xg", "key_pass_id", "end_location")
  )

  # Outcome id 97 is the value flagged as a goal; read it before the id
  # columns are dropped.
  is_goal <- ifelse(shot$outcome$id == 97, 1, 0)

  for (nested in c("body_part", "technique", "type", "outcome")) {
    if (is.data.frame(shot[[nested]])) {
      shot[[nested]] <- drop_columns(shot[[nested]], "id")
    }
  }
  shot <- fill_missing_flags(shot, shot_flag_names)

  # kick_off is always exposed as one top-level logical column. The old
  # check looked at colnames(data$kick_off), which is always NULL, so the
  # shot-level value was never used.
  kick_off <- shot[["kick_off"]]
  shot[["kick_off"]] <- NULL

  data$shot <- shot
  data$position <- drop_columns(data$position, "id")
  data$kick_off <- if (is.null(kick_off)) {
    rep(FALSE, nrow(data))
  } else {
    !is.na(kick_off) & kick_off
  }

  # NOTE(review): the angle formula is applied to the raw x1 / y1 values
  # with a goal width of 7.32 -- confirm the coordinate origin and units.
  angle <- atan(7.32 * data$x1 / (data$x1^2 + data$y1^2 - (7.32 / 2)^2))
  data$angle <- ifelse(angle < 0, base::pi + angle, angle)
  data$is_goal <- is_goal
  data$location <- NULL
  data
}

# TODO not all json data have these same keys/fields, customize function to cover all cases
# Shot table for one events file; unnests `shot` and `position` only.
# (The original text of this version had no closing `data }`.)
get_shots2 <- function(json_file) {
  prepare_shots(json_file, split_counts = FALSE) %>%
    unnest(shot, names_sep = "_") %>%
    unnest(position, names_sep = "_")
}

# TODO not all json data have these same keys/fields, customize function to cover all cases
# Shot table for one events file; additionally unnests the shot type,
# outcome and technique columns.
get_shots2 <- function(json_file) {
  prepare_shots(json_file, split_counts = FALSE) %>%
    unnest(shot, names_sep = "_") %>%
    unnest(position, names_sep = "_") %>%
    unnest(shot_type, names_sep = "_") %>%
    unnest(shot_outcome, names_sep = "_") %>%
    unnest(shot_technique, names_sep = "_")
}

data_test <- get_shots2("data/la_liga_events/303377.json")
View(data_test)
write.csv(data_test, file = "data/data_test.csv")
colnames(data_test)
data_test$kick_off
data_test$shot_body_part
data_df <- as.data.frame(data_test)

# TODO not all json data have these same keys/fields, customize function to cover all cases
# Shot table for one events file; additionally unnests the body part column.
get_shots2 <- function(json_file) {
  prepare_shots(json_file, split_counts = FALSE) %>%
    unnest(shot, names_sep = "_") %>%
    unnest(position, names_sep = "_") %>%
    unnest(shot_type, names_sep = "_") %>%
    unnest(shot_outcome, names_sep = "_") %>%
    unnest(shot_technique, names_sep = "_") %>%
    unnest(shot_body_part, names_sep = "_")
}

data_test <- get_shots2("data/la_liga_events/303377.json")
write.csv(data_test, file = "data/data_test.csv")

# Anchored regex instead of the glob "*.json".
file_names <- list.files(path = "data/la_liga_events/", pattern = "\\.json$")
data_list <- lapply(paste0("data/la_liga_events/", file_names), get_shots2)
# bind_rows matches columns by name and fills absent ones with NA.
combined_data <- dplyr::bind_rows(data_list)

# The first of these three statements originally had an unbalanced "((".
data_test <- data <- fromJSON("data/la_liga_events/303377.json") %>%
  filter(type$name == "Shot")
data_test <- fromJSON("data/la_liga_events/303377.json") %>%
  filter(type$name == "Shot")
data_test <- fromJSON("data/la_liga_events/303377.json") %>%
  filter(type$name == "Shot")
data_test$id
data_test$index

data4 <- read.csv("data/data4.csv", nrows = 1000)
View(data4)

# TODO not all json data have these same keys/fields, customize function to cover all cases
# Shot table for one events file, with separate counts of opponents and
# teammates positioned beyond the shooter's x1.
#
# json_file: path to the events JSON file.
# Returns one row per shot with the nested shot / position columns
# unnested using "_" as the name separator.
get_shots2 <- function(json_file) {
  prepare_shots(json_file, split_counts = TRUE) %>%
    unnest(shot, names_sep = "_") %>%
    unnest(position, names_sep = "_") %>%
    unnest(shot_type, names_sep = "_") %>%
    unnest(shot_outcome, names_sep = "_") %>%
    unnest(shot_technique, names_sep = "_") %>%
    unnest(shot_body_part, names_sep = "_")
}

# sample data
data_test <- get_shots2("data/la_liga_events/303377.json")
write.csv(data_test, file = "data/data3_test.csv")

data1 <- read.csv("data/data1.csv", nrows = 1000)
colnames(data1)
View(data1)

data2 <- read.csv("data/data2.csv", nrows = 1000)
View(data2)

data3 <- read.csv("data/data3_test.csv", nrows = 1000)
colnames(data3)
View(data3)