# NOTE(review): everything down to the first `data_list <- lapply(...)` call is
# the tail of a definition that begins above this excerpt (presumably an
# earlier draft of get_shots2). Its code is reproduced unchanged; only the
# line layout was restored.
      res
    }, data$shot$freeze_frame, data$x1)
  }, error = function(e) {
    # handle the error
    print(paste("An error occurred:", e$message))
  })

  tryCatch({
    # TODO reduce error cases
    data$number_of_players_teammates <- mapply(function(sublist, x1_threshold) {
      # Extracting the first location value and converting it to numeric
      first_location_values <- sapply(sublist$location, function(loc) as.numeric(loc[1]))
      if ("teammate" %in% names(sublist)) {
        # Filtering and counting
        res <- sum(sublist$teammate & first_location_values > x1_threshold) # error here
      } else {
        res <- 0
      }
      res
    }, data$shot$freeze_frame, data$x1)
  }, error = function(e) {
    # handle the error
    print(paste("An error occurred:", e$message))
  })

  data$shot <- data$shot %>% select(-freeze_frame, -statsbomb_xg, -key_pass_id)
  data$shot$body_part <- data$shot$body_part %>% select(-id)
  data$shot$technique <- data$shot$technique %>% select(-id)
  data$shot$type <- data$shot$type %>% select(-id)
  data$position <- data$position %>% select(-id)
  data$shot <- data$shot %>% select(-end_location)

  tryCatch({
    # TODO reduce error cases
    if ("one_on_one" %in% colnames(data$shot)) {
      data[is.na(data$shot$one_on_one), ]$shot$one_on_one <- FALSE
    } else {
      data$shot$one_on_one <- FALSE
    }
    if ("first_time" %in% colnames(data$shot)) {
      data[is.na(data$shot$first_time), ]$shot$first_time <- FALSE
    } else {
      data$shot$first_time <- FALSE
    }
    if ("aerial_won" %in% colnames(data$shot)) {
      data[is.na(data$shot$aerial_won), ]$shot$aerial_won <- FALSE
    } else {
      data$shot$aerial_won <- FALSE
    }
    if ("saved_to_post" %in% colnames(data$shot)) {
      data[is.na(data$shot$saved_to_post), ]$shot$saved_to_post <- FALSE
    } else {
      data$shot$saved_to_post <- FALSE
    }
    if ("deflected" %in% colnames(data$shot)) {
      data[is.na(data$shot$deflected), ]$shot$deflected <- FALSE
    } else {
      data$shot$deflected <- FALSE
    }
    if ("saved_off_target" %in% colnames(data$shot)) {
      data[is.na(data$shot$saved_off_target), ]$shot$saved_off_target <- FALSE
    } else {
      data$shot$saved_off_target <- FALSE
    }
    if ("open_goal" %in% colnames(data$shot)) {
      data[is.na(data$shot$open_goal), ]$shot$open_goal <- FALSE
    } else {
      data$shot$open_goal <- FALSE
    }
    if ("follows_dribble" %in% colnames(data$shot)) {
      data[is.na(data$shot$follows_dribble), ]$shot$follows_dribble <- FALSE
    } else {
      data$shot$follows_dribble <- FALSE
    }
    if ("redirect" %in% colnames(data$shot)) {
      data[is.na(data$shot$redirect), ]$shot$redirect <- FALSE
    } else {
      data$shot$redirect <- FALSE
    }
    if ("kick_off" %in% colnames(data$kick_off)) {
      data[is.na(data$shot$kick_off), ]$shotf$kick_off <- FALSE
    } else {
      data$kick_off <- FALSE
    }
  }, error = function(e) {
    # handle the error
    print(paste("An error occurred:", e$message))
  })

  data <- data %>%
    mutate(is_goal = ifelse(shot$outcome$id == 97, 1, 0),
           x1 = yd_to_m(x1) %>% round(., digits = 1),
           y1 = yd_to_m(y1) %>% round(., digits = 1),
           angle = loc2angle(x1, y1) %>% round(., digits = 1)) %>%
    select(-location)
  data$shot$outcome <- data$shot$outcome %>% select(-id)

  data <- data %>%
    unnest(shot, names_sep = "_") %>%
    unnest(position, names_sep = "_") %>%
    unnest(shot_type, names_sep = "_") %>%
    unnest(shot_outcome, names_sep = "_") %>%
    unnest(shot_technique, names_sep = "_") %>%
    unnest(shot_body_part, names_sep = "_")

  data <- cbind(data, df_players_location)
  data
}

data_list <- lapply(paste("data/la_liga_events/", file_names, sep = ""), get_shots2)


# Helper for get_shots2 ----

#' Make every boolean shot flag a complete logical column
#'
#' For each name in `flags`: if the column exists in `shot`, its NA entries are
#' set to FALSE; otherwise the column is created and filled with FALSE.
#'
#' Replaces the former chain of
#' `data[is.na(data$shot$flag), ]$shot$flag <- FALSE` statements. That form
#' raises "replacement has 1 row, data has 0" whenever a flag column has no NA
#' at all, and because all flags shared one tryCatch() the remaining flags were
#' then silently skipped.
#'
#' @param shot Data frame of shot attributes (the nested `shot` column).
#' @param flags Character vector of flag column names to normalise.
#' @return `shot` with one NA-free logical column per entry of `flags`.
.fill_missing_shot_flags <- function(
    shot,
    flags = c("one_on_one", "first_time", "aerial_won", "saved_to_post",
              "deflected", "saved_off_target", "open_goal", "follows_dribble",
              "redirect", "kick_off")) {
  for (flag in flags) {
    if (flag %in% colnames(shot)) {
      shot[[flag]][is.na(shot[[flag]])] <- FALSE
    } else {
      # rep() instead of a bare scalar so a zero-row `shot` is handled too.
      shot[[flag]] <- rep(FALSE, nrow(shot))
    }
  }
  shot
}


# TODO not all json data have these same keys/fields, customize function to cover all cases

#' Extract one row per shot from an events JSON file
#'
#' Reads `json_file` with fromJSON(), keeps the events whose `type$name` is
#' "Shot" and derives, per shot:
#' * `x1`, `y1`: `c(120, 80) - location`, passed through yd_to_m() and rounded
#'   to one decimal; `angle` is loc2angle(x1, y1), also rounded.
#' * `number_of_players_opponents` / `number_of_players_teammates`: counts of
#'   freeze-frame players whose first location coordinate exceeds `x1`.
#' * `is_goal`: 1 when `shot$outcome$id == 97`, else 0.
#' * the wide player columns built from `shot$freeze_frame` (x, y, teammate
#'   for up to 21 players, padded with NA).
#'
#' yd_to_m() and loc2angle() are defined outside this excerpt.
#' NOTE(review): this draft has no guard for freeze frames without a
#' `teammate` field; the definition further down supersedes it.
#'
#' @param json_file Path of the events JSON file, handed to fromJSON().
#' @return A data frame with the unnested shot/position columns followed by
#'   the player-location columns.
get_shots2 <- function(json_file) {
  data <- fromJSON(json_file) %>%
    filter(type$name == "Shot") %>%
    dplyr::select(c(minute, position, location, shot))

  df_temp <- do.call(rbind, lapply(data$location, function(loc) c(120, 80) - loc))
  colnames(df_temp) <- c("x1", "y1")
  data$x1 <- df_temp[, 1]
  data$y1 <- df_temp[, 2]

  tryCatch({
    df_players_location <- mapply(
      function(sublist) {
        df_players <- sapply(sublist$location, function(loc) {
          # Parentheses are required: `%>%` binds tighter than binary `-`, so
          # the unparenthesised form converted and rounded only `loc` and then
          # subtracted that from the raw c(120, 80), unlike x1/y1 above.
          (c(120, 80) - loc) %>%
            as.numeric() %>%
            yd_to_m() %>%
            round(., digits = 1)
        }) %>%
          t() %>%
          as.data.frame()
        # df <- sapply(sublist$teammate, function(tmt) cbind(df_players, tmt))
        df <- cbind(df_players, sublist$teammate)
        colnames(df) <- c("x", "y", "teammate")

        # Pad to a fixed 21 rows so every shot pivots to the same columns.
        na_df <- as.data.frame(matrix(NA, nrow = 21 - nrow(df), ncol = ncol(df)))
        colnames(na_df) <- colnames(df)
        dff <- rbind(df, na_df)

        wider_df <- dff %>%
          mutate(row = row_number()) %>%
          pivot_wider(names_from = row, values_from = c(x, y, teammate), names_sep = "_player")
        wider_df
      },
      data$shot$freeze_frame
    )
  }, error = function(e) {
    # handle the error
    print(json_file)
    print(paste("An error occurred:", e$message))
  })
  # NOTE(review): if the tryCatch above caught an error, df_players_location
  # was never assigned in this function and the next line fails (or picks up a
  # global of the same name).
  df_players_location <- df_players_location %>% t()

  # df <- do.call(rbind, df_test$shot$freeze_frame[[1]]$location) %>% as.data.frame()
  # dff <- cbind(df, df_test$shot$freeze_frame[[1]]$teammate)
  # colnames(dff) <- c("x", "y", "teammate")
  #
  #
  # na_df <- as.data.frame(matrix(NA, nrow = 21 - nrow(df), ncol = ncol(dff)))
  # colnames(na_df) <- colnames(dff)
  # dff <- rbind(dff, na_df)
  #
  # wider_df <- dff %>%
  # mutate(row = row_number()) %>%
  # pivot_wider(names_from = row, values_from = c(x, y, teammate), names_sep = "_player")

  tryCatch({
    # TODO reduce error cases
    # NOTE(review): the player coordinate used here is the raw loc[1], while
    # x1 is 120 - loc[1] of the shot; confirm the comparison is intended.
    data$number_of_players_opponents <- mapply(function(sublist, x1_threshold) {
      # Extracting the first location value and converting it to numeric
      first_location_values <- sapply(sublist$location, function(loc) as.numeric(loc[1]))
      if ("teammate" %in% names(sublist)) {
        # Filtering and counting
        res <- sum(!sublist$teammate & first_location_values > x1_threshold) # error here
      } else {
        res <- 0
      }
      res
    }, data$shot$freeze_frame, data$x1)
  }, error = function(e) {
    # handle the error
    print(paste("An error occurred:", e$message))
  })

  tryCatch({
    # TODO reduce error cases
    data$number_of_players_teammates <- mapply(function(sublist, x1_threshold) {
      # Extracting the first location value and converting it to numeric
      first_location_values <- sapply(sublist$location, function(loc) as.numeric(loc[1]))
      if ("teammate" %in% names(sublist)) {
        # Filtering and counting
        res <- sum(sublist$teammate & first_location_values > x1_threshold) # error here
      } else {
        res <- 0
      }
      res
    }, data$shot$freeze_frame, data$x1)
  }, error = function(e) {
    # handle the error
    print(paste("An error occurred:", e$message))
  })

  # any_of() tolerates files in which some of these optional keys are absent.
  data$shot <- data$shot %>%
    dplyr::select(-dplyr::any_of(
      c("freeze_frame", "statsbomb_xg", "key_pass_id", "end_location")
    ))
  data$shot$body_part <- data$shot$body_part %>% select(-id)
  data$shot$technique <- data$shot$technique %>% select(-id)
  data$shot$type <- data$shot$type %>% select(-id)
  data$position <- data$position %>% select(-id)

  tryCatch({
    # TODO reduce error cases
    data$shot <- .fill_missing_shot_flags(data$shot)
    # Legacy column: the former kick_off branch tested colnames(data$kick_off)
    # and therefore always ended up creating this top-level, all-FALSE column.
    # It is kept so the output schema is unchanged; the real flag is now
    # normalised inside `shot` together with the others.
    data$kick_off <- FALSE
  }, error = function(e) {
    # handle the error
    print(paste("An error occurred:", e$message))
  })

  data <- data %>%
    mutate(is_goal = ifelse(shot$outcome$id == 97, 1, 0),
           x1 = yd_to_m(x1) %>% round(., digits = 1),
           y1 = yd_to_m(y1) %>% round(., digits = 1),
           angle = loc2angle(x1, y1) %>% round(., digits = 1)) %>%
    select(-location)
  data$shot$outcome <- data$shot$outcome %>% select(-id)

  data <- data %>%
    unnest(shot, names_sep = "_") %>%
    unnest(position, names_sep = "_") %>%
    unnest(shot_type, names_sep = "_") %>%
    unnest(shot_outcome, names_sep = "_") %>%
    unnest(shot_technique, names_sep = "_") %>%
    unnest(shot_body_part, names_sep = "_")

  data <- cbind(data, df_players_location)
  data
}

data_list <- lapply(paste("data/la_liga_events/", file_names, sep = ""), get_shots2)


# Exploratory checks on a single file ----
# NOTE(review): scratch statements, code unchanged. The mapply() probes below
# still use the unparenthesised `c(120, 80) - loc %>% ...` form, so the
# coordinates they print differ from what get_shots2 now produces.

df <- fromJSON("data/la_liga_events/ (1006).json") %>%
  filter(type$name == "Shot") %>%
  dplyr::select(c(minute, position, location, shot))

df$shot$freeze_frame[[1]]$location
df$shot$freeze_frame[[1]]$teammate

mapply(
  function(sublist) {
    df_players <- sapply(sublist$location, function(loc) c(120, 80) - loc %>% as.numeric() %>% yd_to_m() %>% round(., digits = 1)) %>%
      t() %>%
      as.data.frame()
    # df <- sapply(sublist$teammate, function(tmt) cbind(df_players, tmt))
    df <- cbind(df_players, sublist$teammate)
    colnames(df) <- c("x", "y", "teammate")

    na_df <- as.data.frame(matrix(NA, nrow = 21 - nrow(df), ncol = ncol(df)))
    colnames(na_df) <- colnames(df)
    dff <- rbind(df, na_df)

    wider_df <- dff %>%
      mutate(row = row_number()) %>%
      pivot_wider(names_from = row, values_from = c(x, y, teammate), names_sep = "_player")
    wider_df
  },
  df$shot$freeze_frame
)

mapply(
  function(sublist) {
    df_players <- sapply(sublist$location, function(loc) c(120, 80) - loc %>% as.numeric() %>% yd_to_m() %>% round(., digits = 1)) %>%
      t() %>%
      as.data.frame()
    # df <- sapply(sublist$teammate, function(tmt) cbind(df_players, tmt))
    print(df_players)
    df <- cbind(df_players, sublist$teammate)
    colnames(df) <- c("x", "y", "teammate")

    na_df <- as.data.frame(matrix(NA, nrow = 21 - nrow(df), ncol = ncol(df)))
    colnames(na_df) <- colnames(df)
    dff <- rbind(df, na_df)

    wider_df <- dff %>%
      mutate(row = row_number()) %>%
      pivot_wider(names_from = row, values_from = c(x, y, teammate), names_sep = "_player")
    wider_df
  },
  df$shot$freeze_frame
)

mapply(
  function(sublist) {
    df_players <- sapply(sublist$location, function(loc) c(120, 80) - loc %>% as.numeric() %>% yd_to_m() %>% round(., digits = 1)) %>%
      t() %>%
      as.data.frame()
    # df <- sapply(sublist$teammate, function(tmt) cbind(df_players, tmt))
    print(df_players)
    print(sublist$teammate)
    df <- cbind(df_players, sublist$teammate)
    colnames(df) <- c("x", "y", "teammate")

    na_df <- as.data.frame(matrix(NA, nrow = 21 - nrow(df), ncol = ncol(df)))
    colnames(na_df) <- colnames(df)
    dff <- rbind(df, na_df)

    wider_df <- dff %>%
      mutate(row = row_number()) %>%
      pivot_wider(names_from = row, values_from = c(x, y, teammate), names_sep = "_player")
    wider_df
  },
  df$shot$freeze_frame
)

df$shot$freeze_frame[[1]]$teammate

mapply(
  function(sublist) {
    df_players <- sapply(sublist$location, function(loc) c(120, 80) - loc %>% as.numeric() %>% yd_to_m() %>% round(., digits = 1)) %>%
      t() %>%
      as.data.frame()
    # df <- sapply(sublist$teammate, function(tmt) cbind(df_players, tmt))
    print(df_players)
    print(sublist$teammate)
    df <- cbind(df_players, sublist$teammate)
    colnames(df) <- c("x", "y", "teammate")

    na_df <- as.data.frame(matrix(NA, nrow = 21 - nrow(df), ncol = ncol(df)))
    colnames(na_df) <- colnames(df)
    dff <- rbind(df, na_df)

    wider_df <- dff %>%
      mutate(row = row_number()) %>%
      pivot_wider(names_from = row, values_from = c(x, y, teammate), names_sep = "_player")
    wider_df
  },
  df$shot$freeze_frame
)

mapply(
  function(sublist) {
    df_players <- sapply(sublist$location, function(loc) c(120, 80) - loc %>% as.numeric() %>% yd_to_m() %>% round(., digits = 1)) %>%
      t() %>%
      as.data.frame()
    # df <- sapply(sublist$teammate, function(tmt) cbind(df_players, tmt))
    df <- cbind(df_players, sublist$teammate)
    colnames(df) <- c("x", "y", "teammate")
    print(df)

    na_df <- as.data.frame(matrix(NA, nrow = 21 - nrow(df), ncol = ncol(df)))
    colnames(na_df) <- colnames(df)
    dff <- rbind(df, na_df)

    wider_df <- dff %>%
      mutate(row = row_number()) %>%
      pivot_wider(names_from = row, values_from = c(x, y, teammate), names_sep = "_player")
    wider_df
  },
  df$shot$freeze_frame
)

df$shot$freeze_frame[[3]]$teammate
df$shot$freeze_frame[[10]]$teammate
df$shot$freeze_frame[[115]]$teammate
df$shot$freeze_frame[[11]]$teammate
length(df$shot$freeze_frame)
df$shot$freeze_frame[[25]]$teammate
df$shot$freeze_frame[[24]]$teammate


# TODO not all json data have these same keys/fields, customize function to cover all cases

#' Extract one row per shot from an events JSON file
#'
#' Reads `json_file` with fromJSON(), keeps the events whose `type$name` is
#' "Shot" and derives, per shot:
#' * `x1`, `y1`: `c(120, 80) - location`, passed through yd_to_m() and rounded
#'   to one decimal; `angle` is loc2angle(x1, y1), also rounded.
#' * `number_of_players_opponents` / `number_of_players_teammates`: counts of
#'   freeze-frame players whose first location coordinate exceeds `x1`.
#' * `is_goal`: 1 when `shot$outcome$id == 97`, else 0.
#' * the wide player columns built from `shot$freeze_frame` (x, y, teammate
#'   for up to 21 players, padded with NA). A freeze frame whose `teammate`
#'   field is NULL yields 21 all-NA players.
#'
#' yd_to_m() and loc2angle() are defined outside this excerpt.
#'
#' @param json_file Path of the events JSON file, handed to fromJSON().
#' @return A data frame with the unnested shot/position columns followed by
#'   the player-location columns.
get_shots2 <- function(json_file) {
  data <- fromJSON(json_file) %>%
    filter(type$name == "Shot") %>%
    dplyr::select(c(minute, position, location, shot))

  df_temp <- do.call(rbind, lapply(data$location, function(loc) c(120, 80) - loc))
  colnames(df_temp) <- c("x1", "y1")
  data$x1 <- df_temp[, 1]
  data$y1 <- df_temp[, 2]

  tryCatch({
    df_players_location <- mapply(
      function(sublist) {
        if (!is.null(sublist$teammate)) {
          df_players <- sapply(sublist$location, function(loc) {
            # Parentheses are required: `%>%` binds tighter than binary `-`,
            # so the unparenthesised form converted and rounded only `loc` and
            # then subtracted that from the raw c(120, 80), unlike x1/y1 above.
            (c(120, 80) - loc) %>%
              as.numeric() %>%
              yd_to_m() %>%
              round(., digits = 1)
          }) %>%
            t() %>%
            as.data.frame()
          # df <- sapply(sublist$teammate, function(tmt) cbind(df_players, tmt))
          df <- cbind(df_players, sublist$teammate)
          colnames(df) <- c("x", "y", "teammate")

          # Pad to a fixed 21 rows so every shot pivots to the same columns.
          na_df <- as.data.frame(matrix(NA, nrow = 21 - nrow(df), ncol = ncol(df)))
          colnames(na_df) <- colnames(df)
          dff <- rbind(df, na_df)
        } else {
          # No usable freeze frame: emit 21 empty player slots instead.
          dff <- as.data.frame(matrix(NA, nrow = 21, ncol = 3))
          colnames(dff) <- c("x", "y", "teammate")
        }

        wider_df <- dff %>%
          mutate(row = row_number()) %>%
          pivot_wider(names_from = row, values_from = c(x, y, teammate), names_sep = "_player")
        wider_df
      },
      data$shot$freeze_frame
    )
  }, error = function(e) {
    # handle the error
    print(json_file)
    print(paste("An error occurred:", e$message))
  })
  # NOTE(review): if the tryCatch above caught an error, df_players_location
  # was never assigned in this function and the next line fails (or picks up a
  # global of the same name).
  df_players_location <- df_players_location %>% t()

  # df <- do.call(rbind, df_test$shot$freeze_frame[[1]]$location) %>% as.data.frame()
  # dff <- cbind(df, df_test$shot$freeze_frame[[1]]$teammate)
  # colnames(dff) <- c("x", "y", "teammate")
  #
  #
  # na_df <- as.data.frame(matrix(NA, nrow = 21 - nrow(df), ncol = ncol(dff)))
  # colnames(na_df) <- colnames(dff)
  # dff <- rbind(dff, na_df)
  #
  # wider_df <- dff %>%
  # mutate(row = row_number()) %>%
  # pivot_wider(names_from = row, values_from = c(x, y, teammate), names_sep = "_player")

  tryCatch({
    # TODO reduce error cases
    # NOTE(review): the player coordinate used here is the raw loc[1], while
    # x1 is 120 - loc[1] of the shot; confirm the comparison is intended.
    data$number_of_players_opponents <- mapply(function(sublist, x1_threshold) {
      # Extracting the first location value and converting it to numeric
      first_location_values <- sapply(sublist$location, function(loc) as.numeric(loc[1]))
      if ("teammate" %in% names(sublist)) {
        # Filtering and counting
        res <- sum(!sublist$teammate & first_location_values > x1_threshold) # error here
      } else {
        res <- 0
      }
      res
    }, data$shot$freeze_frame, data$x1)
  }, error = function(e) {
    print(json_file)
    # handle the error
    print(paste("An error occurred:", e$message))
  })

  tryCatch({
    # TODO reduce error cases
    data$number_of_players_teammates <- mapply(function(sublist, x1_threshold) {
      # Extracting the first location value and converting it to numeric
      first_location_values <- sapply(sublist$location, function(loc) as.numeric(loc[1]))
      if ("teammate" %in% names(sublist)) {
        # Filtering and counting
        res <- sum(sublist$teammate & first_location_values > x1_threshold) # error here
      } else {
        res <- 0
      }
      res
    }, data$shot$freeze_frame, data$x1)
  }, error = function(e) {
    print(json_file)
    # handle the error
    print(paste("An error occurred:", e$message))
  })

  # any_of() tolerates files in which some of these optional keys are absent.
  data$shot <- data$shot %>%
    dplyr::select(-dplyr::any_of(
      c("freeze_frame", "statsbomb_xg", "key_pass_id", "end_location")
    ))
  data$shot$body_part <- data$shot$body_part %>% select(-id)
  data$shot$technique <- data$shot$technique %>% select(-id)
  data$shot$type <- data$shot$type %>% select(-id)
  data$position <- data$position %>% select(-id)

  tryCatch({
    # TODO reduce error cases
    data$shot <- .fill_missing_shot_flags(data$shot)
    # Legacy column: the former kick_off branch tested colnames(data$kick_off)
    # and therefore always ended up creating this top-level, all-FALSE column.
    # It is kept so the output schema is unchanged; the real flag is now
    # normalised inside `shot` together with the others.
    data$kick_off <- FALSE
  }, error = function(e) {
    # handle the error
    print(paste("An error occurred:", e$message))
  })

  data <- data %>%
    mutate(is_goal = ifelse(shot$outcome$id == 97, 1, 0),
           x1 = yd_to_m(x1) %>% round(., digits = 1),
           y1 = yd_to_m(y1) %>% round(., digits = 1),
           angle = loc2angle(x1, y1) %>% round(., digits = 1)) %>%
    select(-location)
  data$shot$outcome <- data$shot$outcome %>% select(-id)

  data <- data %>%
    unnest(shot, names_sep = "_") %>%
    unnest(position, names_sep = "_") %>%
    unnest(shot_type, names_sep = "_") %>%
    unnest(shot_outcome, names_sep = "_") %>%
    unnest(shot_technique, names_sep = "_") %>%
    unnest(shot_body_part, names_sep = "_")

  data <- cbind(data, df_players_location)
  data
}

data_list <- lapply(paste("data/la_liga_events/", file_names, sep = ""), get_shots2)


# Combine all matches and write the modelling table ----
# NOTE(review): statements kept in their original order, including the
# View() call that precedes the assignment of data3_final and the repeated
# rbind / write attempts.

View(data3_final)

combined_data <- do.call(rbind, data_list)
data_list
combined_data <- do.call(rbind, data_list)
combined_data <- do.call(bind_rows, data_list)

nrow(combined_data)

data3_final <- combined_data %>%
  select(-c(shot_outcome_name, shot_saved_off_target, shot_saved_to_post)) %>%
  mutate(is_head = ifelse(shot_body_part_name == "Head", 1, 0),
         is_right_foot = ifelse(shot_body_part_name == "Right Foot", 1, 0),
         is_left_foot = ifelse(shot_body_part_name == "Left Foot", 1, 0),
         is_open_play = ifelse(shot_type_name == "Open Play", 1, 0)) %>%
  select(-shot_body_part_name)

colnames(data3_final)

write.csv(data3_final, file = "data/final_data.csv")
write_csv(data3_final, file = "data/final_data.csv")