# Shot extraction and sanity checks for the La Liga event data ----
#
# Cleaned-up version of an interactive session: libraries are loaded first,
# helpers and get_shots2() are defined before they are used, and commands that
# were exact repeats or could only error have been removed.

library(jsonlite)
library(tidyverse)
library(ggsoccer)
library(dplyr)
library(tinytest)

# Every path below is relative to the project root.
setwd("~/Desktop/fantastyczne_gole")

# Helpers ----

# Count, for every shot, the freeze-frame players of one side whose first
# location coordinate is greater than the shot's own x1.
# A frame without a `teammate` field (e.g. NULL) counts as 0.
.count_players_beyond <- function(freeze_frames, x1, teammates) {
  mapply(
    function(frame, x1_threshold) {
      if (!("teammate" %in% names(frame))) {
        return(0)
      }
      first_location_values <- vapply(
        frame$location,
        function(loc) as.numeric(loc[1]),
        numeric(1)
      )
      on_side <- if (teammates) frame$teammate else !frame$teammate
      sum(on_side & first_location_values > x1_threshold)
    },
    freeze_frames,
    x1
  )
}

# Make sure every flag in `flags` exists as a column of `shot`, with NA
# replaced by FALSE. Missing columns are appended in the order given.
.fill_missing_flags <- function(shot, flags) {
  for (flag in flags) {
    if (flag %in% colnames(shot)) {
      # Vectorised replacement is a no-op when nothing is NA; the previous
      # row-subset assignment raised an error in that case and skipped every
      # remaining flag.
      shot[[flag]][is.na(shot[[flag]])] <- FALSE
    } else {
      shot[[flag]] <- rep(FALSE, nrow(shot))
    }
  }
  shot
}

# Shot extraction ----

#' Extract shot events from one event JSON file
#'
#' Reads the file with `jsonlite::fromJSON()`, keeps the rows whose
#' `type$name` is "Shot" and flattens them into one row per shot.
#'
#' @param json_file Path to an event JSON file.
#' @return A data frame with `minute`, the unnested `position_*` and `shot_*`
#'   columns, `x1`, `y1`, `number_of_players_opponents`,
#'   `number_of_players_teammates` (each absent if its computation failed),
#'   `kick_off`, `angle` and `is_goal`.
# TODO not all json data have these same keys/fields, customize function to
# cover all cases
get_shots2 <- function(json_file) {
  data <- fromJSON(json_file) %>%
    filter(type$name == "Shot") %>%
    dplyr::select(c(minute, position, location, shot))

  coords <- do.call(rbind, data$location)
  colnames(coords) <- c("x1", "y1")
  data$x1 <- coords[, 1]
  data$y1 <- coords[, 2]

  # Best effort: a failure is reported and the column is simply not added.
  report_error <- function(e) {
    print(paste("An error occurred:", conditionMessage(e)))
  }

  tryCatch({ # TODO reduce error cases
    data$number_of_players_opponents <- .count_players_beyond(
      data$shot$freeze_frame, data$x1, teammates = FALSE
    )
  }, error = report_error)

  tryCatch({ # TODO reduce error cases
    data$number_of_players_teammates <- .count_players_beyond(
      data$shot$freeze_frame, data$x1, teammates = TRUE
    )
  }, error = report_error)

  # any_of() tolerates files in which some of these keys are absent.
  data$shot <- data$shot %>%
    select(-any_of(
      c("freeze_frame", "statsbomb_xg", "key_pass_id", "end_location")
    ))
  data$shot$body_part <- data$shot$body_part %>% select(-id)
  data$shot$technique <- data$shot$technique %>% select(-id)
  data$shot$type <- data$shot$type %>% select(-id)
  data$position <- data$position %>% select(-id)

  data$shot <- .fill_missing_flags(
    data$shot,
    c(
      "one_on_one", "first_time", "aerial_won", "saved_to_post", "deflected",
      "saved_off_target", "open_goal", "follows_dribble", "redirect"
    )
  )

  # `kick_off` is exposed as a top-level column (not `shot_kick_off`). The
  # previous check looked at colnames(data$kick_off), which is always NULL, so
  # the column was a constant FALSE and a nested shot$kick_off, when present,
  # leaked through as an extra column.
  if ("kick_off" %in% colnames(data$shot)) {
    data$kick_off <- !is.na(data$shot$kick_off) & data$shot$kick_off
    data$shot$kick_off <- NULL
  } else {
    data$kick_off <- rep(FALSE, nrow(data))
  }

  # NOTE(review): the angle is computed from the raw x1/y1 values with a goal
  # width of 7.32; confirm the coordinate origin and units match that constant.
  # NOTE(review): outcome id 97 is presumably the "Goal" outcome; verify
  # against the data provider's specification.
  data <- data %>%
    mutate(
      angle = atan(7.32 * x1 / (x1^2 + y1^2 - (7.32 / 2)^2)),
      is_goal = ifelse(shot$outcome$id == 97, 1, 0)
    ) %>%
    select(-location)

  # atan() returns values in (-pi/2, pi/2); shift negative angles by pi.
  data$angle <- ifelse(data$angle < 0, base::pi + data$angle, data$angle)
  data$shot$outcome <- data$shot$outcome %>% select(-id)

  data <- data %>%
    unnest(shot, names_sep = "_") %>%
    unnest(position, names_sep = "_") %>%
    unnest(shot_type, names_sep = "_") %>%
    unnest(shot_outcome, names_sep = "_") %>%
    unnest(shot_technique, names_sep = "_") %>%
    unnest(shot_body_part, names_sep = "_")

  data
}

# Load the prepared data set ----

data4 <- read.csv("data/data4.csv")
nrow(data4)

# Extract shots from every La Liga event file ----

# `pattern` is a regular expression, not a glob.
file_names <- list.files(path = "data/la_liga_events/", pattern = "\\.json$")
length(file_names)

data_list <- lapply(paste0("data/la_liga_events/", file_names), get_shots2)
length(data_list)

# Shots per file, then the total.
ss <- lapply(data_list, nrow)
ss
sss <- unlist(ss)
sss
sum(sss)

# Unit tests ----

source("notebooks/dataCleaning.R")

# generate random data
random_data <- data.frame(x = rnorm(100), y = rnorm(100, mean = 1, sd = 3))

expect_silent(
  data <- get_shots(
    file_path = "data/events/events_England.json",
    name_detail = "EN"
  )
)
expect_silent(
  data <- get_shots2(json_file = "data/la_liga_events/ (1).json")
)
expect_error(
  get_final_data(data = random_data)
)

data1 <- get_shots2(json_file = "data/la_liga_events/ (1).json")
data2 <- get_shots2(json_file = "data/la_liga_events/ (2).json")

expect_equivalent(colnames(data1), colnames(data2))
expect_false(is.vector(data1))
expect_true(is.data.frame(data1))

colnames(data1)
colnames(data2)
length(colnames(data1))
length(colnames(data2))

# expect_length() takes the object and the expected length as a number.
expect_length(colnames(data1), length(colnames(data2)))
expect_length(colnames(data1), 22)
expect_length(colnames(data2), 22)

# Quick look at the first rows of the prepared data set ----

df <- read.csv("data/data4.csv", nrows = 1000)
View(df)