small fixes in final data loading

This commit is contained in:
LukaszChrostowski 2023-12-12 22:03:51 +01:00
parent c099e92132
commit 913bfc5275
5 changed files with 38771 additions and 38422 deletions

461
.Rhistory
View File

@ -1,23 +1,145 @@
data4 <- read.csv("data/data4.csv") res
setwd("~/Desktop/fantastyczne_gole") }, data$shot$freeze_frame, data$x1)
data4 <- read.csv("data/data4.csv") },
nrow error = function(e) {
nrow(data4) # handle the error
data_list print(paste("An error occurred:", e$message))
file_names <- list.files(path = "data/la_liga_events/", pattern = "*.json") })
length(file_names) tryCatch({ # TODO reduce error cases
data$number_of_players_teammates <- mapply(function(sublist, x1_threshold) {
# Extracting the first location value and converting it to numeric
first_location_values <- sapply(sublist$location, function(loc) as.numeric(loc[1]))
if ("teammate" %in% names(sublist)) {
# Filtering and counting
res <- sum(sublist$teammate & first_location_values > x1_threshold) # error here
} else {
res <- 0
}
res
}, data$shot$freeze_frame, data$x1)
},
error = function(e) {
# handle the error
print(paste("An error occurred:", e$message))
})
data$shot <- data$shot %>% select(-freeze_frame, -statsbomb_xg, -key_pass_id)
data$shot$body_part <- data$shot$body_part %>% select(-id)
data$shot$technique <- data$shot$technique %>% select(-id)
data$shot$type <- data$shot$type %>% select(-id)
data$position <- data$position %>% select(-id)
data$shot <- data$shot %>% select(-end_location)
tryCatch({ # TODO reduce error cases
if ("one_on_one" %in% colnames(data$shot)) {
data[is.na(data$shot$one_on_one), ]$shot$one_on_one <- FALSE
} else {
data$shot$one_on_one <- FALSE
}
if ("first_time" %in% colnames(data$shot)) {
data[is.na(data$shot$first_time), ]$shot$first_time <- FALSE
} else {
data$shot$first_time <- FALSE
}
if ("aerial_won" %in% colnames(data$shot)) {
data[is.na(data$shot$aerial_won), ]$shot$aerial_won <- FALSE
} else {
data$shot$aerial_won <- FALSE
}
if ("saved_to_post" %in% colnames(data$shot)) {
data[is.na(data$shot$saved_to_post), ]$shot$saved_to_post <- FALSE
} else {
data$shot$saved_to_post <- FALSE
}
if ("deflected" %in% colnames(data$shot)) {
data[is.na(data$shot$deflected), ]$shot$deflected <- FALSE
} else {
data$shot$deflected <- FALSE
}
if ("saved_off_target" %in% colnames(data$shot)) {
data[is.na(data$shot$saved_off_target), ]$shot$saved_off_target <- FALSE
} else {
data$shot$saved_off_target <- FALSE
}
if ("open_goal" %in% colnames(data$shot)) {
data[is.na(data$shot$open_goal), ]$shot$open_goal <- FALSE
} else {
data$shot$open_goal <- FALSE
}
if ("follows_dribble" %in% colnames(data$shot)) {
data[is.na(data$shot$follows_dribble), ]$shot$follows_dribble <- FALSE
} else {
data$shot$follows_dribble <- FALSE
}
if ("redirect" %in% colnames(data$shot)) {
data[is.na(data$shot$redirect), ]$shot$redirect <- FALSE
} else {
data$shot$redirect <- FALSE
}
if ("kick_off" %in% colnames(data$kick_off)) {
data[is.na(data$shot$kick_off), ]$shotf$kick_off <- FALSE
} else {
data$kick_off <- FALSE
}
},
error = function(e) {
# handle the error
print(paste("An error occurred:", e$message))
})
data <- data %>% mutate(is_goal = ifelse(shot$outcome$id == 97, 1, 0),
x1 = yd_to_m(x1) %>% round(., digits = 1),
y1 = yd_to_m(y1) %>% round(., digits = 1),
angle = loc2angle(x1, y1) %>% round(., digits = 1)) %>%
select(-location)
data$shot$outcome <- data$shot$outcome %>% select(-id)
data <- data %>% unnest(shot, names_sep = "_") %>%
unnest(position, names_sep = "_") %>%
unnest(shot_type, names_sep = "_") %>%
unnest(shot_outcome, names_sep = "_") %>%
unnest(shot_technique, names_sep = "_") %>%
unnest(shot_body_part, names_sep = "_")
data <- cbind(data, df_players_location)
data
}
data_list <- lapply(paste("data/la_liga_events/", file_names, sep = ""), get_shots2) data_list <- lapply(paste("data/la_liga_events/", file_names, sep = ""), get_shots2)
library(jsonlite)
library(tidyverse)
library(ggsoccer)
library(dplyr)
# TODO not all json data have these same keys/fields, customize function to cover all cases # TODO not all json data have these same keys/fields, customize function to cover all cases
get_shots2 <- function(json_file) { get_shots2 <- function(json_file) {
data <- fromJSON(json_file) %>% filter(type$name == "Shot") %>% dplyr::select(c(minute, position, location, shot)) data <- fromJSON(json_file) %>% filter(type$name == "Shot") %>% dplyr::select(c(minute, position, location, shot))
df_temp <- do.call(rbind, data$location) df_temp <- do.call(rbind, lapply(data$location, function(loc) c(120, 80) - loc))
colnames(df_temp) <- c("x1", "y1") colnames(df_temp) <- c("x1", "y1")
data$x1 <- df_temp[,1] data$x1 <- df_temp[,1]
data$y1 <- df_temp[,2] data$y1 <- df_temp[,2]
tryCatch({
df_players_location <- mapply( function(sublist) {
df_players <- sapply(sublist$location, function(loc) c(120, 80) - loc %>% as.numeric() %>% yd_to_m() %>% round(., digits = 1)) %>% t() %>% as.data.frame()
# df <- sapply(sublist$teammate, function(tmt) cbind(df_players, tmt))
df <- cbind(df_players, sublist$teammate)
colnames(df) <- c("x", "y", "teammate")
na_df <- as.data.frame(matrix(NA, nrow = 21 - nrow(df), ncol = ncol(df)))
colnames(na_df) <- colnames(df)
dff <- rbind(df, na_df)
wider_df <- dff %>%
mutate(row = row_number()) %>%
pivot_wider(names_from = row, values_from = c(x, y, teammate), names_sep = "_player")
wider_df
}, data$shot$freeze_frame)
},
error = function(e) {
# handle the error
print(json_file)
print(paste("An error occurred:", e$message))
})
df_players_location <- df_players_location %>% t()
# df <- do.call(rbind, df_test$shot$freeze_frame[[1]]$location) %>% as.data.frame()
# dff <- cbind(df, df_test$shot$freeze_frame[[1]]$teammate)
# colnames(dff) <- c("x", "y", "teammate")
#
#
# na_df <- as.data.frame(matrix(NA, nrow = 21 - nrow(df), ncol = ncol(dff)))
# colnames(na_df) <- colnames(dff)
# dff <- rbind(dff, na_df)
#
# wider_df <- dff %>%
# mutate(row = row_number()) %>%
# pivot_wider(names_from = row, values_from = c(x, y, teammate), names_sep = "_player")
tryCatch({ # TODO reduce error cases tryCatch({ # TODO reduce error cases
data$number_of_players_opponents <- mapply(function(sublist, x1_threshold) { data$number_of_players_opponents <- mapply(function(sublist, x1_threshold) {
# Extracting the first location value and converting it to numeric # Extracting the first location value and converting it to numeric
@ -114,10 +236,11 @@ error = function(e) {
# handle the error # handle the error
print(paste("An error occurred:", e$message)) print(paste("An error occurred:", e$message))
}) })
data <- data %>% mutate(angle = atan(7.32 * x1 / (x1^2 + y1^2 - (7.32/2)^2)), data <- data %>% mutate(is_goal = ifelse(shot$outcome$id == 97, 1, 0),
is_goal = ifelse(shot$outcome$id == 97, 1, 0)) %>% x1 = yd_to_m(x1) %>% round(., digits = 1),
y1 = yd_to_m(y1) %>% round(., digits = 1),
angle = loc2angle(x1, y1) %>% round(., digits = 1)) %>%
select(-location) select(-location)
data$angle <- ifelse(data$angle<0, base::pi + data$angle, data$angle)
data$shot$outcome <- data$shot$outcome %>% select(-id) data$shot$outcome <- data$shot$outcome %>% select(-id)
data <- data %>% unnest(shot, names_sep = "_") %>% data <- data %>% unnest(shot, names_sep = "_") %>%
unnest(position, names_sep = "_") %>% unnest(position, names_sep = "_") %>%
@ -125,53 +248,265 @@ unnest(shot_type, names_sep = "_") %>%
unnest(shot_outcome, names_sep = "_") %>% unnest(shot_outcome, names_sep = "_") %>%
unnest(shot_technique, names_sep = "_") %>% unnest(shot_technique, names_sep = "_") %>%
unnest(shot_body_part, names_sep = "_") unnest(shot_body_part, names_sep = "_")
data <- cbind(data, df_players_location)
data data
} }
data_list <- lapply(paste("data/la_liga_events/", file_names, sep = ""), get_shots2) data_list <- lapply(paste("data/la_liga_events/", file_names, sep = ""), get_shots2)
length(data_list) df <- fromJSON("data/la_liga_events/ (1006).json") %>% filter(type$name == "Shot") %>% dplyr::select(c(minute, position, location, shot))
ss <- lapply(data_list, nrow()) df$shot$freeze_frame[[1]]$location
ss <- lapply(data_list, nrow) df$shot$freeze_frame[[1]]$teammate
ss mapply( function(sublist) {
sum(ss) df_players <- sapply(sublist$location, function(loc) c(120, 80) - loc %>% as.numeric() %>% yd_to_m() %>% round(., digits = 1)) %>% t() %>% as.data.frame()
sss <- unlist(ss) # df <- sapply(sublist$teammate, function(tmt) cbind(df_players, tmt))
sss df <- cbind(df_players, sublist$teammate)
sum(sss) colnames(df) <- c("x", "y", "teammate")
source("notebooks/dataCleaning.R") na_df <- as.data.frame(matrix(NA, nrow = 21 - nrow(df), ncol = ncol(df)))
source("notebooks/dataCleaning.R") colnames(na_df) <- colnames(df)
# generate random data dff <- rbind(df, na_df)
random_data <- as.data.frame(x = rnorm(100), wider_df <- dff %>%
y = rnorm(100, mean = 1, sd = 3)) mutate(row = row_number()) %>%
# unit tests pivot_wider(names_from = row, values_from = c(x, y, teammate), names_sep = "_player")
expect_silent( wider_df
data <- get_shots(file_path = "data/events/events_England.json", name_detail = "EN") }, df$shot$freeze_frame)
) mapply( function(sublist) {
library(tinytest) df_players <- sapply(sublist$location, function(loc) c(120, 80) - loc %>% as.numeric() %>% yd_to_m() %>% round(., digits = 1)) %>% t() %>% as.data.frame()
# unit tests # df <- sapply(sublist$teammate, function(tmt) cbind(df_players, tmt))
expect_silent( print(df_players)
data <- get_shots(file_path = "data/events/events_England.json", name_detail = "EN") df <- cbind(df_players, sublist$teammate)
) colnames(df) <- c("x", "y", "teammate")
expect_silent( na_df <- as.data.frame(matrix(NA, nrow = 21 - nrow(df), ncol = ncol(df)))
data <- get_shots2(json_file = "data/la_liga_events/ (1).json") colnames(na_df) <- colnames(df)
) dff <- rbind(df, na_df)
expect_error( wider_df <- dff %>%
get_final_data(data = random_data) mutate(row = row_number()) %>%
) pivot_wider(names_from = row, values_from = c(x, y, teammate), names_sep = "_player")
data1 <- get_shots2(json_file = "data/la_liga_events/ (1).json") wider_df
data2 <- get_shots2(json_file = "data/la_liga_events/ (2).json") }, df$shot$freeze_frame)
expect_equivalent(colnames(data1), colnames(data2)) mapply( function(sublist) {
expect_false(is.vector(data1)) df_players <- sapply(sublist$location, function(loc) c(120, 80) - loc %>% as.numeric() %>% yd_to_m() %>% round(., digits = 1)) %>% t() %>% as.data.frame()
expect_true(is.data.frame(data1)) # df <- sapply(sublist$teammate, function(tmt) cbind(df_players, tmt))
colnames(data1) print(df_players)
colnames(data2) print(sublist$teammate)
expect_length(length(colnames(data1)), length(colnames(data2))) df <- cbind(df_players, sublist$teammate)
length(colnames(data1)) colnames(df) <- c("x", "y", "teammate")
length(colnames(data2)) na_df <- as.data.frame(matrix(NA, nrow = 21 - nrow(df), ncol = ncol(df)))
?expect_length colnames(na_df) <- colnames(df)
expect_length(colnames(data1), colnames(data2)) dff <- rbind(df, na_df)
expect_length(colnames(data1), colnames(data2)) wider_df <- dff %>%
expect_length(colnames(data1), 22) mutate(row = row_number()) %>%
expect_length(colnames(data2), 22) pivot_wider(names_from = row, values_from = c(x, y, teammate), names_sep = "_player")
expect_false(is.vector(data1)) wider_df
expect_true(is.data.frame(data1)) }, df$shot$freeze_frame)
df <- read.csv("data/data4.csv", nrows = 1000) df$shot$freeze_frame[[1]]$teammate
View(df) mapply( function(sublist) {
df_players <- sapply(sublist$location, function(loc) c(120, 80) - loc %>% as.numeric() %>% yd_to_m() %>% round(., digits = 1)) %>% t() %>% as.data.frame()
# df <- sapply(sublist$teammate, function(tmt) cbind(df_players, tmt))
print(df_players)
print(sublist$teammate)
df <- cbind(df_players, sublist$teammate)
colnames(df) <- c("x", "y", "teammate")
na_df <- as.data.frame(matrix(NA, nrow = 21 - nrow(df), ncol = ncol(df)))
colnames(na_df) <- colnames(df)
dff <- rbind(df, na_df)
wider_df <- dff %>%
mutate(row = row_number()) %>%
pivot_wider(names_from = row, values_from = c(x, y, teammate), names_sep = "_player")
wider_df
}, df$shot$freeze_frame)
# Interactive debugging run: for every freeze frame in `df$shot$freeze_frame`
# build the wide one-row player table (x/y/teammate per player slot) and print
# the intermediate long table `df` before it is padded. `df` must already
# exist in the session.
mapply( function(sublist) {
# NOTE(review): `%>%` binds tighter than binary `-`, so this evaluates as
# c(120, 80) - round(yd_to_m(as.numeric(loc)), 1), i.e. the conversion is
# applied to `loc` before the subtraction from c(120, 80) -- confirm intent.
df_players <- sapply(sublist$location, function(loc) c(120, 80) - loc %>% as.numeric() %>% yd_to_m() %>% round(., digits = 1)) %>% t() %>% as.data.frame()
# df <- sapply(sublist$teammate, function(tmt) cbind(df_players, tmt))
df <- cbind(df_players, sublist$teammate)
colnames(df) <- c("x", "y", "teammate")
# Debug output: the long (one row per player) table for this shot.
print(df)
# Pad with NA rows up to 21 rows so every shot yields the same columns.
na_df <- as.data.frame(matrix(NA, nrow = 21 - nrow(df), ncol = ncol(df)))
colnames(na_df) <- colnames(df)
dff <- rbind(df, na_df)
# Spread the rows into columns named x_player<k>, y_player<k>, teammate_player<k>.
wider_df <- dff %>%
mutate(row = row_number()) %>%
pivot_wider(names_from = row, values_from = c(x, y, teammate), names_sep = "_player")
wider_df
}, df$shot$freeze_frame)
# Inspect the `teammate` vector of individual freeze frames and the number of
# freeze frames available.
df$shot$freeze_frame[[3]]$teammate
df$shot$freeze_frame[[10]]$teammate
df$shot$freeze_frame[[115]]$teammate
df$shot$freeze_frame[[11]]$teammate
length(df$shot$freeze_frame)
df$shot$freeze_frame[[25]]$teammate
df$shot$freeze_frame[[24]]$teammate
# Build one row per shot from a single match-events JSON file.
#
# Reads `json_file` with fromJSON(), keeps the events whose `type$name` is
# "Shot" and returns a data frame containing:
#   * `minute` plus the unnested `position_*` and `shot_*` columns,
#   * `x1` / `y1`: the shot location mirrored as c(120, 80) - location, passed
#     through yd_to_m() and rounded to one decimal,
#   * `angle` (from loc2angle()) and `is_goal` (1 when shot$outcome$id == 97),
#   * `number_of_players_opponents` / `number_of_players_teammates`,
#   * `x_player<k>`, `y_player<k>`, `teammate_player<k>` for k = 1..21, taken
#     from each shot's freeze frame and padded with NA.
#
# Errors raised while deriving the freeze-frame columns or normalising the
# boolean shot flags are printed together with the file name and that step is
# skipped; the corresponding columns are then simply absent from the result.
#
# json_file: path to the events JSON file.
#
# TODO not all json data have these same keys/fields, customize function to cover all cases
get_shots2 <- function(json_file) {
  data <- fromJSON(json_file) %>%
    filter(type$name == "Shot") %>%
    dplyr::select(c(minute, position, location, shot))

  # Mirror the shot location: (x, y) -> (120 - x, 80 - y).
  df_temp <- do.call(rbind, lapply(data$location, function(loc) c(120, 80) - loc))
  colnames(df_temp) <- c("x1", "y1")
  data$x1 <- df_temp[, 1]
  data$y1 <- df_temp[, 2]

  # Shared error handler: report which file failed and why, then carry on.
  report_error <- function(e) {
    print(json_file)
    print(paste("An error occurred:", e$message))
  }

  player_cols <- c("x", "y", "teammate")
  max_players <- 21

  # One wide row per shot with the mirrored, converted position and teammate
  # flag of every player in the freeze frame. The value is returned from
  # tryCatch() (NULL on failure) so the variable is always defined afterwards.
  df_players_location <- tryCatch({
    mapply(function(sublist) {
      if (!is.null(sublist$teammate)) {
        df_players <- sapply(sublist$location, function(loc) {
          # Parenthesised on purpose: `%>%` binds tighter than `-`, so without
          # the parentheses only `loc` would be converted before subtracting
          # it from the unconverted c(120, 80).
          (c(120, 80) - as.numeric(loc)) %>% yd_to_m() %>% round(., digits = 1)
        }) %>% t() %>% as.data.frame()
        df <- cbind(df_players, sublist$teammate)
        colnames(df) <- player_cols
        # Pad to a fixed number of rows so every shot yields the same columns.
        na_df <- as.data.frame(matrix(NA, nrow = max_players - nrow(df), ncol = ncol(df)))
        colnames(na_df) <- colnames(df)
        dff <- rbind(df, na_df)
      } else {
        # No freeze frame for this shot: all player slots are NA.
        dff <- as.data.frame(matrix(NA, nrow = max_players, ncol = length(player_cols)))
        colnames(dff) <- player_cols
      }
      dff %>%
        mutate(row = row_number()) %>%
        pivot_wider(names_from = row, values_from = c(x, y, teammate), names_sep = "_player")
    }, data$shot$freeze_frame) %>% t()
  },
  error = function(e) {
    report_error(e)
    NULL
  })

  # Count, per shot, the freeze-frame players of one side whose first location
  # coordinate exceeds the shot's `x1`.
  # NOTE(review): the freeze-frame coordinate is used as stored while `x1` has
  # already been mirrored (120 - x) -- confirm this comparison is intended.
  count_players <- function(count_teammates) {
    mapply(function(sublist, x1_threshold) {
      if (!("teammate" %in% names(sublist))) {
        return(0)
      }
      first_location_values <- sapply(sublist$location, function(loc) as.numeric(loc[1]))
      is_counted <- if (count_teammates) sublist$teammate else !sublist$teammate
      sum(is_counted & first_location_values > x1_threshold)
    }, data$shot$freeze_frame, data$x1)
  }

  tryCatch({ # TODO reduce error cases
    data$number_of_players_opponents <- count_players(count_teammates = FALSE)
  },
  error = report_error)

  tryCatch({ # TODO reduce error cases
    data$number_of_players_teammates <- count_players(count_teammates = TRUE)
  },
  error = report_error)

  # Drop helper / unused shot fields. any_of() tolerates files in which some
  # of these keys are missing.
  data$shot <- data$shot %>%
    select(-any_of(c("freeze_frame", "statsbomb_xg", "key_pass_id", "end_location")))
  data$shot$body_part <- data$shot$body_part %>% select(-id)
  data$shot$technique <- data$shot$technique %>% select(-id)
  data$shot$type <- data$shot$type %>% select(-id)
  data$position <- data$position %>% select(-id)

  # Boolean shot flags: a missing value (or a missing column) means FALSE.
  # Each flag is handled on its own so a failure on one flag does not skip the
  # remaining ones. `kick_off` is normalised inside `shot` like every other
  # flag (output column `shot_kick_off`); no top-level `kick_off` column is
  # created.
  shot_flags <- c("one_on_one", "first_time", "aerial_won", "saved_to_post",
                  "deflected", "saved_off_target", "open_goal",
                  "follows_dribble", "redirect", "kick_off")
  for (flag in shot_flags) {
    tryCatch({ # TODO reduce error cases
      if (flag %in% colnames(data$shot)) {
        data$shot[[flag]][is.na(data$shot[[flag]])] <- FALSE
      } else {
        data$shot[[flag]] <- FALSE
      }
    },
    error = report_error)
  }

  # `angle` is computed from the already converted and rounded x1 / y1 because
  # mutate() evaluates its arguments sequentially.
  data <- data %>%
    mutate(is_goal = ifelse(shot$outcome$id == 97, 1, 0),
           x1 = yd_to_m(x1) %>% round(., digits = 1),
           y1 = yd_to_m(y1) %>% round(., digits = 1),
           angle = loc2angle(x1, y1) %>% round(., digits = 1)) %>%
    select(-location)
  data$shot$outcome <- data$shot$outcome %>% select(-id)

  # Flatten the nested data-frame columns into `<parent>_<child>` columns.
  data <- data %>%
    unnest(shot, names_sep = "_") %>%
    unnest(position, names_sep = "_") %>%
    unnest(shot_type, names_sep = "_") %>%
    unnest(shot_outcome, names_sep = "_") %>%
    unnest(shot_technique, names_sep = "_") %>%
    unnest(shot_body_part, names_sep = "_")

  # Attach the per-player columns when they could be built.
  if (!is.null(df_players_location)) {
    data <- cbind(data, df_players_location)
  }
  data
}
# Run get_shots2() on every file listed in `file_names` (one data frame per file).
data_list <- lapply(paste("data/la_liga_events/", file_names, sep = ""), get_shots2)
# NOTE(review): `data3_final` is only assigned further down in this excerpt;
# this call presumably relied on a value from earlier in the session.
View(data3_final)
# Stack the per-file data frames. rbind() is tried twice, then bind_rows();
# the last assignment is the one that remains in `combined_data`.
combined_data <- do.call(rbind, data_list)
data_list
combined_data <- do.call(rbind, data_list)
combined_data <- do.call(bind_rows, data_list)
nrow(combined_data)
# Drop the outcome-related columns, derive 0/1 indicator columns from the
# body-part and shot-type names, then drop the body-part name itself.
data3_final <- combined_data %>% select(-c(shot_outcome_name,
shot_saved_off_target,
shot_saved_to_post)) %>%
mutate(is_head = ifelse(shot_body_part_name == "Head", 1, 0),
is_right_foot = ifelse(shot_body_part_name == "Right Foot", 1, 0),
is_left_foot = ifelse(shot_body_part_name == "Left Foot", 1, 0),
is_open_play = ifelse(shot_type_name == "Open Play", 1, 0)) %>%
select(-shot_body_part_name)
colnames(data3_final)
# Both calls write to the same path, so the write_csv() output replaces the
# file produced by write.csv().
write.csv(data3_final, file = "data/final_data.csv")
write_csv(data3_final, file = "data/final_data.csv")

8
.gitignore vendored Normal file
View File

@ -0,0 +1,8 @@
.Rhistory
data/players.json
data/ginf.csv
data/events.csv
data/events
data/dictionary.txt
data/la_liga_events
data/la_liga_events_csv

BIN
data/.DS_Store vendored

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@ -7,6 +7,7 @@ library(ggsoccer)
library(dplyr) library(dplyr)
library(REdaS) library(REdaS)
library(yd2m) library(yd2m)
library(purrr)
# code and data from https://github.com/Dato-Futbol/xg-model # code and data from https://github.com/Dato-Futbol/xg-model
get_shots <- function(file_path, name_detail, save_files = F){ get_shots <- function(file_path, name_detail, save_files = F){
@ -112,6 +113,10 @@ loc2angle <- function(x, y) {
deg deg
} }
#' Euclidean distance of a point from the origin
#'
#' Computes `sqrt(x^2 + y^2)` element-wise.
#'
#' @param x Numeric vector of first coordinates.
#' @param y Numeric vector of second coordinates.
#' @return Numeric vector of distances from (0, 0), in the units of `x`/`y`.
# NOTE(review): whether (0, 0) is the intended reference point (e.g. the goal)
# depends on the coordinate convention used by the caller -- confirm.
loc2distance <- function(x, y) {
  squared_norm <- x^2 + y^2
  sqrt(squared_norm)
}
get_shots2 <- function(json_file) { get_shots2 <- function(json_file) {
data <- fromJSON(json_file) %>% filter(type$name == "Shot") %>% dplyr::select(c(minute, position, location, shot)) data <- fromJSON(json_file) %>% filter(type$name == "Shot") %>% dplyr::select(c(minute, position, location, shot))
@ -128,7 +133,11 @@ get_shots2 <- function(json_file) {
# df <- sapply(sublist$teammate, function(tmt) cbind(df_players, tmt)) # df <- sapply(sublist$teammate, function(tmt) cbind(df_players, tmt))
df <- cbind(df_players, sublist$teammate) df <- cbind(df_players, sublist$teammate)
colnames(df) <- c("x", "y", "teammate") colnames(df) <- c("x", "y", "teammate")
# df <- df %>% unnest(c(x, y, teammate))
# df$x <- as.numeric(df$x)
# df$y <- as.numeric(df$y)
# df$teammate <- as.logical(df$teammate) %>% as.numeric()
#
na_df <- as.data.frame(matrix(NA, nrow = 21 - nrow(df), ncol = ncol(df))) na_df <- as.data.frame(matrix(NA, nrow = 21 - nrow(df), ncol = ncol(df)))
colnames(na_df) <- colnames(df) colnames(na_df) <- colnames(df)
dff <- rbind(df, na_df) dff <- rbind(df, na_df)
@ -139,6 +148,7 @@ get_shots2 <- function(json_file) {
wider_df <- dff %>% wider_df <- dff %>%
mutate(row = row_number()) %>% mutate(row = row_number()) %>%
pivot_wider(names_from = row, values_from = c(x, y, teammate), names_sep = "_player") pivot_wider(names_from = row, values_from = c(x, y, teammate), names_sep = "_player")
# wider_df <- apply(wider_df, MARGIN = 2, unlist)
wider_df wider_df
}, data$shot$freeze_frame) }, data$shot$freeze_frame)
}, },
@ -280,7 +290,8 @@ get_shots2 <- function(json_file) {
data <- data %>% mutate(is_goal = ifelse(shot$outcome$id == 97, 1, 0), data <- data %>% mutate(is_goal = ifelse(shot$outcome$id == 97, 1, 0),
x1 = yd_to_m(x1) %>% round(., digits = 1), x1 = yd_to_m(x1) %>% round(., digits = 1),
y1 = yd_to_m(y1) %>% round(., digits = 1), y1 = yd_to_m(y1) %>% round(., digits = 1),
angle = loc2angle(x1, y1) %>% round(., digits = 1)) %>% angle = loc2angle(x1, y1) %>% round(., digits = 1),
distance = loc2distance(x = x1, y = y1)) %>%
select(-location) select(-location)
data$shot$outcome <- data$shot$outcome %>% select(-id) data$shot$outcome <- data$shot$outcome %>% select(-id)
data <- data %>% unnest(shot, names_sep = "_") %>% data <- data %>% unnest(shot, names_sep = "_") %>%
@ -296,27 +307,22 @@ get_shots2 <- function(json_file) {
file_names <- list.files(path = "data/la_liga_events/", pattern = "*.json") file_names <- list.files(path = "data/la_liga_events/", pattern = "*.json")
data_list <- lapply(paste("data/la_liga_events/", file_names, sep = ""), get_shots2) data_list <- lapply(paste("data/la_liga_events/", file_names, sep = ""), get_shots2)
combined_data <- do.call(bind_rows, data_list) combined_data <- bind_rows(data_list)
# ss <- lapply(data_list, nrow) # ss <- lapply(data_list, nrow)
# sss <- unlist(ss) # sss <- unlist(ss)
# #
# # sample data # # sample data
# data_test <- get_shots2("data/la_liga_events/303377.json") # data_test <- get_shots2("data/la_liga_events/ (1000).json")
data3_final <- combined_data %>% select(-c(shot_outcome_name, data3_final <- combined_data %>% select(-c(shot_outcome_name,
shot_saved_off_target, shot_saved_off_target,
shot_saved_to_post)) %>% shot_saved_to_post,
mutate(is_head = ifelse(shot_body_part_name == "Head", 1, 0), kick_off)) %>%
is_right_foot = ifelse(shot_body_part_name == "Right Foot", 1, 0), mutate(shot_kick_off = ifelse(is.na(shot_kick_off), FALSE, shot_kick_off))
is_left_foot = ifelse(shot_body_part_name == "Left Foot", 1, 0), pattern <- "^(x_player|y_player|teammate_player)[0-9]+$"
is_open_play = ifelse(shot_type_name == "Open Play", 1, 0)) %>% cols <- names(data3_final)[grepl(pattern, names(data3_final))]
select(-shot_body_part_name) data3_final <- data3_final %>% unnest(all_of(cols))
write_csv(data3_final, file = "data/final_data.csv") write_csv(data3_final, file = "data/final_data.csv")
# df_test <- read.csv("data/final_data.csv", nrows = 100)
##################### The fourth dataset ############################## ##################### The fourth dataset ##############################