small fixes in final data loading
This commit is contained in:
parent
c099e92132
commit
913bfc5275
461
.Rhistory
461
.Rhistory
@ -1,23 +1,145 @@
|
|||||||
data4 <- read.csv("data/data4.csv")
|
res
|
||||||
setwd("~/Desktop/fantastyczne_gole")
|
}, data$shot$freeze_frame, data$x1)
|
||||||
data4 <- read.csv("data/data4.csv")
|
},
|
||||||
nrow
|
error = function(e) {
|
||||||
nrow(data4)
|
# handle the error
|
||||||
data_list
|
print(paste("An error occurred:", e$message))
|
||||||
file_names <- list.files(path = "data/la_liga_events/", pattern = "*.json")
|
})
|
||||||
length(file_names)
|
tryCatch({ # TODO reduce error cases
|
||||||
|
data$number_of_players_teammates <- mapply(function(sublist, x1_threshold) {
|
||||||
|
# Extracting the first location value and converting it to numeric
|
||||||
|
first_location_values <- sapply(sublist$location, function(loc) as.numeric(loc[1]))
|
||||||
|
if ("teammate" %in% names(sublist)) {
|
||||||
|
# Filtering and counting
|
||||||
|
res <- sum(sublist$teammate & first_location_values > x1_threshold) # error here
|
||||||
|
} else {
|
||||||
|
res <- 0
|
||||||
|
}
|
||||||
|
res
|
||||||
|
}, data$shot$freeze_frame, data$x1)
|
||||||
|
},
|
||||||
|
error = function(e) {
|
||||||
|
# handle the error
|
||||||
|
print(paste("An error occurred:", e$message))
|
||||||
|
})
|
||||||
|
data$shot <- data$shot %>% select(-freeze_frame, -statsbomb_xg, -key_pass_id)
|
||||||
|
data$shot$body_part <- data$shot$body_part %>% select(-id)
|
||||||
|
data$shot$technique <- data$shot$technique %>% select(-id)
|
||||||
|
data$shot$type <- data$shot$type %>% select(-id)
|
||||||
|
data$position <- data$position %>% select(-id)
|
||||||
|
data$shot <- data$shot %>% select(-end_location)
|
||||||
|
tryCatch({ # TODO reduce error cases
|
||||||
|
if ("one_on_one" %in% colnames(data$shot)) {
|
||||||
|
data[is.na(data$shot$one_on_one), ]$shot$one_on_one <- FALSE
|
||||||
|
} else {
|
||||||
|
data$shot$one_on_one <- FALSE
|
||||||
|
}
|
||||||
|
if ("first_time" %in% colnames(data$shot)) {
|
||||||
|
data[is.na(data$shot$first_time), ]$shot$first_time <- FALSE
|
||||||
|
} else {
|
||||||
|
data$shot$first_time <- FALSE
|
||||||
|
}
|
||||||
|
if ("aerial_won" %in% colnames(data$shot)) {
|
||||||
|
data[is.na(data$shot$aerial_won), ]$shot$aerial_won <- FALSE
|
||||||
|
} else {
|
||||||
|
data$shot$aerial_won <- FALSE
|
||||||
|
}
|
||||||
|
if ("saved_to_post" %in% colnames(data$shot)) {
|
||||||
|
data[is.na(data$shot$saved_to_post), ]$shot$saved_to_post <- FALSE
|
||||||
|
} else {
|
||||||
|
data$shot$saved_to_post <- FALSE
|
||||||
|
}
|
||||||
|
if ("deflected" %in% colnames(data$shot)) {
|
||||||
|
data[is.na(data$shot$deflected), ]$shot$deflected <- FALSE
|
||||||
|
} else {
|
||||||
|
data$shot$deflected <- FALSE
|
||||||
|
}
|
||||||
|
if ("saved_off_target" %in% colnames(data$shot)) {
|
||||||
|
data[is.na(data$shot$saved_off_target), ]$shot$saved_off_target <- FALSE
|
||||||
|
} else {
|
||||||
|
data$shot$saved_off_target <- FALSE
|
||||||
|
}
|
||||||
|
if ("open_goal" %in% colnames(data$shot)) {
|
||||||
|
data[is.na(data$shot$open_goal), ]$shot$open_goal <- FALSE
|
||||||
|
} else {
|
||||||
|
data$shot$open_goal <- FALSE
|
||||||
|
}
|
||||||
|
if ("follows_dribble" %in% colnames(data$shot)) {
|
||||||
|
data[is.na(data$shot$follows_dribble), ]$shot$follows_dribble <- FALSE
|
||||||
|
} else {
|
||||||
|
data$shot$follows_dribble <- FALSE
|
||||||
|
}
|
||||||
|
if ("redirect" %in% colnames(data$shot)) {
|
||||||
|
data[is.na(data$shot$redirect), ]$shot$redirect <- FALSE
|
||||||
|
} else {
|
||||||
|
data$shot$redirect <- FALSE
|
||||||
|
}
|
||||||
|
if ("kick_off" %in% colnames(data$kick_off)) {
|
||||||
|
data[is.na(data$shot$kick_off), ]$shotf$kick_off <- FALSE
|
||||||
|
} else {
|
||||||
|
data$kick_off <- FALSE
|
||||||
|
}
|
||||||
|
},
|
||||||
|
error = function(e) {
|
||||||
|
# handle the error
|
||||||
|
print(paste("An error occurred:", e$message))
|
||||||
|
})
|
||||||
|
data <- data %>% mutate(is_goal = ifelse(shot$outcome$id == 97, 1, 0),
|
||||||
|
x1 = yd_to_m(x1) %>% round(., digits = 1),
|
||||||
|
y1 = yd_to_m(y1) %>% round(., digits = 1),
|
||||||
|
angle = loc2angle(x1, y1) %>% round(., digits = 1)) %>%
|
||||||
|
select(-location)
|
||||||
|
data$shot$outcome <- data$shot$outcome %>% select(-id)
|
||||||
|
data <- data %>% unnest(shot, names_sep = "_") %>%
|
||||||
|
unnest(position, names_sep = "_") %>%
|
||||||
|
unnest(shot_type, names_sep = "_") %>%
|
||||||
|
unnest(shot_outcome, names_sep = "_") %>%
|
||||||
|
unnest(shot_technique, names_sep = "_") %>%
|
||||||
|
unnest(shot_body_part, names_sep = "_")
|
||||||
|
data <- cbind(data, df_players_location)
|
||||||
|
data
|
||||||
|
}
|
||||||
data_list <- lapply(paste("data/la_liga_events/", file_names, sep = ""), get_shots2)
|
data_list <- lapply(paste("data/la_liga_events/", file_names, sep = ""), get_shots2)
|
||||||
library(jsonlite)
|
|
||||||
library(tidyverse)
|
|
||||||
library(ggsoccer)
|
|
||||||
library(dplyr)
|
|
||||||
# TODO not all json data have these same keys/fields, customize function to cover all cases
|
# TODO not all json data have these same keys/fields, customize function to cover all cases
|
||||||
get_shots2 <- function(json_file) {
|
get_shots2 <- function(json_file) {
|
||||||
data <- fromJSON(json_file) %>% filter(type$name == "Shot") %>% dplyr::select(c(minute, position, location, shot))
|
data <- fromJSON(json_file) %>% filter(type$name == "Shot") %>% dplyr::select(c(minute, position, location, shot))
|
||||||
df_temp <- do.call(rbind, data$location)
|
df_temp <- do.call(rbind, lapply(data$location, function(loc) c(120, 80) - loc))
|
||||||
colnames(df_temp) <- c("x1", "y1")
|
colnames(df_temp) <- c("x1", "y1")
|
||||||
data$x1 <- df_temp[,1]
|
data$x1 <- df_temp[,1]
|
||||||
data$y1 <- df_temp[,2]
|
data$y1 <- df_temp[,2]
|
||||||
|
tryCatch({
|
||||||
|
df_players_location <- mapply( function(sublist) {
|
||||||
|
df_players <- sapply(sublist$location, function(loc) c(120, 80) - loc %>% as.numeric() %>% yd_to_m() %>% round(., digits = 1)) %>% t() %>% as.data.frame()
|
||||||
|
# df <- sapply(sublist$teammate, function(tmt) cbind(df_players, tmt))
|
||||||
|
df <- cbind(df_players, sublist$teammate)
|
||||||
|
colnames(df) <- c("x", "y", "teammate")
|
||||||
|
na_df <- as.data.frame(matrix(NA, nrow = 21 - nrow(df), ncol = ncol(df)))
|
||||||
|
colnames(na_df) <- colnames(df)
|
||||||
|
dff <- rbind(df, na_df)
|
||||||
|
wider_df <- dff %>%
|
||||||
|
mutate(row = row_number()) %>%
|
||||||
|
pivot_wider(names_from = row, values_from = c(x, y, teammate), names_sep = "_player")
|
||||||
|
wider_df
|
||||||
|
}, data$shot$freeze_frame)
|
||||||
|
},
|
||||||
|
error = function(e) {
|
||||||
|
# handle the error
|
||||||
|
print(json_file)
|
||||||
|
print(paste("An error occurred:", e$message))
|
||||||
|
})
|
||||||
|
df_players_location <- df_players_location %>% t()
|
||||||
|
# df <- do.call(rbind, df_test$shot$freeze_frame[[1]]$location) %>% as.data.frame()
|
||||||
|
# dff <- cbind(df, df_test$shot$freeze_frame[[1]]$teammate)
|
||||||
|
# colnames(dff) <- c("x", "y", "teammate")
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# na_df <- as.data.frame(matrix(NA, nrow = 21 - nrow(df), ncol = ncol(dff)))
|
||||||
|
# colnames(na_df) <- colnames(dff)
|
||||||
|
# dff <- rbind(dff, na_df)
|
||||||
|
#
|
||||||
|
# wider_df <- dff %>%
|
||||||
|
# mutate(row = row_number()) %>%
|
||||||
|
# pivot_wider(names_from = row, values_from = c(x, y, teammate), names_sep = "_player")
|
||||||
tryCatch({ # TODO reduce error cases
|
tryCatch({ # TODO reduce error cases
|
||||||
data$number_of_players_opponents <- mapply(function(sublist, x1_threshold) {
|
data$number_of_players_opponents <- mapply(function(sublist, x1_threshold) {
|
||||||
# Extracting the first location value and converting it to numeric
|
# Extracting the first location value and converting it to numeric
|
||||||
@ -114,10 +236,11 @@ error = function(e) {
|
|||||||
# handle the error
|
# handle the error
|
||||||
print(paste("An error occurred:", e$message))
|
print(paste("An error occurred:", e$message))
|
||||||
})
|
})
|
||||||
data <- data %>% mutate(angle = atan(7.32 * x1 / (x1^2 + y1^2 - (7.32/2)^2)),
|
data <- data %>% mutate(is_goal = ifelse(shot$outcome$id == 97, 1, 0),
|
||||||
is_goal = ifelse(shot$outcome$id == 97, 1, 0)) %>%
|
x1 = yd_to_m(x1) %>% round(., digits = 1),
|
||||||
|
y1 = yd_to_m(y1) %>% round(., digits = 1),
|
||||||
|
angle = loc2angle(x1, y1) %>% round(., digits = 1)) %>%
|
||||||
select(-location)
|
select(-location)
|
||||||
data$angle <- ifelse(data$angle<0, base::pi + data$angle, data$angle)
|
|
||||||
data$shot$outcome <- data$shot$outcome %>% select(-id)
|
data$shot$outcome <- data$shot$outcome %>% select(-id)
|
||||||
data <- data %>% unnest(shot, names_sep = "_") %>%
|
data <- data %>% unnest(shot, names_sep = "_") %>%
|
||||||
unnest(position, names_sep = "_") %>%
|
unnest(position, names_sep = "_") %>%
|
||||||
@ -125,53 +248,265 @@ unnest(shot_type, names_sep = "_") %>%
|
|||||||
unnest(shot_outcome, names_sep = "_") %>%
|
unnest(shot_outcome, names_sep = "_") %>%
|
||||||
unnest(shot_technique, names_sep = "_") %>%
|
unnest(shot_technique, names_sep = "_") %>%
|
||||||
unnest(shot_body_part, names_sep = "_")
|
unnest(shot_body_part, names_sep = "_")
|
||||||
|
data <- cbind(data, df_players_location)
|
||||||
data
|
data
|
||||||
}
|
}
|
||||||
data_list <- lapply(paste("data/la_liga_events/", file_names, sep = ""), get_shots2)
|
data_list <- lapply(paste("data/la_liga_events/", file_names, sep = ""), get_shots2)
|
||||||
length(data_list)
|
df <- fromJSON("data/la_liga_events/ (1006).json") %>% filter(type$name == "Shot") %>% dplyr::select(c(minute, position, location, shot))
|
||||||
ss <- lapply(data_list, nrow())
|
df$shot$freeze_frame[[1]]$location
|
||||||
ss <- lapply(data_list, nrow)
|
df$shot$freeze_frame[[1]]$teammate
|
||||||
ss
|
mapply( function(sublist) {
|
||||||
sum(ss)
|
df_players <- sapply(sublist$location, function(loc) c(120, 80) - loc %>% as.numeric() %>% yd_to_m() %>% round(., digits = 1)) %>% t() %>% as.data.frame()
|
||||||
sss <- unlist(ss)
|
# df <- sapply(sublist$teammate, function(tmt) cbind(df_players, tmt))
|
||||||
sss
|
df <- cbind(df_players, sublist$teammate)
|
||||||
sum(sss)
|
colnames(df) <- c("x", "y", "teammate")
|
||||||
source("notebooks/dataCleaning.R")
|
na_df <- as.data.frame(matrix(NA, nrow = 21 - nrow(df), ncol = ncol(df)))
|
||||||
source("notebooks/dataCleaning.R")
|
colnames(na_df) <- colnames(df)
|
||||||
# generate random data
|
dff <- rbind(df, na_df)
|
||||||
random_data <- as.data.frame(x = rnorm(100),
|
wider_df <- dff %>%
|
||||||
y = rnorm(100, mean = 1, sd = 3))
|
mutate(row = row_number()) %>%
|
||||||
# unit tests
|
pivot_wider(names_from = row, values_from = c(x, y, teammate), names_sep = "_player")
|
||||||
expect_silent(
|
wider_df
|
||||||
data <- get_shots(file_path = "data/events/events_England.json", name_detail = "EN")
|
}, df$shot$freeze_frame)
|
||||||
)
|
mapply( function(sublist) {
|
||||||
library(tinytest)
|
df_players <- sapply(sublist$location, function(loc) c(120, 80) - loc %>% as.numeric() %>% yd_to_m() %>% round(., digits = 1)) %>% t() %>% as.data.frame()
|
||||||
# unit tests
|
# df <- sapply(sublist$teammate, function(tmt) cbind(df_players, tmt))
|
||||||
expect_silent(
|
print(df_players)
|
||||||
data <- get_shots(file_path = "data/events/events_England.json", name_detail = "EN")
|
df <- cbind(df_players, sublist$teammate)
|
||||||
)
|
colnames(df) <- c("x", "y", "teammate")
|
||||||
expect_silent(
|
na_df <- as.data.frame(matrix(NA, nrow = 21 - nrow(df), ncol = ncol(df)))
|
||||||
data <- get_shots2(json_file = "data/la_liga_events/ (1).json")
|
colnames(na_df) <- colnames(df)
|
||||||
)
|
dff <- rbind(df, na_df)
|
||||||
expect_error(
|
wider_df <- dff %>%
|
||||||
get_final_data(data = random_data)
|
mutate(row = row_number()) %>%
|
||||||
)
|
pivot_wider(names_from = row, values_from = c(x, y, teammate), names_sep = "_player")
|
||||||
data1 <- get_shots2(json_file = "data/la_liga_events/ (1).json")
|
wider_df
|
||||||
data2 <- get_shots2(json_file = "data/la_liga_events/ (2).json")
|
}, df$shot$freeze_frame)
|
||||||
expect_equivalent(colnames(data1), colnames(data2))
|
mapply( function(sublist) {
|
||||||
expect_false(is.vector(data1))
|
df_players <- sapply(sublist$location, function(loc) c(120, 80) - loc %>% as.numeric() %>% yd_to_m() %>% round(., digits = 1)) %>% t() %>% as.data.frame()
|
||||||
expect_true(is.data.frame(data1))
|
# df <- sapply(sublist$teammate, function(tmt) cbind(df_players, tmt))
|
||||||
colnames(data1)
|
print(df_players)
|
||||||
colnames(data2)
|
print(sublist$teammate)
|
||||||
expect_length(length(colnames(data1)), length(colnames(data2)))
|
df <- cbind(df_players, sublist$teammate)
|
||||||
length(colnames(data1))
|
colnames(df) <- c("x", "y", "teammate")
|
||||||
length(colnames(data2))
|
na_df <- as.data.frame(matrix(NA, nrow = 21 - nrow(df), ncol = ncol(df)))
|
||||||
?expect_length
|
colnames(na_df) <- colnames(df)
|
||||||
expect_length(colnames(data1), colnames(data2))
|
dff <- rbind(df, na_df)
|
||||||
expect_length(colnames(data1), colnames(data2))
|
wider_df <- dff %>%
|
||||||
expect_length(colnames(data1), 22)
|
mutate(row = row_number()) %>%
|
||||||
expect_length(colnames(data2), 22)
|
pivot_wider(names_from = row, values_from = c(x, y, teammate), names_sep = "_player")
|
||||||
expect_false(is.vector(data1))
|
wider_df
|
||||||
expect_true(is.data.frame(data1))
|
}, df$shot$freeze_frame)
|
||||||
df <- read.csv("data/data4.csv", nrows = 1000)
|
df$shot$freeze_frame[[1]]$teammate
|
||||||
View(df)
|
mapply( function(sublist) {
|
||||||
|
df_players <- sapply(sublist$location, function(loc) c(120, 80) - loc %>% as.numeric() %>% yd_to_m() %>% round(., digits = 1)) %>% t() %>% as.data.frame()
|
||||||
|
# df <- sapply(sublist$teammate, function(tmt) cbind(df_players, tmt))
|
||||||
|
print(df_players)
|
||||||
|
print(sublist$teammate)
|
||||||
|
df <- cbind(df_players, sublist$teammate)
|
||||||
|
colnames(df) <- c("x", "y", "teammate")
|
||||||
|
na_df <- as.data.frame(matrix(NA, nrow = 21 - nrow(df), ncol = ncol(df)))
|
||||||
|
colnames(na_df) <- colnames(df)
|
||||||
|
dff <- rbind(df, na_df)
|
||||||
|
wider_df <- dff %>%
|
||||||
|
mutate(row = row_number()) %>%
|
||||||
|
pivot_wider(names_from = row, values_from = c(x, y, teammate), names_sep = "_player")
|
||||||
|
wider_df
|
||||||
|
}, df$shot$freeze_frame)
|
||||||
|
mapply( function(sublist) {
|
||||||
|
df_players <- sapply(sublist$location, function(loc) c(120, 80) - loc %>% as.numeric() %>% yd_to_m() %>% round(., digits = 1)) %>% t() %>% as.data.frame()
|
||||||
|
# df <- sapply(sublist$teammate, function(tmt) cbind(df_players, tmt))
|
||||||
|
df <- cbind(df_players, sublist$teammate)
|
||||||
|
colnames(df) <- c("x", "y", "teammate")
|
||||||
|
print(df)
|
||||||
|
na_df <- as.data.frame(matrix(NA, nrow = 21 - nrow(df), ncol = ncol(df)))
|
||||||
|
colnames(na_df) <- colnames(df)
|
||||||
|
dff <- rbind(df, na_df)
|
||||||
|
wider_df <- dff %>%
|
||||||
|
mutate(row = row_number()) %>%
|
||||||
|
pivot_wider(names_from = row, values_from = c(x, y, teammate), names_sep = "_player")
|
||||||
|
wider_df
|
||||||
|
}, df$shot$freeze_frame)
|
||||||
|
df$shot$freeze_frame[[3]]$teammate
|
||||||
|
df$shot$freeze_frame[[10]]$teammate
|
||||||
|
df$shot$freeze_frame[[115]]$teammate
|
||||||
|
df$shot$freeze_frame[[11]]$teammate
|
||||||
|
length(df$shot$freeze_frame)
|
||||||
|
df$shot$freeze_frame[[25]]$teammate
|
||||||
|
df$shot$freeze_frame[[24]]$teammate
|
||||||
|
# TODO not all json data have these same keys/fields, customize function to cover all cases
|
||||||
|
# Read one events JSON file and return one row per shot.
#
# Args:
#   json_file: path to an events JSON file. It is read with fromJSON() and
#     must yield nested `type`, `position`, `location` and `shot` fields.
#
# Returns:
#   A data frame with one row per event whose type$name is "Shot". It holds
#   `minute`, `x1`/`y1` (mirrored via c(120, 80) - location, passed through
#   yd_to_m() and rounded to 1 digit), `angle`, `is_goal`,
#   `number_of_players_opponents`, `number_of_players_teammates`, the
#   unnested `position_*` / `shot_*` columns, a top-level `kick_off` column,
#   and the freeze-frame columns `x_playerN`, `y_playerN`,
#   `teammate_playerN` for N = 1..21 (padded with NA).
#   NOTE(review): the freeze-frame columns come from cbind() of a transposed
#   mapply() result, so they are presumably list columns -- confirm before
#   relying on their type.
#
# Raises:
#   An error naming `json_file` if the freeze frames cannot be reshaped.
get_shots2 <- function(json_file) {
  # Keep only shot events and the four top-level fields used below.
  data <- fromJSON(json_file) %>%
    filter(type$name == "Shot") %>%
    dplyr::select(c(minute, position, location, shot))

  # Mirror the shot location: (x, y) -> (120 - x, 80 - y).
  df_temp <- do.call(
    rbind,
    lapply(data$location, function(loc) c(120, 80) - loc)
  )
  colnames(df_temp) <- c("x1", "y1")
  data$x1 <- df_temp[, 1]
  data$y1 <- df_temp[, 2]

  player_cols <- c("x", "y", "teammate")
  max_players <- 21

  # Turn one freeze frame into a single wide row with 21 player slots.
  freeze_frame_to_wide <- function(sublist) {
    if (!is.null(sublist$teammate)) {
      # The parentheses matter: `%>%` binds tighter than `-`, so without them
      # only `loc` is converted and rounded and then subtracted from the
      # unconverted c(120, 80).
      df_players <- vapply(
        sublist$location,
        function(loc) {
          (c(120, 80) - loc) %>%
            as.numeric() %>%
            yd_to_m() %>%
            round(., digits = 1)
        },
        numeric(2)
      ) %>%
        t() %>%
        as.data.frame()
      df <- cbind(df_players, sublist$teammate)
      colnames(df) <- player_cols
      # Pad with NA rows so every shot yields the same set of columns.
      na_df <- as.data.frame(
        matrix(NA, nrow = max_players - nrow(df), ncol = ncol(df))
      )
      colnames(na_df) <- colnames(df)
      dff <- rbind(df, na_df)
    } else {
      # No freeze frame recorded for this shot: all slots are NA.
      dff <- as.data.frame(matrix(NA, nrow = max_players, ncol = 3))
      colnames(dff) <- player_cols
    }
    dff %>%
      mutate(row = row_number()) %>%
      pivot_wider(
        names_from = row,
        values_from = c(x, y, teammate),
        names_sep = "_player"
      )
  }

  # Fail loudly with the file name. Continuing after an error here would
  # leave `df_players_location` undefined (or pick up a stale global).
  df_players_location <- tryCatch(
    mapply(freeze_frame_to_wide, data$shot$freeze_frame),
    error = function(e) {
      stop(
        "get_shots2 failed on ", json_file,
        " while reshaping freeze frames: ", conditionMessage(e),
        call. = FALSE
      )
    }
  )
  df_players_location <- df_players_location %>% t()

  # Count, per shot, the freeze-frame players of one side whose first
  # location coordinate is greater than the shot's x1.
  # NOTE(review): the player coordinate is the raw value while `x1` has
  # already been mirrored (120 - x) -- confirm the comparison is intended.
  # Best effort: on error the problem is printed and NULL is returned, so
  # the corresponding column is simply not created.
  count_players_ahead <- function(freeze_frames, x1, count_teammates) {
    tryCatch(
      mapply(function(sublist, x1_threshold) {
        first_location_values <- vapply(
          sublist$location,
          function(loc) as.numeric(loc[1]),
          numeric(1)
        )
        if ("teammate" %in% names(sublist)) {
          on_side <- if (count_teammates) {
            sublist$teammate
          } else {
            !sublist$teammate
          }
          sum(on_side & first_location_values > x1_threshold)
        } else {
          0
        }
      }, freeze_frames, x1),
      error = function(e) {
        print(json_file)
        print(paste("An error occurred:", e$message))
        NULL
      }
    )
  }

  data$number_of_players_opponents <- count_players_ahead(
    data$shot$freeze_frame, data$x1, count_teammates = FALSE
  )
  data$number_of_players_teammates <- count_players_ahead(
    data$shot$freeze_frame, data$x1, count_teammates = TRUE
  )

  # Not every file carries every optional field, so drop them only when
  # they are present.
  data$shot <- data$shot %>%
    select(-any_of(
      c("freeze_frame", "statsbomb_xg", "key_pass_id", "end_location")
    ))
  data$shot$body_part <- data$shot$body_part %>% select(-id)
  data$shot$technique <- data$shot$technique %>% select(-id)
  data$shot$type <- data$shot$type %>% select(-id)
  data$position <- data$position %>% select(-id)

  # Optional shot flags: a missing column or a missing value means FALSE.
  # Assigning through a logical index also works when no value is NA.
  shot_flags <- c(
    "one_on_one", "first_time", "aerial_won", "saved_to_post", "deflected",
    "saved_off_target", "open_goal", "follows_dribble", "redirect",
    "kick_off"
  )
  for (flag in shot_flags) {
    flag_values <- data$shot[[flag]]
    if (is.null(flag_values)) {
      flag_values <- rep(FALSE, nrow(data))
    } else {
      flag_values[is.na(flag_values)] <- FALSE
    }
    data$shot[[flag]] <- flag_values
  }
  # Kept for backward compatibility: earlier versions always added this
  # top-level column, and calling code drops it by name.
  data$kick_off <- rep(FALSE, nrow(data))

  # Outcome id 97 is treated as a goal.
  data <- data %>%
    mutate(
      is_goal = ifelse(shot$outcome$id == 97, 1, 0),
      x1 = yd_to_m(x1) %>% round(., digits = 1),
      y1 = yd_to_m(y1) %>% round(., digits = 1),
      angle = loc2angle(x1, y1) %>% round(., digits = 1)
    ) %>%
    select(-location)
  data$shot$outcome <- data$shot$outcome %>% select(-id)

  # Flatten the nested data frames into prefixed top-level columns.
  data <- data %>%
    unnest(shot, names_sep = "_") %>%
    unnest(position, names_sep = "_") %>%
    unnest(shot_type, names_sep = "_") %>%
    unnest(shot_outcome, names_sep = "_") %>%
    unnest(shot_technique, names_sep = "_") %>%
    unnest(shot_body_part, names_sep = "_")

  data <- cbind(data, df_players_location)
  data
}
|
||||||
|
data_list <- lapply(paste("data/la_liga_events/", file_names, sep = ""), get_shots2)
|
||||||
|
View(data3_final)
|
||||||
|
combined_data <- do.call(rbind, data_list)
|
||||||
|
data_list
|
||||||
|
combined_data <- do.call(rbind, data_list)
|
||||||
|
combined_data <- do.call(bind_rows, data_list)
|
||||||
|
nrow(combined_data)
|
||||||
|
data3_final <- combined_data %>% select(-c(shot_outcome_name,
|
||||||
|
shot_saved_off_target,
|
||||||
|
shot_saved_to_post)) %>%
|
||||||
|
mutate(is_head = ifelse(shot_body_part_name == "Head", 1, 0),
|
||||||
|
is_right_foot = ifelse(shot_body_part_name == "Right Foot", 1, 0),
|
||||||
|
is_left_foot = ifelse(shot_body_part_name == "Left Foot", 1, 0),
|
||||||
|
is_open_play = ifelse(shot_type_name == "Open Play", 1, 0)) %>%
|
||||||
|
select(-shot_body_part_name)
|
||||||
|
colnames(data3_final)
|
||||||
|
write.csv(data3_final, file = "data/final_data.csv")
|
||||||
|
write_csv(data3_final, file = "data/final_data.csv")
|
||||||
|
8
.gitignore
vendored
Normal file
8
.gitignore
vendored
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
.Rhistory
|
||||||
|
data/players.json
|
||||||
|
data/ginf.csv
|
||||||
|
data/events.csv
|
||||||
|
data/events
|
||||||
|
data/dictionary.txt
|
||||||
|
data/la_liga_events
|
||||||
|
data/la_liga_events_csv
|
BIN
data/.DS_Store
vendored
BIN
data/.DS_Store
vendored
Binary file not shown.
76686
data/final_data.csv
76686
data/final_data.csv
File diff suppressed because it is too large
Load Diff
@ -7,6 +7,7 @@ library(ggsoccer)
|
|||||||
library(dplyr)
|
library(dplyr)
|
||||||
library(REdaS)
|
library(REdaS)
|
||||||
library(yd2m)
|
library(yd2m)
|
||||||
|
library(purrr)
|
||||||
|
|
||||||
# code and data from https://github.com/Dato-Futbol/xg-model
|
# code and data from https://github.com/Dato-Futbol/xg-model
|
||||||
get_shots <- function(file_path, name_detail, save_files = F){
|
get_shots <- function(file_path, name_detail, save_files = F){
|
||||||
@ -112,6 +113,10 @@ loc2angle <- function(x, y) {
|
|||||||
deg
|
deg
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Euclidean distance of the point (x, y) from the origin.
# Works element-wise on vectors.
loc2distance <- function(x, y) {
  squared_distance <- x^2 + y^2
  sqrt(squared_distance)
}
|
||||||
|
|
||||||
get_shots2 <- function(json_file) {
|
get_shots2 <- function(json_file) {
|
||||||
data <- fromJSON(json_file) %>% filter(type$name == "Shot") %>% dplyr::select(c(minute, position, location, shot))
|
data <- fromJSON(json_file) %>% filter(type$name == "Shot") %>% dplyr::select(c(minute, position, location, shot))
|
||||||
|
|
||||||
@ -128,7 +133,11 @@ get_shots2 <- function(json_file) {
|
|||||||
# df <- sapply(sublist$teammate, function(tmt) cbind(df_players, tmt))
|
# df <- sapply(sublist$teammate, function(tmt) cbind(df_players, tmt))
|
||||||
df <- cbind(df_players, sublist$teammate)
|
df <- cbind(df_players, sublist$teammate)
|
||||||
colnames(df) <- c("x", "y", "teammate")
|
colnames(df) <- c("x", "y", "teammate")
|
||||||
|
# df <- df %>% unnest(c(x, y, teammate))
|
||||||
|
# df$x <- as.numeric(df$x)
|
||||||
|
# df$y <- as.numeric(df$y)
|
||||||
|
# df$teammate <- as.logical(df$teammate) %>% as.numeric()
|
||||||
|
#
|
||||||
na_df <- as.data.frame(matrix(NA, nrow = 21 - nrow(df), ncol = ncol(df)))
|
na_df <- as.data.frame(matrix(NA, nrow = 21 - nrow(df), ncol = ncol(df)))
|
||||||
colnames(na_df) <- colnames(df)
|
colnames(na_df) <- colnames(df)
|
||||||
dff <- rbind(df, na_df)
|
dff <- rbind(df, na_df)
|
||||||
@ -139,6 +148,7 @@ get_shots2 <- function(json_file) {
|
|||||||
wider_df <- dff %>%
|
wider_df <- dff %>%
|
||||||
mutate(row = row_number()) %>%
|
mutate(row = row_number()) %>%
|
||||||
pivot_wider(names_from = row, values_from = c(x, y, teammate), names_sep = "_player")
|
pivot_wider(names_from = row, values_from = c(x, y, teammate), names_sep = "_player")
|
||||||
|
# wider_df <- apply(wider_df, MARGIN = 2, unlist)
|
||||||
wider_df
|
wider_df
|
||||||
}, data$shot$freeze_frame)
|
}, data$shot$freeze_frame)
|
||||||
},
|
},
|
||||||
@ -280,7 +290,8 @@ get_shots2 <- function(json_file) {
|
|||||||
data <- data %>% mutate(is_goal = ifelse(shot$outcome$id == 97, 1, 0),
|
data <- data %>% mutate(is_goal = ifelse(shot$outcome$id == 97, 1, 0),
|
||||||
x1 = yd_to_m(x1) %>% round(., digits = 1),
|
x1 = yd_to_m(x1) %>% round(., digits = 1),
|
||||||
y1 = yd_to_m(y1) %>% round(., digits = 1),
|
y1 = yd_to_m(y1) %>% round(., digits = 1),
|
||||||
angle = loc2angle(x1, y1) %>% round(., digits = 1)) %>%
|
angle = loc2angle(x1, y1) %>% round(., digits = 1),
|
||||||
|
distance = loc2distance(x = x1, y = y1)) %>%
|
||||||
select(-location)
|
select(-location)
|
||||||
data$shot$outcome <- data$shot$outcome %>% select(-id)
|
data$shot$outcome <- data$shot$outcome %>% select(-id)
|
||||||
data <- data %>% unnest(shot, names_sep = "_") %>%
|
data <- data %>% unnest(shot, names_sep = "_") %>%
|
||||||
@ -296,27 +307,22 @@ get_shots2 <- function(json_file) {
|
|||||||
|
|
||||||
file_names <- list.files(path = "data/la_liga_events/", pattern = "*.json")
|
file_names <- list.files(path = "data/la_liga_events/", pattern = "*.json")
|
||||||
data_list <- lapply(paste("data/la_liga_events/", file_names, sep = ""), get_shots2)
|
data_list <- lapply(paste("data/la_liga_events/", file_names, sep = ""), get_shots2)
|
||||||
combined_data <- do.call(bind_rows, data_list)
|
combined_data <- bind_rows(data_list)
|
||||||
|
|
||||||
# ss <- lapply(data_list, nrow)
|
# ss <- lapply(data_list, nrow)
|
||||||
# sss <- unlist(ss)
|
# sss <- unlist(ss)
|
||||||
#
|
#
|
||||||
# # sample data
|
# # sample data
|
||||||
# data_test <- get_shots2("data/la_liga_events/303377.json")
|
# data_test <- get_shots2("data/la_liga_events/ (1000).json")
|
||||||
|
|
||||||
|
|
||||||
data3_final <- combined_data %>% select(-c(shot_outcome_name,
|
data3_final <- combined_data %>% select(-c(shot_outcome_name,
|
||||||
shot_saved_off_target,
|
shot_saved_off_target,
|
||||||
shot_saved_to_post)) %>%
|
shot_saved_to_post,
|
||||||
mutate(is_head = ifelse(shot_body_part_name == "Head", 1, 0),
|
kick_off)) %>%
|
||||||
is_right_foot = ifelse(shot_body_part_name == "Right Foot", 1, 0),
|
mutate(shot_kick_off = ifelse(is.na(shot_kick_off), FALSE, shot_kick_off))
|
||||||
is_left_foot = ifelse(shot_body_part_name == "Left Foot", 1, 0),
|
pattern <- "^(x_player|y_player|teammate_player)[0-9]+$"
|
||||||
is_open_play = ifelse(shot_type_name == "Open Play", 1, 0)) %>%
|
cols <- names(data3_final)[grepl(pattern, names(data3_final))]
|
||||||
select(-shot_body_part_name)
|
data3_final <- data3_final %>% unnest(all_of(cols))
|
||||||
|
|
||||||
write_csv(data3_final, file = "data/final_data.csv")
|
write_csv(data3_final, file = "data/final_data.csv")
|
||||||
|
# df_test <- read.csv("data/final_data.csv", nrows = 100)
|
||||||
|
|
||||||
##################### The fourth dataset ##############################
|
##################### The fourth dataset ##############################
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user