fantastyczne_gole/.Rhistory

178 lines
5.7 KiB
R
Raw Normal View History

2023-12-11 23:45:53 +01:00
# Session setup: load the prepared CSV, list the raw La Liga event files and
# attach the packages used by the commands in this history.
# NOTE(review): this first read runs before setwd() below, so it resolves
# "data/data4.csv" against whatever the working directory was at that moment;
# the same read is repeated right after the directory change.
data4 <- read.csv("data/data4.csv")
setwd("~/Desktop/fantastyczne_gole")
data4 <- read.csv("data/data4.csv")
# Bare `nrow` (no parentheses) prints the function itself; the call on the
# next line is the one that returns the row count of data4.
nrow
nrow(data4)
# NOTE(review): data_list is printed here before the visible assignment
# further down; presumably it existed from an earlier session - confirm.
data_list
2023-11-28 22:07:24 +01:00
# NOTE(review): list.files() treats `pattern` as a regular expression, not a
# shell glob - confirm "*.json" matches exactly the intended files.
file_names <- list.files(path = "data/la_liga_events/", pattern = "*.json")
2023-12-11 23:45:53 +01:00
length(file_names)
2023-11-28 22:07:24 +01:00
# Apply get_shots2 to every listed file; the result is a list with one element
# per file. get_shots2 is defined further down in this history.
data_list <- lapply(paste("data/la_liga_events/", file_names, sep = ""), get_shots2)
2023-12-11 23:45:53 +01:00
# Packages attached for the session. get_shots2 calls fromJSON, filter,
# select, mutate and unnest, which presumably come from jsonlite and the
# tidyverse packages attached here.
library(jsonlite)
library(tidyverse)
library(ggsoccer)
library(dplyr)
2023-11-28 22:07:24 +01:00
# Count the freeze-frame players whose x coordinate lies beyond `x_threshold`.
#
# frame        one element of shot$freeze_frame: expected to be a data frame
#              with a `location` list column and a logical `teammate` column;
#              may be NULL when the shot has no freeze frame
# x_threshold  x coordinate of the shot
# teammates    TRUE counts teammates, FALSE counts opponents
#
# Returns 0 when the frame is missing or lacks the expected columns.
.count_players_ahead <- function(frame, x_threshold, teammates) {
  if (!all(c("teammate", "location") %in% names(frame))) {
    return(0)
  }
  player_x <- vapply(
    frame$location,
    function(loc) as.numeric(loc[1]),
    numeric(1)
  )
  counted <- if (teammates) frame$teammate else !frame$teammate
  sum(counted & player_x > x_threshold)
}

# Turn an optional logical flag into a complete vector: a missing column
# becomes all FALSE, and NA entries of an existing column become FALSE.
.fill_flag <- function(values, n) {
  if (is.null(values)) {
    return(rep(FALSE, n))
  }
  values[is.na(values)] <- FALSE
  values
}

# Read one event file and return one row per shot.
#
# json_file  path to a JSON event file; the events are expected to carry
#            `type`, `minute`, `position`, `location` and `shot` fields
#
# Returns a data frame with the shot coordinates (x1, y1), the number of
# opponents / teammates in the freeze frame with a larger x than the shot,
# the boolean shot flags (always present, FALSE when the file does not
# provide them), `kick_off`, `angle`, `is_goal`, and the unnested descriptive
# shot / position fields.
#
# TODO optional shot keys other than the ones handled below can still make
# the column set differ between files.
get_shots2 <- function(json_file) {
  data <- fromJSON(json_file) %>%
    filter(type$name == "Shot") %>%
    dplyr::select(c(minute, position, location, shot))
  n_shots <- nrow(data)

  # location holds one (x, y) pair per shot
  shot_coord <- function(i) {
    vapply(data$location, function(loc) as.numeric(loc[i]), numeric(1))
  }
  data$x1 <- shot_coord(1)
  data$y1 <- shot_coord(2)

  # Files without any freeze frame have no such column; treat every shot as
  # having an empty frame so both count columns are always created.
  frames <- data$shot$freeze_frame
  if (is.null(frames)) {
    frames <- vector("list", n_shots)
  }
  count_ahead <- function(teammates) {
    tryCatch(
      vapply(
        seq_len(n_shots),
        function(i) .count_players_ahead(frames[[i]], data$x1[i], teammates),
        numeric(1)
      ),
      error = function(e) {
        # Stay best-effort, but keep the column (as NA) so that the output
        # has the same columns for every file.
        print(paste("An error occurred:", conditionMessage(e)))
        rep(NA_real_, n_shots)
      }
    )
  }
  data$number_of_players_opponents <- count_ahead(teammates = FALSE)
  data$number_of_players_teammates <- count_ahead(teammates = TRUE)

  # any_of() tolerates files in which some of these keys never occur
  data$shot <- data$shot %>%
    select(-any_of(c(
      "freeze_frame", "statsbomb_xg", "key_pass_id", "end_location"
    )))
  data$shot$body_part <- data$shot$body_part %>% select(-id)
  data$shot$technique <- data$shot$technique %>% select(-id)
  data$shot$type <- data$shot$type %>% select(-id)
  data$position <- data$position %>% select(-id)

  # Each flag is filled independently, so a problem with one flag can no
  # longer skip the remaining ones.
  shot_flags <- c(
    "one_on_one", "first_time", "aerial_won", "saved_to_post", "deflected",
    "saved_off_target", "open_goal", "follows_dribble", "redirect"
  )
  for (flag in shot_flags) {
    data$shot[[flag]] <- .fill_flag(data$shot[[flag]], n_shots)
  }
  # kick_off stays a top-level column (as before), but now takes its values
  # from shot$kick_off when the file provides it, and the nested copy is
  # removed so the number of output columns does not depend on the file.
  data$kick_off <- .fill_flag(data$shot[["kick_off"]], n_shots)
  data$shot[["kick_off"]] <- NULL

  # NOTE(review): 7.32 is presumably the goal width in metres and outcome id
  # 97 presumably marks a goal - confirm against the data provider's spec.
  # NOTE(review): the angle formula uses x1 / y1 as they come from the file;
  # confirm they are expressed relative to the goal as the formula expects.
  data <- data %>%
    mutate(
      angle = atan(7.32 * x1 / (x1^2 + y1^2 - (7.32 / 2)^2)),
      is_goal = ifelse(shot$outcome$id == 97, 1, 0)
    ) %>%
    select(-location)
  # atan() returns values in (-pi/2, pi/2); shift negative ones by pi
  data$angle <- ifelse(data$angle < 0, base::pi + data$angle, data$angle)
  data$shot$outcome <- data$shot$outcome %>% select(-id)

  # Flatten the nested data-frame columns into prefixed plain columns
  data %>%
    unnest(shot, names_sep = "_") %>%
    unnest(position, names_sep = "_") %>%
    unnest(shot_type, names_sep = "_") %>%
    unnest(shot_outcome, names_sep = "_") %>%
    unnest(shot_technique, names_sep = "_") %>%
    unnest(shot_body_part, names_sep = "_")
}
2023-12-11 23:45:53 +01:00
# Parse every listed event file with get_shots2, then count the shots per file
# and in total.
data_list <- lapply(paste("data/la_liga_events/", file_names, sep = ""), get_shots2)
length(data_list)
# First attempt calls nrow() with no argument instead of passing the function
# itself; the following line is the corrected form.
ss <- lapply(data_list, nrow())
ss <- lapply(data_list, nrow)
ss
# ss is a list (lapply output), so it is flattened with unlist() into sss
# before the total is taken with sum(sss).
sum(ss)
sss <- unlist(ss)
sss
sum(sss)
# The cleaning script is sourced twice in a row.
# NOTE(review): presumably it defines get_shots and get_final_data used by
# the checks that follow - confirm.
source("notebooks/dataCleaning.R")
source("notebooks/dataCleaning.R")
# generate random data
# NOTE(review): as.data.frame() takes the object to convert as `x`; `y` is not
# one of its documented arguments, so this probably does not produce a
# two-column frame - confirm whether data.frame(x = ..., y = ...) was meant.
random_data <- as.data.frame(x = rnorm(100),
y = rnorm(100, mean = 1, sd = 3))
# unit tests
# This first expect_silent() call is entered before library(tinytest) below
# and is repeated right after the package is attached.
expect_silent(
data <- get_shots(file_path = "data/events/events_England.json", name_detail = "EN")
)
library(tinytest)
# unit tests
# Both loaders are expected to run silently on a sample file.
expect_silent(
data <- get_shots(file_path = "data/events/events_England.json", name_detail = "EN")
)
expect_silent(
data <- get_shots2(json_file = "data/la_liga_events/ (1).json")
)
# get_final_data is expected to raise an error when given the random data.
expect_error(
get_final_data(data = random_data)
)
# Two different event files should produce the same column names, and the
# result should be a data frame rather than a plain vector.
data1 <- get_shots2(json_file = "data/la_liga_events/ (1).json")
data2 <- get_shots2(json_file = "data/la_liga_events/ (2).json")
expect_equivalent(colnames(data1), colnames(data2))
expect_false(is.vector(data1))
expect_true(is.data.frame(data1))
2023-11-28 22:07:24 +01:00
# Inspect the column names of both parsed files and check their count.
colnames(data1)
2023-12-11 23:45:53 +01:00
colnames(data2)
# Successive attempts at the length check: the first passes the two lengths
# themselves, the next two (after consulting the help page) pass the second
# file's column names as the expected length, and the final form compares
# each file's column names against the literal count 22.
expect_length(length(colnames(data1)), length(colnames(data2)))
length(colnames(data1))
length(colnames(data2))
?expect_length
expect_length(colnames(data1), colnames(data2))
expect_length(colnames(data1), colnames(data2))
expect_length(colnames(data1), 22)
expect_length(colnames(data2), 22)
expect_false(is.vector(data1))
expect_true(is.data.frame(data1))
# Read only the first 1000 rows of the prepared CSV and open them in the
# data viewer.
df <- read.csv("data/data4.csv", nrows = 1000)
View(df)