From 2ca84b303cef5632cf15b9017dffedd25e5f9bf9 Mon Sep 17 00:00:00 2001 From: LukaszChrostowski Date: Tue, 26 Dec 2023 17:18:57 +0100 Subject: [PATCH] more data cleaning --- .DS_Store | Bin 8196 -> 8196 bytes .../modele/__pycache__/modele.cpython-39.pyc | Bin 0 -> 757 bytes data/final_data.csv | 76686 ++++++++-------- notebooks/.DS_Store | Bin 0 -> 6148 bytes notebooks/dataCleaning.R | 69 +- 5 files changed, 38404 insertions(+), 38351 deletions(-) create mode 100644 app/src/flask-server/modele/__pycache__/modele.cpython-39.pyc create mode 100644 notebooks/.DS_Store diff --git a/.DS_Store b/.DS_Store index ccab0158c5050ea9e45cf48cd31255b707d5703f..687a30d447d8b6416a278a0b6350c10fe5895439 100644 GIT binary patch delta 91 zcmZp1XmOa}&&kEWz`)4BAi%IOYAyR_Rt`1}b&wDbesWSyeiBeb0I2RK nkkVMs8zIJ5bf0wTb<&M)D35jsk9WxRfG0N;-yclZ zFEmsd%$yZMRkg9JaxK~3T+Kv1o1|{kDpT89$>L%oUb!b6dzmbuu&SsO6Q5knWF^3D zH_Vo&MN`YA1^05gY-XT~57L4@16j+e_}sQig0$wp)vv%B90A59wK-JEdjB36N16tu z63a#!O_7hg9z5Zbq4v%NEzi>fX4#=;4Y*LK5&@)(YsujvAGaEKUD~46YQ|iw)l%}w zmPR3ojy%L35)T6pLl0gA?N!%al00q{2a(6eg@p54qMzc&KgZcO1KMO_xnO2po@!;R zIyZ|tI{>q=s?AQAwCtWDWaCmTg?aJ+b>}9FTqh2TD zrTFJJ{L02JO6ZV2rmv`ej`1YUlY0?+YVxFIk~5+j*&7$lX!zbK_y{UrtLWV=uKIGgIXPEeEQuaumIad+mnVI{{IpJK9t+lW&tLL$IPL9buic?0I|$&Yi!F0gKz?mp~uW2@6e2+5*^j}BZhHw z_6L4k=rMEX=rI29Vf@U-pHPgQo%sVh942(AQ58@HRu$NBmv!0yJDbn{t0cWr1yq55 zrGP27ui7mx$?vUei<7-Jpx@BNWL)OZQW*SpY#Xu_AJDDwULXx(=rME19-4j#SQ#{^ I0zaz2CoBl70ssI2 literal 0 HcmV?d00001 diff --git a/notebooks/dataCleaning.R b/notebooks/dataCleaning.R index 8858b5e..da49ac2 100644 --- a/notebooks/dataCleaning.R +++ b/notebooks/dataCleaning.R @@ -117,6 +117,17 @@ loc2distance <- function(x, y) { sqrt(x^2 + y^2) } +loc2locdistance <- function(x1, y1, x2, y2) { + sqrt( (x1 - x2)^2 + (y1 - y2)^2 ) +} + +# TODO +# osobno bramkarz, wartość opponent_Goalkeeper +# kolejność w row_number() wg odlegości do piłki z podziałem na teammate/opponent +# przykład, najbliżej w teammate -> teammate_1, +# przykład najbliżej w opponent -> opponent_1 +# w przypadku NA, wypełnić wartościami teammete_(nt+1):11, opponent_(no+1):11 + get_shots2 <- function(json_file) { data <- fromJSON(json_file) %>% filter(type$name == "Shot") %>% dplyr::select(c(minute, position, location, shot)) @@ -126,28 +137,68 @@ get_shots2 <- function(json_file) { data$x1 <- df_temp[,1] data$y1 <- df_temp[,2] + data$shot$freeze_frame <- Map(function(ff, x1, y1) { + ff$x1 <- yd_to_m(x1) + ff$y1 <- yd_to_m(y1) + return(ff) + }, + data$shot$freeze_frame, data$x1, data$y1) + tryCatch({ df_players_location <- mapply( function(sublist) { if (!is.null(sublist$teammate)) { df_players <- sapply(sublist$location, function(loc) c(120, 80) - loc %>% as.numeric() %>% yd_to_m() %>% round(., digits = 1)) %>% t() %>% as.data.frame() # df <- sapply(sublist$teammate, function(tmt) cbind(df_players, tmt)) - df <- cbind(df_players, sublist$teammate) - colnames(df) <- c("x", "y", "teammate") + df <- cbind(df_players, sublist$teammate, sublist$position$name, sublist$x1, sublist$y1) + colnames(df) <- c("x", "y", "teammate", "position_name", "x1", "y1") + df <- df %>% mutate(teammate = ifelse(teammate, "teammate", "opponent"), + distance = loc2locdistance(x1 = x, y1 = y, x2 = x1, y2 = y1)) %>% + arrange(distance) + + groups_count <- df %>% group_by(teammate) %>% count() %>% as.data.frame() + if ( !("opponent" %in% groups_count$teammate) ) { + groups_count <- groups_count %>% add_row(teammate = "opponent", n = 0) + } else if ( !("teammate" %in% groups_count$teammate) ) { + groups_count <- groups_count %>% add_row(teammate = "teammate", n = 0) + } + # df$distance <- loc2locdistance(x1 = df$x, y1 = df$y, x2 = data$x1, y1 = data$y1) + # df <- df %>% mutate(distance = loc2locdistance(x1 = x, y1 = y, x2 = data$x1, y1 = data$y1)) %>% arrange(distance) + # df <- df %>% arrange(distance) # df <- df %>% unnest(c(x, y, teammate)) # df$x <- as.numeric(df$x) # df$y <- as.numeric(df$y) # df$teammate <- as.logical(df$teammate) %>% as.numeric() - # - na_df <- as.data.frame(matrix(NA, nrow = 21 - nrow(df), ncol = ncol(df))) + + na_df <- as.data.frame(matrix("na", nrow = 21 - nrow(df), ncol = ncol(df))) colnames(na_df) <- colnames(df) + + na_df$teammate <- rep(c("opponent", "teammate"), c(11, 10) - groups_count$n) dff <- rbind(df, na_df) + dff <- dff %>% group_by(teammate) %>% mutate(rown = row_number(distance)) %>% ungroup() %>% + mutate(position_teammate = paste(teammate, ifelse(position_name == "Goalkeeper", position_name, rown), sep = "_")) %>% + select(-c(teammate, position_name, rown, distance, x1, y1)) %>% + mutate(x = ifelse(x == "na", NA, x), + y = ifelse(x == "na", NA, y)) + + # dff <- dff %>% mutate(row = row_number()) %>% + # mutate(position_teammate = paste(ifelse(position_name == "Goalkeeper", position_name, row), teammate, sep = "_")) %>% + # select(-c(teammate, position_name, row)) } else { - dff <- as.data.frame(matrix(NA, nrow = 21, ncol = 3)) + dff <- as.data.frame(matrix("na", nrow = 21, ncol = 3)) colnames(dff) <- c("x", "y", "teammate") + dff$teammate <- rep(c("opponent", "teammate"), c(11, 10)) + dff <- dff %>% group_by(teammate) %>% mutate(rown = row_number()) %>% ungroup() %>% + mutate(position_teammate = paste(teammate, rown)) %>% + select(c(-teammate, rown)) %>% + mutate(x = ifelse(x == "na", NA, x), + y = ifelse(x == "na", NA, y)) } + # print(dff) + # print(wider_df) + # stop("123") + # %>% wider_df <- dff %>% - mutate(row = row_number()) %>% - pivot_wider(names_from = row, values_from = c(x, y, teammate), names_sep = "_player") + pivot_wider(names_from = position_teammate, values_from = c(x, y), names_sep = "_player_") # wider_df <- apply(wider_df, MARGIN = 2, unlist) wider_df }, data$shot$freeze_frame) @@ -157,7 +208,9 @@ get_shots2 <- function(json_file) { print(json_file) print(paste("An error occurred:", e$message)) }) + print(df_players_location) df_players_location <- df_players_location %>% t() + print(df_players_location) # df <- do.call(rbind, df_test$shot$freeze_frame[[1]]$location) %>% as.data.frame() # dff <- cbind(df, df_test$shot$freeze_frame[[1]]$teammate) @@ -313,7 +366,7 @@ combined_data <- bind_rows(data_list) # sss <- unlist(ss) # # # sample data -# data_test <- get_shots2("data/la_liga_events/ (1000).json") +data <- fromJSON("data/la_liga_events/ (1000).json") %>% filter(type$name == "Shot") %>% dplyr::select(c(minute, position, location, shot)) data3_final <- combined_data %>% select(-c(shot_outcome_name, shot_saved_off_target,