From b327147e8318e684c441695648946572c217faed Mon Sep 17 00:00:00 2001 From: LukaszChrostowski Date: Tue, 28 Nov 2023 22:07:24 +0100 Subject: [PATCH] add unit tests --- .DS_Store | Bin 6148 -> 8196 bytes .Rhistory | 512 +++++++++++++++++++++++++++++++++++++++ data/.DS_Store | Bin 8196 -> 8196 bytes notebooks/dataCleaning.R | 48 ++-- testy/tests.r | 32 +++ 5 files changed, 567 insertions(+), 25 deletions(-) create mode 100644 .Rhistory create mode 100644 testy/tests.r diff --git a/.DS_Store b/.DS_Store index 455d0f79c80e87a28ec34ac334d629f3adb91ff2..63e0d1dad46d8e8ddc36df5651a08d209c801295 100644 GIT binary patch delta 180 zcmZoMXmOBWU|?W$DortDU;r^WfEYvza8E20o2aMA$hk3KH}hr%jz7$c**Q2SHn1>q zZsuXJW}Ym{E;!kN<@jV_c43~f;G(>o{JeCKS(BaEO;uSLN*GcZih*RMXHI@{Qcivn z$WWkk9gx=h4+fhTiX<~nY~T@O28scJ1UHay1=+n>kmEb^WPTCP$^JYX9E=cWF>H?K HnZpbK8lWhK delta 107 zcmZp1XfcprU|?W$DortDU=RQ@Ie-{Mvv5r;6q~50$jG)aU^g=(+h!gCYv#$$A_f!7 tCM;&>;1Fa6DgpulZXn?bQophAJM(0I8BdUI1}2C}Aj26p$Mei#1^~C_5aj>> diff --git a/.Rhistory b/.Rhistory new file mode 100644 index 0000000..772cbdd --- /dev/null +++ b/.Rhistory @@ -0,0 +1,512 @@ +error = function(e) { +# handle the error +print(paste("An error occurred:", e$message)) +}) +data$shot <- data$shot %>% select(-freeze_frame, -statsbomb_xg, -key_pass_id) +data$shot$body_part <- data$shot$body_part %>% select(-id) +data$shot$technique <- data$shot$technique %>% select(-id) +data$shot$type <- data$shot$type %>% select(-id) +data$position <- data$position %>% select(-id) +data$shot <- data$shot %>% select(-end_location) +tryCatch({ # TODO reduce error cases +if ("one_on_one" %in% colnames(data$shot)) { +data[is.na(data$shot$one_on_one), ]$shot$one_on_one <- FALSE +} else { +data$shot$one_on_one <- FALSE +} +if ("first_time" %in% colnames(data$shot)) { +data[is.na(data$shot$first_time), ]$shot$first_time <- FALSE +} else { +data$shot$first_time <- FALSE +} +if ("aerial_won" %in% colnames(data$shot)) { +data[is.na(data$shot$aerial_won), ]$shot$aerial_won <- FALSE +} else { +data$shot$aerial_won <- FALSE +} +if ("saved_to_post" %in% colnames(data$shot)) { +data[is.na(data$shot$saved_to_post), ]$shot$saved_to_post <- FALSE +} else { +data$shot$saved_to_post <- FALSE +} +if ("deflected" %in% colnames(data$shot)) { +data[is.na(data$shot$deflected), ]$shot$deflected <- FALSE +} else { +data$shot$deflected <- FALSE +} +if ("saved_off_target" %in% colnames(data$shot)) { +data[is.na(data$shot$saved_off_target), ]$shot$saved_off_target <- FALSE +} else { +data$shot$saved_off_target <- FALSE +} +if ("open_goal" %in% colnames(data$shot)) { +data[is.na(data$shot$open_goal), ]$shot$open_goal <- FALSE +} else { +data$shot$open_goal <- FALSE +} +if ("follows_dribble" %in% colnames(data$shot)) { +data[is.na(data$shot$follows_dribble), ]$shot$follows_dribble <- FALSE +} else { +data$shot$follows_dribble <- FALSE +} +if ("redirect" %in% colnames(data$shot)) { +data[is.na(data$shot$redirect), ]$shot$redirect <- FALSE +} else { +data$shot$redirect <- FALSE +} +if ("kick_off" %in% colnames(data$kick_off)) { +data[is.na(data$kick_off$kick_off), ]$kick_off$kick_off <- FALSE +} else { +data$kick_off$kick_off <- FALSE +} +}, +error = function(e) { +# handle the error +print(paste("An error occurred:", e$message)) +}) +data <- data %>% mutate(angle = atan(7.32 * x1 / (x1^2 + y1^2 - (7.32/2)^2)), +is_goal = ifelse(shot$outcome$id == 97, 1, 0)) %>% +select(-location) +data$shot$outcome <- data$shot$outcome %>% select(-id) +data <- data %>% unnest(shot, names_sep = "_") +data$angle <- ifelse(data$angle<0, base::pi + data$angle, data$angle) +data +} +data_test <- get_shots2("data/la_liga_events/303377.json") +View(data_test) +# TODO not all json data have these same keys/fields, customize function to cover all cases +get_shots2 <- function(json_file) { +data <- fromJSON(json_file) %>% filter(type$name == "Shot") %>% dplyr::select(c(minute, position, location, shot)) +df_temp <- do.call(rbind, data$location) +colnames(df_temp) <- c("x1", "y1") +data$x1 <- df_temp[,1] +data$y1 <- df_temp[,2] +tryCatch({ # TODO reduce error cases +data$number_of_players <- mapply(function(sublist, x1_threshold) { +# Extracting the first location value and converting it to numeric +first_location_values <- sapply(sublist$location, function(loc) as.numeric(loc[1])) +if ("teammate" %in% names(sublist)) { +# Filtering and counting +res <- sum(!sublist$teammate & first_location_values > x1_threshold) # error here +} else { +res <- 0 +} +res +}, data$shot$freeze_frame, data$x1) +}, +error = function(e) { +# handle the error +print(paste("An error occurred:", e$message)) +}) +data$shot <- data$shot %>% select(-freeze_frame, -statsbomb_xg, -key_pass_id) +data$shot$body_part <- data$shot$body_part %>% select(-id) +data$shot$technique <- data$shot$technique %>% select(-id) +data$shot$type <- data$shot$type %>% select(-id) +data$position <- data$position %>% select(-id) +data$shot <- data$shot %>% select(-end_location) +tryCatch({ # TODO reduce error cases +if ("one_on_one" %in% colnames(data$shot)) { +data[is.na(data$shot$one_on_one), ]$shot$one_on_one <- FALSE +} else { +data$shot$one_on_one <- FALSE +} +if ("first_time" %in% colnames(data$shot)) { +data[is.na(data$shot$first_time), ]$shot$first_time <- FALSE +} else { +data$shot$first_time <- FALSE +} +if ("aerial_won" %in% colnames(data$shot)) { +data[is.na(data$shot$aerial_won), ]$shot$aerial_won <- FALSE +} else { +data$shot$aerial_won <- FALSE +} +if ("saved_to_post" %in% colnames(data$shot)) { +data[is.na(data$shot$saved_to_post), ]$shot$saved_to_post <- FALSE +} else { +data$shot$saved_to_post <- FALSE +} +if ("deflected" %in% colnames(data$shot)) { +data[is.na(data$shot$deflected), ]$shot$deflected <- FALSE +} else { +data$shot$deflected <- FALSE +} +if ("saved_off_target" %in% colnames(data$shot)) { +data[is.na(data$shot$saved_off_target), ]$shot$saved_off_target <- FALSE +} else { +data$shot$saved_off_target <- FALSE +} +if ("open_goal" %in% colnames(data$shot)) { +data[is.na(data$shot$open_goal), ]$shot$open_goal <- FALSE +} else { +data$shot$open_goal <- FALSE +} +if ("follows_dribble" %in% colnames(data$shot)) { +data[is.na(data$shot$follows_dribble), ]$shot$follows_dribble <- FALSE +} else { +data$shot$follows_dribble <- FALSE +} +if ("redirect" %in% colnames(data$shot)) { +data[is.na(data$shot$redirect), ]$shot$redirect <- FALSE +} else { +data$shot$redirect <- FALSE +} +if ("kick_off" %in% colnames(data$kick_off)) { +data[is.na(data$kick_off$kick_off), ]$kick_off$kick_off <- FALSE +} else { +data$kick_off$kick_off <- FALSE +} +}, +error = function(e) { +# handle the error +print(paste("An error occurred:", e$message)) +}) +data <- data %>% mutate(angle = atan(7.32 * x1 / (x1^2 + y1^2 - (7.32/2)^2)), +is_goal = ifelse(shot$outcome$id == 97, 1, 0)) %>% +select(-location) +data$angle <- ifelse(data$angle<0, base::pi + data$angle, data$angle) +data$shot$outcome <- data$shot$outcome %>% select(-id) +data <- data %>% unnest(shot, names_sep = "_") +%>% unnest(position, names_sep = "_") +# TODO not all json data have these same keys/fields, customize function to cover all cases +get_shots2 <- function(json_file) { +data <- fromJSON(json_file) %>% filter(type$name == "Shot") %>% dplyr::select(c(minute, position, location, shot)) +df_temp <- do.call(rbind, data$location) +colnames(df_temp) <- c("x1", "y1") +data$x1 <- df_temp[,1] +data$y1 <- df_temp[,2] +tryCatch({ # TODO reduce error cases +data$number_of_players <- mapply(function(sublist, x1_threshold) { +# Extracting the first location value and converting it to numeric +first_location_values <- sapply(sublist$location, function(loc) as.numeric(loc[1])) +if ("teammate" %in% names(sublist)) { +# Filtering and counting +res <- sum(!sublist$teammate & first_location_values > x1_threshold) # error here +} else { +res <- 0 +} +res +}, data$shot$freeze_frame, data$x1) +}, +error = function(e) { +# handle the error +print(paste("An error occurred:", e$message)) +}) +data$shot <- data$shot %>% select(-freeze_frame, -statsbomb_xg, -key_pass_id) +data$shot$body_part <- data$shot$body_part %>% select(-id) +data$shot$technique <- data$shot$technique %>% select(-id) +data$shot$type <- data$shot$type %>% select(-id) +data$position <- data$position %>% select(-id) +data$shot <- data$shot %>% select(-end_location) +tryCatch({ # TODO reduce error cases +if ("one_on_one" %in% colnames(data$shot)) { +data[is.na(data$shot$one_on_one), ]$shot$one_on_one <- FALSE +} else { +data$shot$one_on_one <- FALSE +} +if ("first_time" %in% colnames(data$shot)) { +data[is.na(data$shot$first_time), ]$shot$first_time <- FALSE +} else { +data$shot$first_time <- FALSE +} +if ("aerial_won" %in% colnames(data$shot)) { +data[is.na(data$shot$aerial_won), ]$shot$aerial_won <- FALSE +} else { +data$shot$aerial_won <- FALSE +} +if ("saved_to_post" %in% colnames(data$shot)) { +data[is.na(data$shot$saved_to_post), ]$shot$saved_to_post <- FALSE +} else { +data$shot$saved_to_post <- FALSE +} +if ("deflected" %in% colnames(data$shot)) { +data[is.na(data$shot$deflected), ]$shot$deflected <- FALSE +} else { +data$shot$deflected <- FALSE +} +if ("saved_off_target" %in% colnames(data$shot)) { +data[is.na(data$shot$saved_off_target), ]$shot$saved_off_target <- FALSE +} else { +data$shot$saved_off_target <- FALSE +} +if ("open_goal" %in% colnames(data$shot)) { +data[is.na(data$shot$open_goal), ]$shot$open_goal <- FALSE +} else { +data$shot$open_goal <- FALSE +} +if ("follows_dribble" %in% colnames(data$shot)) { +data[is.na(data$shot$follows_dribble), ]$shot$follows_dribble <- FALSE +} else { +data$shot$follows_dribble <- FALSE +} +if ("redirect" %in% colnames(data$shot)) { +data[is.na(data$shot$redirect), ]$shot$redirect <- FALSE +} else { +data$shot$redirect <- FALSE +} +if ("kick_off" %in% colnames(data$kick_off)) { +data[is.na(data$kick_off$kick_off), ]$kick_off$kick_off <- FALSE +} else { +data$kick_off$kick_off <- FALSE +} +}, +error = function(e) { +# handle the error +print(paste("An error occurred:", e$message)) +}) +data <- data %>% mutate(angle = atan(7.32 * x1 / (x1^2 + y1^2 - (7.32/2)^2)), +is_goal = ifelse(shot$outcome$id == 97, 1, 0)) %>% +select(-location) +data$angle <- ifelse(data$angle<0, base::pi + data$angle, data$angle) +data$shot$outcome <- data$shot$outcome %>% select(-id) +data <- data %>% unnest(shot, names_sep = "_") %>% +unnest(position, names_sep = "_") %>% +unnest(shot_type, names_sep = "_") %>% +unnest(shot_outcome, names_sep = "_") %>% +unnest(shot_technique, names_sep = "_") +data +} +data_test <- get_shots2("data/la_liga_events/303377.json") +View(data_test) +write.csv(data_test, file = "data/data_test.csv") +colnames(data_test) +data_test$kick_off +data_test$shot_body_part +data_df <- as.data.frame(data_test) +# TODO not all json data have these same keys/fields, customize function to cover all cases +get_shots2 <- function(json_file) { +data <- fromJSON(json_file) %>% filter(type$name == "Shot") %>% dplyr::select(c(minute, position, location, shot)) +df_temp <- do.call(rbind, data$location) +colnames(df_temp) <- c("x1", "y1") +data$x1 <- df_temp[,1] +data$y1 <- df_temp[,2] +tryCatch({ # TODO reduce error cases +data$number_of_players <- mapply(function(sublist, x1_threshold) { +# Extracting the first location value and converting it to numeric +first_location_values <- sapply(sublist$location, function(loc) as.numeric(loc[1])) +if ("teammate" %in% names(sublist)) { +# Filtering and counting +res <- sum(!sublist$teammate & first_location_values > x1_threshold) # error here +} else { +res <- 0 +} +res +}, data$shot$freeze_frame, data$x1) +}, +error = function(e) { +# handle the error +print(paste("An error occurred:", e$message)) +}) +data$shot <- data$shot %>% select(-freeze_frame, -statsbomb_xg, -key_pass_id) +data$shot$body_part <- data$shot$body_part %>% select(-id) +data$shot$technique <- data$shot$technique %>% select(-id) +data$shot$type <- data$shot$type %>% select(-id) +data$position <- data$position %>% select(-id) +data$shot <- data$shot %>% select(-end_location) +tryCatch({ # TODO reduce error cases +if ("one_on_one" %in% colnames(data$shot)) { +data[is.na(data$shot$one_on_one), ]$shot$one_on_one <- FALSE +} else { +data$shot$one_on_one <- FALSE +} +if ("first_time" %in% colnames(data$shot)) { +data[is.na(data$shot$first_time), ]$shot$first_time <- FALSE +} else { +data$shot$first_time <- FALSE +} +if ("aerial_won" %in% colnames(data$shot)) { +data[is.na(data$shot$aerial_won), ]$shot$aerial_won <- FALSE +} else { +data$shot$aerial_won <- FALSE +} +if ("saved_to_post" %in% colnames(data$shot)) { +data[is.na(data$shot$saved_to_post), ]$shot$saved_to_post <- FALSE +} else { +data$shot$saved_to_post <- FALSE +} +if ("deflected" %in% colnames(data$shot)) { +data[is.na(data$shot$deflected), ]$shot$deflected <- FALSE +} else { +data$shot$deflected <- FALSE +} +if ("saved_off_target" %in% colnames(data$shot)) { +data[is.na(data$shot$saved_off_target), ]$shot$saved_off_target <- FALSE +} else { +data$shot$saved_off_target <- FALSE +} +if ("open_goal" %in% colnames(data$shot)) { +data[is.na(data$shot$open_goal), ]$shot$open_goal <- FALSE +} else { +data$shot$open_goal <- FALSE +} +if ("follows_dribble" %in% colnames(data$shot)) { +data[is.na(data$shot$follows_dribble), ]$shot$follows_dribble <- FALSE +} else { +data$shot$follows_dribble <- FALSE +} +if ("redirect" %in% colnames(data$shot)) { +data[is.na(data$shot$redirect), ]$shot$redirect <- FALSE +} else { +data$shot$redirect <- FALSE +} +if ("kick_off" %in% colnames(data$kick_off)) { +data[is.na(data$shot$kick_off), ]$shotf$kick_off <- FALSE +} else { +data$kick_off <- FALSE +} +}, +error = function(e) { +# handle the error +print(paste("An error occurred:", e$message)) +}) +data <- data %>% mutate(angle = atan(7.32 * x1 / (x1^2 + y1^2 - (7.32/2)^2)), +is_goal = ifelse(shot$outcome$id == 97, 1, 0)) %>% +select(-location) +data$angle <- ifelse(data$angle<0, base::pi + data$angle, data$angle) +data$shot$outcome <- data$shot$outcome %>% select(-id) +data <- data %>% unnest(shot, names_sep = "_") %>% +unnest(position, names_sep = "_") %>% +unnest(shot_type, names_sep = "_") %>% +unnest(shot_outcome, names_sep = "_") %>% +unnest(shot_technique, names_sep = "_") %>% +unnest(shot_body_part, names_sep = "_") +data +} +data_test <- get_shots2("data/la_liga_events/303377.json") +write.csv(data_test, file = "data/data_test.csv") +file_names <- list.files(path = "data/la_liga_events/", pattern = "*.json") +data_list <- lapply(paste("data/la_liga_events/", file_names, sep = ""), get_shots2) +combined_data <- do.call(rbind, data_list) +data_test <- data <- fromJSON(("data/la_liga_events/303377.json") %>% filter(type$name == "Shot") +data_test <- fromJSON("data/la_liga_events/303377.json") %>% filter(type$name == "Shot") +data_test <- fromJSON("data/la_liga_events/303377.json") %>% filter(type$name == "Shot") +data_test$id +data_test$index +data4 <- read.csv("data/data4.csv", nrows = 1000) +View(data4) +# TODO not all json data have these same keys/fields, customize function to cover all cases +get_shots2 <- function(json_file) { +data <- fromJSON(json_file) %>% filter(type$name == "Shot") %>% dplyr::select(c(minute, position, location, shot)) +df_temp <- do.call(rbind, data$location) +colnames(df_temp) <- c("x1", "y1") +data$x1 <- df_temp[,1] +data$y1 <- df_temp[,2] +tryCatch({ # TODO reduce error cases +data$number_of_players_opponents <- mapply(function(sublist, x1_threshold) { +# Extracting the first location value and converting it to numeric +first_location_values <- sapply(sublist$location, function(loc) as.numeric(loc[1])) +if ("teammate" %in% names(sublist)) { +# Filtering and counting +res <- sum(!sublist$teammate & first_location_values > x1_threshold) # error here +} else { +res <- 0 +} +res +}, data$shot$freeze_frame, data$x1) +}, +error = function(e) { +# handle the error +print(paste("An error occurred:", e$message)) +}) +tryCatch({ # TODO reduce error cases +data$number_of_players_teammates <- mapply(function(sublist, x1_threshold) { +# Extracting the first location value and converting it to numeric +first_location_values <- sapply(sublist$location, function(loc) as.numeric(loc[1])) +if ("teammate" %in% names(sublist)) { +# Filtering and counting +res <- sum(sublist$teammate & first_location_values > x1_threshold) # error here +} else { +res <- 0 +} +res +}, data$shot$freeze_frame, data$x1) +}, +error = function(e) { +# handle the error +print(paste("An error occurred:", e$message)) +}) +data$shot <- data$shot %>% select(-freeze_frame, -statsbomb_xg, -key_pass_id) +data$shot$body_part <- data$shot$body_part %>% select(-id) +data$shot$technique <- data$shot$technique %>% select(-id) +data$shot$type <- data$shot$type %>% select(-id) +data$position <- data$position %>% select(-id) +data$shot <- data$shot %>% select(-end_location) +tryCatch({ # TODO reduce error cases +if ("one_on_one" %in% colnames(data$shot)) { +data[is.na(data$shot$one_on_one), ]$shot$one_on_one <- FALSE +} else { +data$shot$one_on_one <- FALSE +} +if ("first_time" %in% colnames(data$shot)) { +data[is.na(data$shot$first_time), ]$shot$first_time <- FALSE +} else { +data$shot$first_time <- FALSE +} +if ("aerial_won" %in% colnames(data$shot)) { +data[is.na(data$shot$aerial_won), ]$shot$aerial_won <- FALSE +} else { +data$shot$aerial_won <- FALSE +} +if ("saved_to_post" %in% colnames(data$shot)) { +data[is.na(data$shot$saved_to_post), ]$shot$saved_to_post <- FALSE +} else { +data$shot$saved_to_post <- FALSE +} +if ("deflected" %in% colnames(data$shot)) { +data[is.na(data$shot$deflected), ]$shot$deflected <- FALSE +} else { +data$shot$deflected <- FALSE +} +if ("saved_off_target" %in% colnames(data$shot)) { +data[is.na(data$shot$saved_off_target), ]$shot$saved_off_target <- FALSE +} else { +data$shot$saved_off_target <- FALSE +} +if ("open_goal" %in% colnames(data$shot)) { +data[is.na(data$shot$open_goal), ]$shot$open_goal <- FALSE +} else { +data$shot$open_goal <- FALSE +} +if ("follows_dribble" %in% colnames(data$shot)) { +data[is.na(data$shot$follows_dribble), ]$shot$follows_dribble <- FALSE +} else { +data$shot$follows_dribble <- FALSE +} +if ("redirect" %in% colnames(data$shot)) { +data[is.na(data$shot$redirect), ]$shot$redirect <- FALSE +} else { +data$shot$redirect <- FALSE +} +if ("kick_off" %in% colnames(data$kick_off)) { +data[is.na(data$shot$kick_off), ]$shotf$kick_off <- FALSE +} else { +data$kick_off <- FALSE +} +}, +error = function(e) { +# handle the error +print(paste("An error occurred:", e$message)) +}) +data <- data %>% mutate(angle = atan(7.32 * x1 / (x1^2 + y1^2 - (7.32/2)^2)), +is_goal = ifelse(shot$outcome$id == 97, 1, 0)) %>% +select(-location) +data$angle <- ifelse(data$angle<0, base::pi + data$angle, data$angle) +data$shot$outcome <- data$shot$outcome %>% select(-id) +data <- data %>% unnest(shot, names_sep = "_") %>% +unnest(position, names_sep = "_") %>% +unnest(shot_type, names_sep = "_") %>% +unnest(shot_outcome, names_sep = "_") %>% +unnest(shot_technique, names_sep = "_") %>% +unnest(shot_body_part, names_sep = "_") +data +} +# sample data +data_test <- get_shots2("data/la_liga_events/303377.json") +write.csv(data_test, file = "data/data3_test.csv") +data1 <- read.csv("data/data1.csv", nrows = 1000) +colnames(data1) +View(data1) +data2 <- read.csv("data/data2.csv", nrows = 1000) +View(data2) +data3 <- read.csv("data/data3_test.csv", nrows = 1000) +colnames(data3) +View(data3) diff --git a/data/.DS_Store b/data/.DS_Store index 7c4f19472941a102db7c19594546e7e1492b749c..6fd131d0d61c5123e1118d3673a521e46db668b0 100644 GIT binary patch delta 89 zcmZp1XmOYj$!*A>$B@iW%uwc;v$1d*`@{y`&FmZ;9FsEzrg8Hzq%b5hlmM~uWDY^Y t$!7()7)>V66B3^sDIl=fQt%wx#0Is^>=NHtCLb5}+gvC*jcHDvW zRGrF@$B@pD2_#d2un34#Cf5mw^R8fEV3_>PnovLBMdcu;4ki T&Fm82SSD8smu&VHoy-IP991a( diff --git a/notebooks/dataCleaning.R b/notebooks/dataCleaning.R index 0305d2f..527ad46 100644 --- a/notebooks/dataCleaning.R +++ b/notebooks/dataCleaning.R @@ -53,16 +53,16 @@ get_shots <- function(file_path, name_detail, save_files = F){ shots_ok } -shotsEN <- get_shots("data/events/events_England.json", "EN") -shotsSP <- get_shots("data/events/events_Spain.json", "SP") -shotsWC <- get_shots("data/events/events_World_Cup.json", "WC") -shotsIT <- get_shots("data/events/events_Italy.json", "IT") -shotsGE <- get_shots("data/events/events_Germany.json", "GE") -shotsFR <- get_shots("data/events/events_France.json", "FR") -shotsEC <- get_shots("data/events/events_European_Championship.json", "EC") - -shots <- shotsEN %>% - bind_rows(shotsFR, shotsGE, shotsIT, shotsSP, shotsWC, shotsEC) +# shotsEN <- get_shots("data/events/events_England.json", "EN") +# shotsSP <- get_shots("data/events/events_Spain.json", "SP") +# shotsWC <- get_shots("data/events/events_World_Cup.json", "WC") +# shotsIT <- get_shots("data/events/events_Italy.json", "IT") +# shotsGE <- get_shots("data/events/events_Germany.json", "GE") +# shotsFR <- get_shots("data/events/events_France.json", "FR") +# shotsEC <- get_shots("data/events/events_European_Championship.json", "EC") +# +# shots <- shotsEN %>% +# bind_rows(shotsFR, shotsGE, shotsIT, shotsSP, shotsWC, shotsEC) get_final_data <- function(data) { @@ -78,8 +78,8 @@ get_final_data <- function(data) { data } -data1 <- get_final_data(shots) -write.csv(data1, file = "data/data1.csv") +# data1 <- get_final_data(shots) +# write.csv(data1, file = "data/data1.csv") ##################### The second dataset ############################## @@ -96,8 +96,8 @@ get_data <- function(event_path, info_path) { } -data2 <- get_data(event_path = "data/events.csv", info_path = "data/ginf.csv") -write.csv(data2, file = "data/data2.csv") +# data2 <- get_data(event_path = "data/events.csv", info_path = "data/ginf.csv") +# write.csv(data2, file = "data/data2.csv") # TODO dodać kolumne z x y coordinates bazując na location # TODO dodać kolumnę z kątem do bramki @@ -241,19 +241,17 @@ get_shots2 <- function(json_file) { data } -file_names <- list.files(path = "data/la_liga_events/", pattern = "*.json") -data_list <- lapply(paste("data/la_liga_events/", file_names, sep = ""), get_shots2) -combined_data <- do.call(rbind, data_list) - -# sample data -data_test <- get_shots2("data/la_liga_events/303377.json") -write.csv(data_test, file = "data/data3_test.csv") +# file_names <- list.files(path = "data/la_liga_events/", pattern = "*.json") +# data_list <- lapply(paste("data/la_liga_events/", file_names, sep = ""), get_shots2) +# combined_data <- do.call(rbind, data_list) +# ss <- lapply(data_list, nrow) +# sss <- unlist(ss) +# +# # sample data +# data_test <- get_shots2("data/la_liga_events/303377.json") +# write.csv(data_test, file = "data/data3_test.csv") ##################### The fourth dataset ############################## -data4 <- read.csv("data/data4.csv", nrows = 1000) -data1 <- read.csv("data/data1.csv", nrows = 1000) -data2 <- read.csv("data/data2.csv", nrows = 1000) -data3 <- read.csv("data/data3_test.csv", nrows = 1000) diff --git a/testy/tests.r b/testy/tests.r new file mode 100644 index 0000000..3d8126c --- /dev/null +++ b/testy/tests.r @@ -0,0 +1,32 @@ +library(tinytest) + +# load all functions +source("notebooks/dataCleaning.R") +# set working directory on fantastyczne_gole +# setwd("~/Desktop/fantastyczne_gole") +# make sure that have all packages installed (from dataCleaning.R) + +# generate random data +random_data <- as.data.frame(x = rnorm(100), + y = rnorm(100, mean = 1, sd = 3)) + +# unit tests +expect_silent( + data <- get_shots(file_path = "data/events/events_England.json", name_detail = "EN") +) + +expect_silent( + data <- get_shots2(json_file = "data/la_liga_events/ (1).json") +) + +expect_error( + get_final_data(data = random_data) +) + +data1 <- get_shots2(json_file = "data/la_liga_events/ (1).json") +data2 <- get_shots2(json_file = "data/la_liga_events/ (2).json") + +expect_length(colnames(data1), 22) +expect_length(colnames(data2), 22) +expect_false(is.vector(data1)) +expect_true(is.data.frame(data1))