more cleaning

This commit is contained in:
LukaszChrostowski 2023-11-27 10:20:04 +01:00
parent 158b8af794
commit a98353e257
2 changed files with 91 additions and 9 deletions

19
data/data_test.csv Normal file
View File

@ -0,0 +1,19 @@
"","minute","position_name","shot_body_part_name","shot_type_name","shot_outcome_name","shot_technique_name","shot_one_on_one","shot_first_time","shot_aerial_won","shot_saved_to_post","shot_deflected","shot_saved_off_target","shot_open_goal","shot_follows_dribble","shot_redirect","x1","y1","number_of_players","kick_off","angle","is_goal"
"1",11,"Right Wing","Left Foot","Open Play","Blocked","Normal",FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,93.2,44.4,9,FALSE,0.0640059213742957,0
"2",13,"Right Wing","Left Foot","Open Play","Saved","Lob",TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,100.3,42.8,1,FALSE,0.061730105843871,0
"3",21,"Right Center Forward","Right Foot","Open Play","Blocked","Normal",TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,110.1,28,1,FALSE,0.0624299048089645,0
"4",25,"Left Midfield","Left Foot","Open Play","Off T","Half Volley",FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,108.8,25,5,FALSE,0.0638870054917821,0
"5",32,"Center Forward","Left Foot","Open Play","Goal","Lob",FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,107.1,42.5,2,FALSE,0.0590398311560372,1
"6",38,"Right Back","Left Foot","Open Play","Goal","Normal",FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,107.7,47.3,6,FALSE,0.0569702959919461,1
"7",42,"Right Wing","Left Foot","Free Kick","Off T","Normal",FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,87.3,45.3,11,FALSE,0.0660566548690278,0
"8",47,"Right Wing","Head","Open Play","Saved","Normal",TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,112,32.2,1,FALSE,0.060353615283394,0
"9",49,"Right Defensive Midfield","Right Foot","Free Kick","Off T","Normal",FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,84.7,35.2,11,FALSE,0.0736787207689688,0
"10",58,"Right Wing","Right Foot","Open Play","Blocked","Normal",FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,115.6,51.7,2,FALSE,0.052762547731953,0
"11",65,"Right Center Forward","Right Foot","Open Play","Goal","Volley",FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,104.3,37.6,3,FALSE,0.0620981693525753,1
"12",69,"Left Wing","Right Foot","Open Play","Saved","Normal",FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,101.2,24.7,4,FALSE,0.068243631613985,0
"13",71,"Right Center Forward","Right Foot","Open Play","Saved","Volley",FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,115.5,44.1,1,FALSE,0.0553048979119624,0
"14",73,"Left Wing","Left Foot","Open Play","Blocked","Normal",FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,108.3,33.4,6,FALSE,0.0617057300721472,0
"15",73,"Center Forward","Right Foot","Open Play","Off T","Normal",FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,107.7,39.4,6,FALSE,0.0599333680981008,0
"16",76,"Center Forward","Right Foot","Open Play","Off T","Half Volley",TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,108.7,52.1,1,FALSE,0.0547568224463756,0
"17",79,"Left Center Forward","Right Foot","Open Play","Off T","Volley",FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,113.9,34.4,4,FALSE,0.0588824107265361,0
"18",89,"Right Center Forward","Left Foot","Open Play","Off T","Normal",FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,100.2,51.6,7,FALSE,0.0577379599935821,0
1 minute position_name shot_body_part_name shot_type_name shot_outcome_name shot_technique_name shot_one_on_one shot_first_time shot_aerial_won shot_saved_to_post shot_deflected shot_saved_off_target shot_open_goal shot_follows_dribble shot_redirect x1 y1 number_of_players kick_off angle is_goal
2 1 11 Right Wing Left Foot Open Play Blocked Normal FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE 93.2 44.4 9 FALSE 0.0640059213742957 0
3 2 13 Right Wing Left Foot Open Play Saved Lob TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE 100.3 42.8 1 FALSE 0.061730105843871 0
4 3 21 Right Center Forward Right Foot Open Play Blocked Normal TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE 110.1 28 1 FALSE 0.0624299048089645 0
5 4 25 Left Midfield Left Foot Open Play Off T Half Volley FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE 108.8 25 5 FALSE 0.0638870054917821 0
6 5 32 Center Forward Left Foot Open Play Goal Lob FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE 107.1 42.5 2 FALSE 0.0590398311560372 1
7 6 38 Right Back Left Foot Open Play Goal Normal FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE 107.7 47.3 6 FALSE 0.0569702959919461 1
8 7 42 Right Wing Left Foot Free Kick Off T Normal FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE 87.3 45.3 11 FALSE 0.0660566548690278 0
9 8 47 Right Wing Head Open Play Saved Normal TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE 112 32.2 1 FALSE 0.060353615283394 0
10 9 49 Right Defensive Midfield Right Foot Free Kick Off T Normal FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE 84.7 35.2 11 FALSE 0.0736787207689688 0
11 10 58 Right Wing Right Foot Open Play Blocked Normal FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE 115.6 51.7 2 FALSE 0.052762547731953 0
12 11 65 Right Center Forward Right Foot Open Play Goal Volley FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE 104.3 37.6 3 FALSE 0.0620981693525753 1
13 12 69 Left Wing Right Foot Open Play Saved Normal FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE 101.2 24.7 4 FALSE 0.068243631613985 0
14 13 71 Right Center Forward Right Foot Open Play Saved Volley FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE 115.5 44.1 1 FALSE 0.0553048979119624 0
15 14 73 Left Wing Left Foot Open Play Blocked Normal FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE 108.3 33.4 6 FALSE 0.0617057300721472 0
16 15 73 Center Forward Right Foot Open Play Off T Normal FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE 107.7 39.4 6 FALSE 0.0599333680981008 0
17 16 76 Center Forward Right Foot Open Play Off T Half Volley TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE 108.7 52.1 1 FALSE 0.0547568224463756 0
18 17 79 Left Center Forward Right Foot Open Play Off T Volley FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE 113.9 34.4 4 FALSE 0.0588824107265361 0
19 18 89 Right Center Forward Left Foot Open Play Off T Normal FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE 100.2 51.6 7 FALSE 0.0577379599935821 0

View File

@ -142,11 +142,65 @@ get_shots2 <- function(json_file) {
data$shot <- data$shot %>% select(-end_location)
tryCatch({ # TODO reduce error cases
if ("one_on_one" %in% colnames(data$shot)) data[is.na(data$shot$one_on_one), ]$shot$one_on_one <- FALSE
if ("first_time" %in% colnames(data$shot)) data[is.na(data$shot$first_time), ]$shot$first_time <- FALSE
if ("aerial_won" %in% colnames(data$shot)) data[is.na(data$shot$aerial_won), ]$shot$aerial_won <- FALSE
# data$shot$one_on_one[is.na(data$shot$one_on_one)] <- FALSE
# data$shot$first_time[is.na(data$shot$first_time)] <- FALSE
# Default the optional boolean shot flags to FALSE. Each flag is handled the
# same way: when the column is present in data$shot, the rows whose value is
# NA are set to FALSE; when the column is absent, it is created and filled
# with FALSE.
if ("one_on_one" %in% colnames(data$shot)) {
data[is.na(data$shot$one_on_one), ]$shot$one_on_one <- FALSE
} else {
data$shot$one_on_one <- FALSE
}
if ("first_time" %in% colnames(data$shot)) {
data[is.na(data$shot$first_time), ]$shot$first_time <- FALSE
} else {
data$shot$first_time <- FALSE
}
if ("aerial_won" %in% colnames(data$shot)) {
data[is.na(data$shot$aerial_won), ]$shot$aerial_won <- FALSE
} else {
data$shot$aerial_won <- FALSE
}
if ("saved_to_post" %in% colnames(data$shot)) {
data[is.na(data$shot$saved_to_post), ]$shot$saved_to_post <- FALSE
} else {
data$shot$saved_to_post <- FALSE
}
if ("deflected" %in% colnames(data$shot)) {
data[is.na(data$shot$deflected), ]$shot$deflected <- FALSE
} else {
data$shot$deflected <- FALSE
}
if ("saved_off_target" %in% colnames(data$shot)) {
data[is.na(data$shot$saved_off_target), ]$shot$saved_off_target <- FALSE
} else {
data$shot$saved_off_target <- FALSE
}
if ("open_goal" %in% colnames(data$shot)) {
data[is.na(data$shot$open_goal), ]$shot$open_goal <- FALSE
} else {
data$shot$open_goal <- FALSE
}
if ("follows_dribble" %in% colnames(data$shot)) {
data[is.na(data$shot$follows_dribble), ]$shot$follows_dribble <- FALSE
} else {
data$shot$follows_dribble <- FALSE
}
if ("redirect" %in% colnames(data$shot)) {
data[is.na(data$shot$redirect), ]$shot$redirect <- FALSE
} else {
data$shot$redirect <- FALSE
}
# NOTE(review): this branch does not follow the pattern of the ones above;
# confirm the intent before relying on it:
#   - the test inspects colnames(data$kick_off), not colnames(data$shot);
#   - the assignment goes through `shotf`, a name that appears nowhere else
#     in this section (possibly a typo for `shot`);
#   - the fallback writes a top-level data$kick_off column rather than
#     data$shot$kick_off.
if ("kick_off" %in% colnames(data$kick_off)) {
data[is.na(data$shot$kick_off), ]$shotf$kick_off <- FALSE
} else {
data$kick_off <- FALSE
}
},
error = function(e) {
# handle the error
@ -156,13 +210,22 @@ get_shots2 <- function(json_file) {
data <- data %>% mutate(angle = atan(7.32 * x1 / (x1^2 + y1^2 - (7.32/2)^2)),
is_goal = ifelse(shot$outcome$id == 97, 1, 0)) %>%
select(-location)
data$shot$outcome <- data$shot$outcome %>% select(-id)
data$angle <- ifelse(data$angle<0, base::pi + data$angle, data$angle)
data$shot$outcome <- data$shot$outcome %>% select(-id)
data <- data %>% unnest(shot, names_sep = "_") %>%
unnest(position, names_sep = "_") %>%
unnest(shot_type, names_sep = "_") %>%
unnest(shot_outcome, names_sep = "_") %>%
unnest(shot_technique, names_sep = "_") %>%
unnest(shot_body_part, names_sep = "_")
data
}
# Driver script: parse the event files with get_shots2() and write the CSV
# exports.

# `pattern` is a regular expression, not a shell glob, so anchor on the
# literal ".json" suffix; "*.json" would also accept names that merely
# contain "json" after some other character.
file_names <- list.files(path = "data/la_liga_events/", pattern = "\\.json$")
# file.path() joins directory and file name with a single separator, giving
# the same "data/la_liga_events/<name>" paths as before.
data_list <- lapply(file.path("data/la_liga_events", file_names), get_shots2)

# Export the shots parsed from the first file on its own.
combined_data <- data_list[[1]] # TODO unlist data
write.csv(combined_data, file = "data/data3.csv")

# Stack the tables of the first ten files; head() simply returns fewer
# elements when fewer than ten files were found, instead of padding the list
# with NULL entries.
combined_data <- do.call(rbind, head(data_list, 10))

# sample data
data_test <- get_shots2("data/la_liga_events/303377.json")
write.csv(data_test, file = "data/data_test.csv")