"""Convert ';'-separated team match statistics exports into per-match JSON files.

For every ``.txt`` file found under ``./STATYSTYKI DRUŻYNOWE 2019_20/`` the
script reads the rows two at a time (one row per team of a match) and writes
one JSON document per match under ``./jsonsMatchesEndpoint/<file stem>/``.
Goalkeeper save percentages are looked up in per-player files under
``./ZAWODNICY/<TEAM NAME>/``.
"""

import json
import os
import re

# Matches a score such as "2:1" inside the match-name column.
_SCORE_PATTERN = re.compile(r'[0-9]+:[0-9]+')
# Matches a percentage such as "100.0%" that trails the formation column.
_PERCENT_PATTERN = re.compile(r'[0-9]+\.[0-9]+%')


def _findTxtFiles(root):
    """Return the paths of all files under *root* whose name contains '.txt'.

    A missing *root* yields an empty list (``os.walk`` silently yields nothing).
    """
    return [
        os.path.join(dirPath, fileName)
        for dirPath, _dirNames, fileNames in os.walk(root)
        for fileName in fileNames
        if '.txt' in fileName
    ]


def _convertTeamFile(f, path, jsonsDirectory):
    """Write one JSON file per league match found in the team export *f*.

    *path* is the statistics root that is stripped from *f* to build names;
    *jsonsDirectory* is the root of the output tree.
    """
    baseName = f.replace(".txt", "").replace(path, "")
    directory = jsonsDirectory + "/" + baseName
    os.makedirs(directory, exist_ok=True)
    teamName = baseName.replace("Team Stats", "").lstrip()

    matchJSON = None  # Half-built record waiting for its second row.
    outPath = None
    # NOTE(review): input is read with the platform default encoding, as
    # before; confirm the export's real encoding before pinning one here.
    with open(f, "r") as ff:
        for i, line in enumerate(ff):
            if i < 3:
                # The first three lines are skipped unconditionally.
                continue
            splittedLine = line.split(";")
            if len(splittedLine) < 3:
                # Blank or truncated line: str.split never returns an empty
                # list, so the length must be checked against the highest
                # index read below.
                continue
            if splittedLine[2] != "\"Poland. Ekstraklasa\"":
                continue
            if i % 2 == 1:
                # Odd line index: first row of a match pair.
                opponentName = opponentNameValue(splittedLine[1], teamName)
                print(opponentName)
                matchDate = matchDateFieldValue(splittedLine[0])
                outPath = directory + "/" + f.replace(path, "").replace(
                    ".txt", opponentName + " " + matchDate + ".json")
                matchJSON = {}
                matchJSON["result"] = resultFieldValue(splittedLine[1])
                matchJSON['matchDate'] = matchDate
                matchJSON['teamsMatchStatistics1'] = teamsMatchStatisticsValue(splittedLine)
            elif matchJSON is not None:
                # Even line index: second row completes the pending match.
                matchJSON['teamsMatchStatistics2'] = teamsMatchStatisticsValue(splittedLine)
                matchJSON['teamsMatchStatistics1']['goals_lost'] = matchJSON['teamsMatchStatistics2']['goals']
                matchJSON['teamsMatchStatistics2']['goals_lost'] = matchJSON['teamsMatchStatistics1']['goals']
                # The output is opened only once the record is complete, so no
                # empty file or dangling handle is left behind on failure.
                with open(outPath, "w", encoding="utf-8") as out:
                    out.write(json.dumps(matchJSON, ensure_ascii=False))
                matchJSON = None
            # An even row without a pending first row is skipped: there is
            # nothing to pair it with.


def main():
    """Convert every team statistics file into per-match JSON files."""
    jsonsDirectory = "./jsonsMatchesEndpoint"
    os.makedirs(jsonsDirectory, exist_ok=True)
    path = './STATYSTYKI DRUŻYNOWE 2019_20/'
    for f in _findTxtFiles(path):
        _convertTeamFile(f, path, jsonsDirectory)


def opponentNameValue(s, teamName):
    """Extract the opponent's name from the match-name column *s*.

    Double quotes, the score ("d:d"), every occurrence of *teamName* and every
    '-' character are removed; the remainder is returned stripped.
    Note that hyphens inside a team name are removed as well.
    """
    s = s.replace("\"", "")
    s = _SCORE_PATTERN.sub("", s)
    s = s.rstrip()
    s = s.replace(teamName, "")
    s = s.replace("-", "")
    return s.strip()


def resultFieldValue(s):
    """Encode the score found in *s* as an integer result code.

    Returns 1 when the first number is lower than the second, 0 when both are
    equal and 2 otherwise.

    Raises:
        AttributeError: if *s* contains no "d:d" score.
    """
    first, second = (int(part) for part in _SCORE_PATTERN.search(s).group(0).split(":"))
    if first < second:
        return 1
    if first == second:
        return 0
    return 2


def matchDateFieldValue(s):
    """Reorder a date whose first ten characters are 'yyyy?MM?dd' to 'dd-MM-yyyy'."""
    yyyy = s[0:4]
    MM = s[5:7]
    dd = s[8:10]
    return dd + "-" + MM + "-" + yyyy


def teamsMatchStatisticsValue(s):
    """Build the statistics dictionary for one team from the split row *s*.

    Values are read from fixed column positions of the row; the row must have
    at least 78 columns.  The goalkeeper save percentage comes from
    ``goalkeeperSavesPercentValue``.  The resulting dictionary is printed and
    returned.
    """
    teamName = s[4].replace("\"", "")
    result = {'team': {'name': teamName}}
    formation = s[5].replace("\"", "").replace("(", "").replace(")", "")
    # Drop the trailing percentage that accompanies the formation text.
    result['formation'] = _PERCENT_PATTERN.sub("", formation).rstrip()
    result['shootsOnTarget'] = int(s[9])
    # The possession column uses a decimal comma.
    result['possession'] = float(s[14].replace(",", ".").replace("\"", ""))
    result['goals'] = int(s[6])
    result['red_cards'] = int(s[77])
    result['penaltyAreaEntries'] = int(s[53]) + int(s[54])
    result['penalties'] = int(s[40])
    result['goalkeeperSavesPercent'] = goalkeeperSavesPercentValue(s, teamName)
    print(result)
    return result


def goalkeeperSavesPercentValue(s, n):
    """Return the goalkeeper save percentage for the match described by row *s*.

    Player files are searched under ``./ZAWODNICY/<n upper-cased>``; for
    "LECH POZNAŃ" the directories "LECH POZNAŃ I" and "LECH POZNAN II" are
    searched instead.  A player row matches when its column 0 equals the match
    name (``s[1]``), its column 2 equals the match date (``s[0]``) and its
    column 3 contains "GK".  The percentage is ``100 * column 63 / column 62``.

    Returns 100 when no matching row exists or the matching row has zero in
    column 62.  When several files contain a matching row, the last one with a
    non-zero column 62 wins.
    """
    goalkeeperSavesPercent = 100
    matchName = s[1]
    matchDate = s[0]
    teamDirName = n.replace("./STATYSTYKI DRUŻYNOWE 2019_20/Team Stats ", "").rstrip().upper()
    if teamDirName == "LECH POZNAŃ":
        teamDirName = teamDirName + " I"
    files = _findTxtFiles("./ZAWODNICY/" + teamDirName)
    if teamDirName == "LECH POZNAŃ I":
        # The second squad lives in a directory spelled without the diacritic.
        files.extend(_findTxtFiles("./ZAWODNICY/" + "LECH POZNAN II"))

    for playerFile in files:
        with open(playerFile, "r") as ff:
            for i, line in enumerate(ff):
                if i < 1:
                    # Skip the first line of every player file.
                    continue
                spl = line.split(";")
                if len(spl) < 64:
                    # Too short to hold columns 62 and 63.
                    continue
                if matchName == spl[0] and matchDate == spl[2] and "GK" in spl[3]:
                    shots_against = int(spl[62])
                    saves = int(spl[63])
                    if shots_against != 0:
                        goalkeeperSavesPercent = 100 * saves / shots_against
                    # Stop reading this file; remaining files are still checked.
                    break
    return goalkeeperSavesPercent


if __name__ == "__main__":
    main()