EkstraklasaPrzewidywanieWyn.../Data/Ekstraklasa_19_20/prepareMatchEndpointData.py

158 lines
4.7 KiB
Python
Raw Normal View History

2020-05-09 17:44:14 +02:00
import os
import re
import json
def main():
    """Convert the per-team statistics exports into one JSON file per match.

    Every file whose name contains ``.txt`` under
    ``./STATYSTYKI DRUŻYNOWE 2019_20/`` is read. The first three lines of
    each file are skipped. Of the remaining lines only those whose third
    ``;``-separated field equals ``"Poland. Ekstraklasa"`` are used. A line
    at an odd index starts a match record (result, date, first team's
    statistics); the line at the following even index supplies the second
    team's statistics, after which the record is written as JSON under
    ``./jsonsMatchesEndpoint/<input file name without .txt>/``.

    Files are opened with the platform default encoding.
    """
    jsonsDirectory = "./jsonsMatchesEndpoint"
    os.makedirs(jsonsDirectory, exist_ok=True)
    path = './STATYSTYKI DRUŻYNOWE 2019_20/'
    files = [
        os.path.join(root, fileName)
        for root, _dirs, fileNames in os.walk(path)
        for fileName in fileNames
        if '.txt' in fileName
    ]
    for f in files:
        name = f.replace(".txt", "")
        directory = jsonsDirectory + "/" + name.replace(path, "")
        os.makedirs(directory, exist_ok=True)
        # Team name as encoded in the input file name; constant per file.
        teamName = name.replace(path, "").replace("Team Stats", "").lstrip()
        matchJSON = {}
        outPath = None
        with open(f, "r") as ff:
            for i, line in enumerate(ff):
                if i < 3:
                    continue
                splittedLine = line.split(";")
                # str.split never returns an empty list, so test for the
                # fields actually needed below; this skips blank/short lines
                # instead of failing with IndexError.
                if len(splittedLine) < 3:
                    continue
                if splittedLine[2] != "\"Poland. Ekstraklasa\"":
                    continue
                if i % 2 == 1:
                    # First row of a pair: start a new match record and
                    # remember where it will be written.
                    matchJSON = {}
                    opponentName = opponentNameValue(splittedLine[1], teamName)
                    print(opponentName)
                    matchDate = matchDateFieldValue(splittedLine[0])
                    outPath = directory + "/" + f.replace(path, "").replace(
                        ".txt", opponentName + " " + matchDate + ".json")
                    matchJSON["result"] = resultFieldValue(splittedLine[1])
                    matchJSON['matchDate'] = matchDate
                    matchJSON['teamsMatchStatistics1'] = teamsMatchStatisticsValue(splittedLine)
                else:
                    # Second row of a pair: complete the record and write it.
                    matchJSON['teamsMatchStatistics2'] = teamsMatchStatisticsValue(splittedLine)
                    matchJSON['teamsMatchStatistics1']['goals_lost'] = matchJSON['teamsMatchStatistics2']['goals']
                    matchJSON['teamsMatchStatistics2']['goals_lost'] = matchJSON['teamsMatchStatistics1']['goals']
                    # The file is opened only once the record is complete,
                    # so no handle stays open between rows and no empty
                    # file is left behind for an unfinished pair.
                    with open(outPath, "w") as out:
                        out.write(json.dumps(matchJSON, ensure_ascii=False))
                    # Reset so a stray second row cannot overwrite the file
                    # that was just written.
                    matchJSON = {}
                    outPath = None
def opponentNameValue(s, teamName):
    """Extract the opponent's name from a match description.

    Double quotes and the first-to-last ``<digits>:<digits>`` score patterns
    are removed from *s*, then every occurrence of *teamName* and every
    hyphen is dropped. The remainder, stripped of surrounding whitespace,
    is returned.
    """
    withoutScore = re.sub(r'[0-9]+:[0-9]+', "", s.replace("\"", "")).rstrip()
    opponent = withoutScore.replace(teamName, "").replace("-", "")
    return opponent.strip()
def resultFieldValue(s):
    """Encode the score found in *s* as an integer outcome code.

    The first ``<digits>:<digits>`` occurrence in *s* is taken as the score.
    Returns 1 when the number left of the colon is smaller than the number
    right of it, 0 when both are equal, and 2 otherwise.
    """
    scoreMatch = re.search(r'[0-9]+:[0-9]+', s)
    left, right = (int(part) for part in scoreMatch.group(0).split(":"))
    if left == right:
        return 0
    return 1 if left < right else 2
def matchDateFieldValue(s):
    """Rearrange a date string by fixed character positions.

    Characters 0-3, 5-6 and 8-9 of *s* are taken as year, month and day and
    returned joined as ``day-month-year``.
    """
    year, month, day = s[0:4], s[5:7], s[8:10]
    return "-".join((day, month, year))
def teamsMatchStatisticsValue(s):
    """Build the statistics dictionary for one team from a split data row.

    *s* is the list of ``;``-separated fields of one row. The team name is
    field 4 and the formation field 5 (quotes, parentheses and any
    ``<digits>.<digits>%`` fragment removed). Fields 9, 6, 77 and 40 are
    stored as integers under ``shootsOnTarget``, ``goals``, ``red_cards``
    and ``penalties``; field 14 as a float (decimal comma accepted) under
    ``possession``; fields 53 and 54 are summed into ``penaltyAreaEntries``.
    ``goalkeeperSavesPercent`` comes from ``goalkeeperSavesPercentValue``.
    The dictionary is printed and returned.
    """
    teamName = s[4].replace("\"", "")
    # Drop double quotes and both parentheses in a single pass.
    formation = s[5].translate(str.maketrans("", "", "\"()"))
    formation = re.sub(r'[0-9]+\.[0-9]+%', "", formation).rstrip()
    result = {
        'team': {'name': teamName},
        'formation': formation,
        'shootsOnTarget': int(s[9]),
        'possession': float(s[14].replace(",", ".").replace("\"", "")),
        'goals': int(s[6]),
        'red_cards': int(s[77]),
        'penaltyAreaEntries': int(s[53]) + int(s[54]),
        'penalties': int(s[40]),
        'goalkeeperSavesPercent': goalkeeperSavesPercentValue(s, teamName),
    }
    print(result)
    return result
def _listTxtFiles(path):
    """Return the paths of all files under *path* whose name contains '.txt'."""
    return [
        os.path.join(root, fileName)
        for root, _dirs, fileNames in os.walk(path)
        for fileName in fileNames
        if '.txt' in fileName
    ]


def goalkeeperSavesPercentValue(s, n):
    """Look up the goalkeeper save percentage for one team in one match.

    *s* is a split match row: ``s[1]`` is the match name and ``s[0]`` the
    match date. *n* is the team name; upper-cased, it selects the directory
    ``./ZAWODNICY/<TEAM>`` whose ``.txt`` files are scanned. For
    "LECH POZNAŃ" the directories "LECH POZNAŃ I" and "LECH POZNAN II" are
    both scanned.

    In each file the first line is skipped. The first row whose field 0
    equals the match name, whose field 2 equals the match date and whose
    field 3 contains "GK" ends the scan of that file; if its field 62
    (shots against) is non-zero the result becomes
    ``100 * field 63 / field 62``. Later files can overwrite the value
    found in earlier ones.

    Returns 100 when no matching row with non-zero shots against is found.
    """
    goalkeeperSavesPercent = 100
    matchName = s[1]
    matchDate = s[0]
    teamDirName = n.replace("./STATYSTYKI DRUŻYNOWE 2019_20/Team Stats ", "").rstrip().upper()
    if teamDirName == "LECH POZNAŃ":
        teamDirName = teamDirName + " I"
    files = _listTxtFiles("./ZAWODNICY/" + teamDirName)
    if teamDirName == "LECH POZNAŃ I":
        files.extend(_listTxtFiles("./ZAWODNICY/" + "LECH POZNAN II"))
    for f in files:
        # The context manager closes the file even if parsing a row raises.
        with open(f, "r") as ff:
            for i, line in enumerate(ff):
                if i < 1:
                    continue
                spl = line.split(";")
                # Blank or truncated rows cannot describe a goalkeeper.
                if len(spl) < 4:
                    continue
                if matchName == spl[0] and matchDate == spl[2] and "GK" in spl[3]:
                    shots_against = int(spl[62])
                    saves = int(spl[63])
                    if shots_against != 0:
                        goalkeeperSavesPercent = 100 * saves / shots_against
                    break
    return goalkeeperSavesPercent
# Run the conversion only when executed as a script, so importing this
# module (e.g. to reuse the parsing helpers) has no side effects.
if __name__ == "__main__":
    main()