import re
from collections import OrderedDict

import requests
from bs4 import BeautifulSoup


class scrap:
    """Scrapers for Ekstraklasa data from transfermarkt.pl."""

    # Season 18/19, matchday 10:
    # https://www.transfermarkt.pl/ekstraklasa/formtabelle/wettbewerb/PL1?saison_id=2018&min=1&max=10
    # Matchday 15:
    # https://www.transfermarkt.pl/ekstraklasa/formtabelle/wettbewerb/PL1?saison_id=2018&min=1&max=15
    # Matchday 20:
    # https://www.transfermarkt.pl/ekstraklasa/formtabelle/wettbewerb/PL1?saison_id=2018&min=1&max=20
    # Club market values:
    # https://www.transfermarkt.pl/pko-ekstraklasa/marktwerteverein/wettbewerb/PL1/plus/?stichtag=2018-10-01
    # Previous season:
    # https://www.transfermarkt.pl/pko-ekstraklasa/tabelle/wettbewerb/PL1?saison_id=2017

    def scrapTeamValue(self):
        """Return a dict mapping team name -> squad market value parsed from the euro cells."""
        url = 'https://www.transfermarkt.pl/pko-ekstraklasa/marktwerteverein/wettbewerb/PL1/plus/?stichtag=2018-10-01'
        headers = {'User-Agent': 'Mozilla/5.0'}
        req = requests.get(url, headers=headers)
        soup = BeautifulSoup(req.text, 'html.parser')

        table = soup.body.find('table', {'class': 'items'})
        bodyTable = table.find('tbody')
        values = bodyTable.findAll('td', {'class': 'rechts'})

        teamValues = []
        teams = []
        for value in values:
            # Team names are carried in the 'title' attribute of the links inside the cell.
            for item in value.findAll('a'):
                if item.has_attr('title'):
                    teams.append(item['title'].rstrip())
            # Cells containing a euro amount hold the market values.
            if re.match(r'.+€', value.text):
                num = value.text.replace(',', '.')
                num = re.match(r'[0-9]+[.][0-9]+', num)
                teamValues.append(float(num.group()))

        # Deduplicate team names while preserving order.
        teams = list(OrderedDict.fromkeys(teams))

        dictTeams = {}
        i = 0
        for team in teams:
            dictTeams[team] = teamValues[i]
            # Each team contributes two parsed euro values; keep only the first of the pair.
            i += 2
        return dictTeams

    def scrapTeamsTable(self):
        """Return a dict mapping team name -> list of the numeric columns from the form table
        (the first entry is replaced by the team's running position in the table)."""
        url = 'https://www.transfermarkt.pl/ekstraklasa/formtabelle/wettbewerb/PL1?saison_id=2017&min=1&max=10'
        headers = {'User-Agent': 'Mozilla/5.0'}
        req = requests.get(url, headers=headers)
        soup = BeautifulSoup(req.text, 'html.parser')

        body = soup.body.find('div', {'class': 'large-8 columns'})
        tables = body.findAll('div', {'class': 'responsive-table'})

        position = 1
        listTeams = []
        listProperties = []
        for table in tables:
            rows = table.find('tbody').findAll('tr')
            for row in rows:
                text = row.text.replace(u'\xa0', '')
                cells = re.split('\n', text)
                tab = []
                teamName = None
                for cell in cells:
                    # Numeric cells (matches, wins, draws, goal difference, points, ...).
                    match = re.match(r'-?[0-9]+', cell)
                    if match:
                        tab.append(float(match.group()))
                    # The only purely non-numeric cell is the team name.
                    match = re.match(r'\D+$', cell)
                    if match:
                        teamName = match.group().rstrip()
                # Replace the first parsed number with the running table position.
                tab[0] = position
                position += 1
                listTeams.append(teamName)
                listProperties.append(tab)

        dictTeams = {}
        for i, team in enumerate(listTeams):
            dictTeams[team] = listProperties[i]
        return dictTeams

    def scrapPreviousPlace(self):
        """Return a dict mapping team name -> final league position in the previous season."""
        url = 'https://www.transfermarkt.pl/pko-ekstraklasa/tabelle/wettbewerb/PL1?saison_id=2016'
        headers = {'User-Agent': 'Mozilla/5.0'}
        req = requests.get(url, headers=headers)
        soup = BeautifulSoup(req.text, 'html.parser')

        tables = soup.body.findAll('div', {'class': 'responsive-table'})
        listTeams = []
        for item in tables:
            # Team name cells in the league table, in final-standings order.
            for td in item.findAll('td', {'class': 'no-border-links hauptlink'}):
                listTeams.append(td.text.replace(u'\n', '').rstrip())

        # The table is ordered by final position, so the 1-based index is the place.
        dictTeams = {}
        place = 1
        for team in listTeams:
            dictTeams[team] = place
            place += 1
        return dictTeams
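

# A minimal usage sketch, not part of the original module: it assumes the
# Transfermarkt pages referenced above are still reachable and keep the same
# markup, and simply prints the three dictionaries the scraper builds.
if __name__ == '__main__':
    scraper = scrap()
    print(scraper.scrapTeamValue())       # {team: squad market value, ...}
    print(scraper.scrapTeamsTable())      # {team: [position, numeric columns, ...], ...}
    print(scraper.scrapPreviousPlace())   # {team: final place last season, ...}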