data and req
This commit is contained in:
parent
bdf5732b70
commit
7af7794766
@ -15,7 +15,7 @@ def main():
|
||||
doc = requests.get(MAIN_URL + page_element['href'])
|
||||
doc_soup = BeautifulSoup(doc.text, 'lxml', from_encoding="utf-8")
|
||||
text_elem = doc_soup.find("div", {"class": "pagetext"}).next_element
|
||||
text = text_elem.text if not text_elem.find("math") else "math image"
|
||||
text = text_elem.text
|
||||
image_url = doc_soup.find("div", {"class": "prp-page-image"}).next_element['src']
|
||||
return {"title": page_element['title'], "href": MAIN_URL + page_element['href'], "image_url": image_url, "text": text,}
|
||||
|
||||
|
14
requirements.txt
Normal file
14
requirements.txt
Normal file
@ -0,0 +1,14 @@
|
||||
beautifulsoup4==4.11.1
|
||||
certifi==2022.12.7
|
||||
charset-normalizer==2.1.1
|
||||
idna==3.4
|
||||
lxml==4.9.2
|
||||
numpy==1.24.1
|
||||
pandas==1.5.2
|
||||
python-dateutil==2.8.2
|
||||
pytz==2022.7
|
||||
requests==2.28.1
|
||||
six==1.16.0
|
||||
soupsieve==2.3.2.post1
|
||||
tqdm==4.64.1
|
||||
urllib3==1.26.13
|
13353
yellow.tsv
Normal file
13353
yellow.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user