16 lines
766 B
Python
16 lines
766 B
Python
|
import sys
from pathlib import Path
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
|
||
|
|
||
|
# Scrape the speeches linked from the marxists.org "war" index page and save
# the paragraph text of each one as a UTF-8 file under OUTPUT_DIR.

INDEX_URL = "https://www.marxists.org/reference/archive/stalin/works/war/index.htm"
OUTPUT_DIR = "war"
REQUEST_TIMEOUT = 30  # seconds; requests waits indefinitely when no timeout is given

# Spaces become underscores; quotes, ':', '.', ',' are dropped (as the script
# always did). Path separators are dropped as well so that a page title can
# never redirect the output outside OUTPUT_DIR or into a missing subdirectory.
_FILENAME_TABLE = str.maketrans(" ", "_", "'\":.,/\\")


def resolve_speech_url(href, base_url=INDEX_URL):
    """Return the absolute URL for *href* as found on the page at *base_url*.

    Standard relative-URL resolution is used, so "../1941/x.htm", "x.htm"
    and already-absolute links are all handled.
    """
    return urljoin(base_url, href)


def title_to_filename(title):
    """Return *title* reduced to a file-name stem (no directory, no extension)."""
    return title.translate(_FILENAME_TABLE)


def fetch_page(url):
    """Download *url* and return it parsed as a BeautifulSoup document.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            4xx/5xx response.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # NOTE: no parser is pinned, exactly as before, so bs4 chooses among the
    # parsers installed locally. Pinning one could change how the site's HTML
    # is split into <p> elements, so that decision is left to the maintainer.
    return BeautifulSoup(response.text)


def main():
    """Save every speech linked from the index page as OUTPUT_DIR/<title>.txt."""
    index = fetch_page(INDEX_URL)

    out_dir = Path(OUTPUT_DIR)
    out_dir.mkdir(parents=True, exist_ok=True)

    # Only anchors that actually carry an href can be followed.
    for link in index.select("strong a[href]"):
        url = resolve_speech_url(link["href"])
        print(url)

        try:
            speech = fetch_page(url)
        except requests.RequestException as err:
            # One dead link should not abort the whole run.
            print("skipping {}: {}".format(url, err), file=sys.stderr)
            continue

        text = "".join(p.text for p in speech.select("p"))
        print(text)

        # Pages without a usable <title> are named after their URL instead.
        title = speech.title.text if speech.title is not None else ""
        stem = title_to_filename(title) or title_to_filename(url)
        (out_dir / (stem + ".txt")).write_text(text, encoding="utf-8")


if __name__ == "__main__":
    main()
|