"""Scrape the pages linked from the marxists.org Stalin "war" index.

The script fetches the index page, follows every ``<strong><a>`` link on it,
concatenates the text of all ``<p>`` elements of each linked page, echoes that
text to stdout and saves it as ``war/<sanitised page title>.txt`` (UTF-8).
"""
import sys
from pathlib import Path
from urllib.parse import urljoin, urlparse

import requests
from bs4 import BeautifulSoup

INDEX_URL = "https://www.marxists.org/reference/archive/stalin/works/war/index.htm"
OUTPUT_DIR = Path("war")

# Name the parser explicitly: without it BeautifulSoup picks whichever parser
# happens to be installed, so results differ between machines (and it warns).
# "html.parser" ships with Python; switch to "lxml" if it is installed and a
# more lenient parse of malformed pages is wanted.
HTML_PARSER = "html.parser"

# Seconds to wait for the server before giving up; requests has no default
# timeout and would otherwise block forever on a stalled connection.
REQUEST_TIMEOUT = 30

# Title -> file name mapping, applied in a single pass: spaces become
# underscores; quotes, colons, dots and commas are dropped. Path separators
# are dropped as well so a title can never point outside OUTPUT_DIR.
_TITLE_TO_FILENAME = str.maketrans(
    {
        " ": "_",
        "'": None,
        '"': None,
        ":": None,
        ".": None,
        ",": None,
        "/": None,
        "\\": None,
    }
)


def _fetch_soup(url):
    """Download *url* and return it parsed as a ``BeautifulSoup`` tree.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-2xx HTTP status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail loudly instead of saving an error page as if it were a speech.
    response.raise_for_status()
    return BeautifulSoup(response.text, HTML_PARSER)


def _filename_for(speech, url):
    """Return the ``.txt`` file name for a parsed page.

    The name is derived from the page's ``<title>``. When the page has no
    title (or the title sanitises to an empty string), the last path segment
    of *url* is used instead so the page is still saved.
    """
    title = speech.title.text if speech.title is not None else ""
    stem = title.translate(_TITLE_TO_FILENAME)
    if not stem:
        stem = Path(urlparse(url).path).stem or "untitled"
    return stem + ".txt"


def main():
    """Fetch the index and save every linked page as a text file.

    A page that cannot be downloaded is reported on stderr and skipped so one
    bad link does not abort the whole run. A failure to fetch the index itself
    propagates as ``requests.RequestException``.
    """
    index = _fetch_soup(INDEX_URL)
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    for link in index.select("strong a"):
        href = link.get("href")
        if not href:
            # Named anchors (<a name=...>) carry no target to follow.
            continue

        # Resolve relative hrefs ("../1941/07/03.htm", "foo.htm", absolute
        # URLs) the way a browser would, rather than by string surgery.
        url = urljoin(INDEX_URL, href)
        print(url)

        try:
            speech = _fetch_soup(url)
        except requests.RequestException as err:
            print(f"skipping {url}: {err}", file=sys.stderr)
            continue

        text = "".join(p.text for p in speech.select("p"))
        print(text)

        target = OUTPUT_DIR / _filename_for(speech, url)
        target.write_text(text, encoding="utf-8")


if __name__ == "__main__":
    main()