"""Save the works linked from a marxists.org subject index as text files.

The script downloads the index page at ``INDEX_URL``, follows every link
found inside elements with the CSS class ``fst``, concatenates the text of
all ``<p>`` elements of each linked page, prints it, and writes it to
``trockizm/<sanitised page title>.txt`` (UTF-8).
"""

import sys
from pathlib import Path, PurePosixPath
from urllib.parse import urljoin, urlsplit

import requests
from bs4 import BeautifulSoup

INDEX_URL = (
    "https://www.marxists.org/reference/archive/stalin/works/"
    "subject/trotskyism/index.htm"
)
OUTPUT_DIR = Path("trockizm")
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests may block forever

# Spaces become underscores.  Deleted outright: the punctuation the script
# always stripped (' " : . ,) plus characters that either act as path
# separators or are rejected in file names on common platforms, and line
# breaks/tabs that can appear inside a multi-line <title>.
_FILENAME_TABLE = str.maketrans(" ", "_", "'\":.,\\/?*<>|\r\n\t")


def _fetch_soup(url):
    """Download *url* and return it parsed as a ``BeautifulSoup`` tree.

    Raises ``requests.RequestException`` on connection problems, timeouts
    and HTTP error statuses, so error pages are never parsed as content.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # Hand the raw bytes to BeautifulSoup so it can honour the charset the
    # document itself declares; ``response.text`` depends on requests'
    # header-based guess instead.  The parser is named explicitly so the
    # result does not depend on which optional parsers are installed.
    return BeautifulSoup(response.content, "html.parser")


def _output_name(page, url):
    """Return the ``.txt`` file name to use for *page* fetched from *url*.

    The name is derived from the page title; when the page has no usable
    title, the last path component of *url* is used instead.
    """
    title = page.title.get_text() if page.title is not None else ""
    name = title.translate(_FILENAME_TABLE)
    if not name:
        name = PurePosixPath(urlsplit(url).path).stem.translate(_FILENAME_TABLE)
    return (name or "untitled") + ".txt"


def main():
    """Download every work linked from the index and save it as text."""
    index = _fetch_soup(INDEX_URL)
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    # ``a[href]`` skips anchors that carry no link target at all.
    for link in index.select(".fst a[href]"):
        # Resolve the (typically relative) href against the index URL, so
        # the address that is printed is the address that is fetched.
        url = urljoin(INDEX_URL, link["href"])
        print(url)

        try:
            speech = _fetch_soup(url)
        except requests.RequestException as err:
            # One unreachable page should not abort the remaining downloads.
            print(f"skipping {url}: {err}", file=sys.stderr)
            continue

        text = "".join(p.text for p in speech.select("p"))
        print(text)
        (OUTPUT_DIR / _output_name(speech, url)).write_text(text, encoding="utf-8")


if __name__ == "__main__":
    main()