"""Translate song texts found in ``./texts.htm`` and save each to a file.

The script walks the ``<tr>`` rows of the HTML document starting at
``START_ROW``.  A row containing ``<font size="+1">`` is treated as a song
title; any other row with enough text is treated as a song body, translated
together with the most recent title, and written to its own ``.txt`` file
in the current working directory.
"""
from time import sleep

from bs4 import BeautifulSoup
from googletrans import Translator

# Index of the first <tr> row to process; earlier rows are skipped.
START_ROW = 4700
# Rows whose text is this long or shorter are reported as empty.
MIN_TEXT_LENGTH = 4
# Seconds to pause after a failed translation call before moving on.
ERROR_PAUSE_SECONDS = 12
# Number of leading characters of the translated text used as the file name.
NAME_LENGTH = 25

# Spaces become underscores; the listed punctuation and newlines are dropped.
_FILENAME_TABLE = str.maketrans(" ", "_", "'?\n\":.,/")


def writeSong(name, text):
    """Write *text* to ``<sanitized name>.txt`` encoded as UTF-8.

    The file name is derived from *name* by replacing spaces with
    underscores and removing the characters ``' ? " : . , /`` and newlines.
    An existing file with the same name is overwritten, so two songs whose
    sanitized names coincide end up in the same file (the later one wins).

    Args:
        name: Raw text to build the file name from.
        text: Content to write.
    """
    file_name = name.translate(_FILENAME_TABLE) + ".txt"
    with open(file_name, "w", encoding="utf-8") as out:
        out.write(text)


# Read the whole document up front so the input file is closed before the
# long-running translation loop starts.
with open("./texts.htm", "r", encoding="windows-1251") as f:
    # NOTE(review): no parser is named, so BeautifulSoup chooses one from
    # what is installed. Left as-is because a different parser may yield a
    # different row list, which would invalidate START_ROW - confirm and pin.
    s = BeautifulSoup(f.read())

rows = s.select("tr")
t = Translator()

current_title = ""
current_original_text = ""
current_english_text = ""

for row_number, row in enumerate(rows[START_ROW:], start=START_ROW):
    # A row holding a <font size="+1"> element is a title row.
    title_nodes = row.select('font[size="+1"]')
    if title_nodes:
        current_title = title_nodes[0].text.replace('\n', '')
        print(f"title: {current_title}")
        print(f"row #{row_number}")
        continue

    print("text")
    if len(row.text) <= MIN_TEXT_LENGTH:
        print("empty row")
        continue

    current_original_text = row.text
    to_translate = current_title + "\n" + current_original_text
    try:
        translated = t.translate(to_translate)
        current_english_text = translated.text
    except Exception:
        # Catch Exception rather than everything so Ctrl-C still stops the
        # script.  The failed row is skipped, not retried; the pause only
        # spaces out the next request.
        print("error encoding")
        sleep(ERROR_PAUSE_SECONDS)
        continue

    writeSong(current_english_text[:NAME_LENGTH], current_english_text)