45 lines
1.5 KiB
Python
45 lines
1.5 KiB
Python
from bs4 import BeautifulSoup
|
|
from googletrans import Translator
|
|
# Input HTML document, decoded as Windows-1251 text.  The handle is kept at
# module level and is read further down, where the markup is parsed.
f = open(
    "./texts.htm",
    mode="r",
    encoding="windows-1251",
)
|
|
# Characters that are dropped from a song name before it becomes a file name:
# the punctuation the script always removed, plus control characters and the
# remaining characters that are illegal in file names on common filesystems.
_FILENAME_STRIP = "'?\n\r\t\":.,/\\*<>|"
# Single-pass translation table: spaces become underscores, the characters
# above are deleted.
_FILENAME_TABLE = str.maketrans(" ", "_", _FILENAME_STRIP)


def writeSong(name, text):
    """Write *text* to ``<sanitized name>.txt`` in the current directory.

    The file name is derived from *name* by replacing spaces with
    underscores and deleting quotes, punctuation, path separators and
    line breaks.  If nothing is left after sanitizing, ``"untitled"`` is
    used so the result is never the hidden file ``.txt``.

    An existing file with the same name is overwritten.

    Args:
        name: Raw text the file name is derived from.
        text: Content to store, written as UTF-8.

    Returns:
        The relative path of the file that was written.

    Raises:
        OSError: If the file cannot be created or written.
    """
    safe_name = name.translate(_FILENAME_TABLE) or "untitled"
    path = safe_name + ".txt"
    # Plain "w" is enough: the file is only written, never read back.
    with open(path, "w", encoding="utf-8") as out:
        out.write(text)
    return path
|
|
# ---------------------------------------------------------------------------
# Main script: walk the <tr> rows of the parsed document, remember the most
# recent title row, translate each following text row together with that
# title, and save every translation to its own file via writeSong().
# ---------------------------------------------------------------------------
from time import sleep  # imported once here instead of inside the error handler

# Index of the first <tr> that is processed; all earlier rows are skipped.
START_ROW = 6930
# A row whose text is this long or shorter is treated as empty.
MIN_TEXT_LENGTH = 4
# Seconds to wait after a failed translation before moving on to the next row
# (presumably a back-off for the translation service -- confirm).
RETRY_PAUSE_SECONDS = 12

# NOTE(review): no parser is named, so BeautifulSoup uses whichever one is
# installed.  Different parsers can build different trees from the same
# markup, which would shift START_ROW -- pin one only after checking the row
# numbering still matches.
with f:  # close the input handle as soon as the markup has been read
    s = BeautifulSoup(f.read())

rows = s.select("tr")

t = Translator()

current_title = ""
current_original_text = ""
current_english_text = ""

for i, row in enumerate(rows[START_ROW:]):
    row_number = START_ROW + i

    # A row containing <font size="+1"> is a title row: remember the title
    # for the text rows that follow and move on.
    title_tags = row.select('font[size="+1"]')
    if title_tags:
        title = title_tags[0]
        current_title = title.text.replace("\n", "")
        print(f"title: {current_title}")
        print(f"row #{row_number}")
        continue

    # Not a title, so it is either a text row or an empty one.
    print("text")
    if len(row.text) <= MIN_TEXT_LENGTH:
        print("empty row")
        continue

    current_original_text = row.text
    to_translate = current_title + "\n" + current_original_text

    try:
        translated = t.translate(to_translate)
        current_english_text = translated.text
    except Exception as exc:
        # Best effort: report the failure, pause, and skip this row.
        # `Exception` (not a bare except) keeps Ctrl-C / SystemExit working.
        print("error encoding")
        print(f"row #{row_number} skipped: {exc!r}")
        sleep(RETRY_PAUSE_SECONDS)
        continue

    # The first 25 characters of the translation serve as the file name.
    writeSong(current_english_text[:25], current_english_text)
|
|
|