przemowoAnalizator/piosenki/translator.py

45 lines
1.5 KiB
Python

from bs4 import BeautifulSoup
from googletrans import Translator
# Input HTML page, decoded explicitly as windows-1251. The handle is kept at
# module level and is read in one go by the BeautifulSoup parsing step that
# follows the writeSong definition.
f = open("./texts.htm", 'r', encoding="windows-1251")
# Translation table used to turn a song title into a file name: spaces become
# underscores; quotes, punctuation, path separators, control whitespace and
# the characters Windows forbids in file names are dropped entirely.
_FILENAME_CHAR_MAP = str.maketrans(" ", "_", "'?\n\":.,/" + "\\*<>|\r\t")


def writeSong(name: str, text: str) -> None:
    """Write *text* to ``<sanitized name>.txt`` in the current directory.

    The file name is derived from *name* by replacing spaces with
    underscores and removing characters that are awkward or illegal in file
    names (``' ? " : . , /`` and newlines, plus ``\\ * < > |``, carriage
    returns and tabs).  If nothing is left after sanitizing, the file is
    called ``untitled.txt`` instead of the hidden, extension-only ``.txt``.

    Args:
        name: Human-readable title to base the file name on.
        text: Content to store; written as UTF-8, replacing any existing file
            of the same name.
    """
    # A single translate() pass replaces the former chain of .replace() calls.
    name = name.translate(_FILENAME_CHAR_MAP)
    if not name:
        name = "untitled"
    # Plain "w" is enough: the file is only written, never read back.
    with open(name + ".txt", "w", encoding="utf-8") as song_file:
        song_file.write(text)
# --- Main script: translate each song found in the HTML table and save it ---
#
# The page is parsed into <tr> rows.  A row containing a <font size="+1"> tag
# is treated as a song title; any other row with enough text is treated as the
# lyrics belonging to the most recently seen title.  Title and lyrics are sent
# to the translator together and the translated result is written to disk by
# writeSong, using the first 25 translated characters as the file name.

from time import sleep  # hoisted: was re-imported inside the error handler

# Index of the first <tr> to process; rows before it are skipped.
# NOTE(review): presumably a resume point from an earlier partial run - confirm.
START_ROW = 4700
# Rows whose text is this long or shorter are reported as empty and skipped.
MIN_TEXT_LENGTH = 4
# Seconds to wait after a failed translation before moving on to the next row.
RETRY_DELAY_SECONDS = 12

# Read the whole page, then close the input handle straight away instead of
# leaving it open for the lifetime of the script.
# NOTE(review): no parser is named, so BeautifulSoup picks whichever one is
# installed and the row numbering may differ between machines; pin the parser
# once it is known which one START_ROW was measured against.
with f:
    s = BeautifulSoup(f.read())
rows = s.select("tr")
t = Translator()
current_title = ""
current_original_text = ""
current_english_text = ""

for row_number, row in enumerate(rows[START_ROW:], start=START_ROW):
    # Title rows only update the running title; nothing is translated yet.
    title_tags = row.select('font[size="+1"]')
    if title_tags:
        current_title = title_tags[0].text.replace('\n', '')
        print(f"title: {current_title}")
        print(f"row #{row_number}")
        continue

    print("text")
    if len(row.text) <= MIN_TEXT_LENGTH:
        print("empty row")
        continue

    current_original_text = row.text
    to_translate = current_title + "\n" + current_original_text
    try:
        translated = t.translate(to_translate)
        current_english_text = translated.text
    except Exception as exc:
        # Best effort: report the failure, back off, and skip this row.
        # Catching Exception rather than using a bare except keeps Ctrl-C
        # and SystemExit able to stop the script.
        print("error encoding")
        print(repr(exc))
        sleep(RETRY_DELAY_SECONDS)
        continue
    writeSong(current_english_text[:25], current_english_text)