2020-11-15 16:26:19 +01:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
from transformers import MarianTokenizer, MarianMTModel
|
|
|
|
import pickle
|
|
|
|
import sys
|
|
|
|
import string
|
|
|
|
from googletrans import Translator
|
|
|
|
import morfeusz2
|
|
|
|
import time
|
|
|
|
# Shared googletrans client; process_words() calls its translate() with
# src='pl' and dest='en' for tokens it cannot resolve from the dictionary.
trans = Translator()
|
|
|
|
# Shared Morfeusz analyser instance; process_words() calls analyse() on each
# cleaned token.
morf = morfeusz2.Morfeusz()
|
|
|
|
|
|
|
|
# Polish diacritic letters; process_words() passes this list to
# check_letters() to test whether a token contains any of them.
pl_letters = ['ą','ę','ł','ź','ć','ś','ń','ó','ż']
|
|
|
|
def pl_trans():
    """Copy stdin to stdout, upper-casing the first character of every line.

    Trailing whitespace (including the newline) is stripped from each input
    line before printing, so every output line ends with exactly the one
    newline that ``print`` appends. Blank input lines produce blank output
    lines.
    """
    for line in sys.stdin:
        stripped = line.rstrip()
        # Slice rather than index: a blank line strips down to '' and the
        # previous ``line[0]`` would re-emit the raw newline character,
        # printing a spurious extra line.
        print(stripped[:1].upper() + stripped[1:])
|
|
|
|
def _translate_token(word, voc, exclude, confusion_words):
    """Return the replacement text for one whitespace-separated token.

    Lookup order: the cleaned token in ``voc``, then the Morfeusz form in
    ``voc``, then (for confusion words without Polish letters) the token
    itself, and finally the online translator with the token as fallback.
    """
    p_word = ''.join(ch for ch in word if ch not in exclude)
    analysis = morf.analyse(p_word)
    if not analysis:
        # No interpretation returned: the token contributes an empty word.
        return ''

    # Every interpretation would overwrite the previous result, so only the
    # last one decides the outcome; evaluating it once avoids repeating the
    # (network-backed) translation for each interpretation.
    # NOTE(review): element 0 of the interpretation tuple looks like the
    # surface form rather than the lemma in the Morfeusz 2 bindings - confirm
    # which index is intended here.
    lema = analysis[-1][2][0]

    if p_word in voc and p_word.lower() not in confusion_words and p_word not in exclude:
        return voc[p_word]
    if str(lema) in voc and lema.lower() not in confusion_words and lema not in exclude:
        return voc[str(lema)]

    # Confusion words that carry no Polish letters are passed through as is.
    if p_word.lower() in confusion_words and check_letters(word.lower(), pl_letters):
        return word

    try:
        return trans.translate(word, dest='en', src='pl').text
    except Exception:
        # Best effort: keep the original token when the translation call
        # fails, but let KeyboardInterrupt / SystemExit propagate.
        return word


def process_words(voc):
    """Rewrite stdin word by word and print one output line per input line.

    Each input line is split on whitespace; every token is replaced via the
    ``voc`` mapping, the Morfeusz analysis, or the online translator, and the
    results are printed with a single space after each word (so non-empty
    output lines keep a trailing space).

    Args:
        voc: mapping from a source word to its replacement string; it is
            queried with the punctuation-stripped token and with the form
            returned by Morfeusz.
    """
    # Characters stripped from a token before any lookup.
    exclude = frozenset(".?!,/-+=)(%0123456789[]:;'\"")
    # Words that are never taken from ``voc``; when they contain no Polish
    # letters they are copied to the output unchanged.
    confusion_words = frozenset([
        'on', 'one', 'no', 'my', 'knot', 'but', 'chart', 'prom', 'pup',
        'much', 'lot', 'pan', 'herb', 'dude', 'to', 'wanna', 'unia', 'we',
    ])

    for line in sys.stdin:
        tokens = line.rstrip().split()
        print(''.join(
            _translate_token(token, voc, exclude, confusion_words) + ' '
            for token in tokens
        ))
|
|
|
|
|
|
|
|
def check_letters(text,pl):
    """Report whether *text* is free of every character listed in *pl*.

    Returns ``False`` as soon as one character of *text* is found in *pl*
    and ``True`` otherwise (including when *text* is empty).
    """
    return not any(symbol in pl for symbol in text)
|
|
|
|
|
2020-11-15 18:57:38 +01:00
|
|
|
#voc = pickle.load(open('pl_en.pickle', 'rb'))
|
|
|
|
#process_words(voc)
|
|
|
|
# Script entry point: run the first-letter capitalisation pass over stdin.
# This runs at import time as well, since there is no __main__ guard.
pl_trans()
|