# -*- coding: utf-8 -*-
"""Stdin filters used to prepare Polish text for Polish-to-English translation.

Two modes are defined:

* ``pl_trans`` echoes every stdin line right-stripped and capitalized.
* ``process_words`` translates stdin word by word, preferring a supplied
  vocabulary mapping and falling back to Google Translate.

Run as a script, the module executes ``pl_trans``; the dictionary mode is
kept as commented-out lines at the bottom of the file.
"""
import pickle
import string
import sys
import time

import morfeusz2
from googletrans import Translator
from transformers import MarianTokenizer, MarianMTModel

# Shared clients, created once at import time and reused for every word.
trans = Translator()
morf = morfeusz2.Morfeusz()

# Characters whose presence marks a token as Polish in check_letters().
pl_letters = ['ą', 'ę', 'ł', 'ź', 'ć', 'ś', 'ń', 'ó', 'ż']


def pl_trans():
    """Print every stdin line right-stripped and capitalized.

    ``str.capitalize`` upper-cases the first character and lower-cases all
    the others, so any capitals inside the line are lost.
    """
    for line in sys.stdin:
        print(line.rstrip().capitalize())


def _translate_word(word, voc, exclude, confusion_words):
    """Return the output text for one whitespace-delimited token.

    Args:
        word: The raw token, possibly carrying punctuation or digits.
        voc: Mapping from source words to their translations.
        exclude: Characters removed from ``word`` before any lookup.
        confusion_words: Lower-case words that must not be taken from
            ``voc``.

    Returns:
        The vocabulary entry for the stripped token or for its analysed
        form, the token itself when it is a confusion word without Polish
        letters, and otherwise the text returned by Google Translate.
    """
    p_word = ''.join(ch for ch in word if ch not in exclude)

    # Default to the stripped token so that an empty analysis (for example
    # a token made only of punctuation) neither raises NameError nor reuses
    # the value left over from the previous word.
    lema = p_word
    analysis = morf.analyse(p_word)
    if analysis:
        # The last interpretation wins, as in the original loop.
        # NOTE(review): index 0 of a Morfeusz 2 interpretation appears to be
        # the surface form and index 1 the lemma -- confirm which one was
        # intended before changing this.
        _start, _end, interpretation = analysis[-1]
        lema = str(interpretation[0])

    # The vocabulary is tried with the stripped token first, then with the
    # analysed form.
    for key in (p_word, lema):
        if (key in voc
                and key.lower() not in confusion_words
                and key not in exclude):
            return voc[key]

    # A confusion word written without Polish letters is passed through
    # unchanged, punctuation included.
    if p_word.lower() in confusion_words and check_letters(word.lower(), pl_letters):
        return word

    return trans.translate(word, dest='en', src='pl').text


def process_words(voc):
    """Translate stdin word by word and print one output line per input line.

    Each line is split on whitespace and every token is resolved by
    ``_translate_word``. Every output word is followed by a single space,
    so non-empty lines end with a trailing space and empty lines are
    printed empty.

    Args:
        voc: Mapping from source words to their translations.
    """
    exclude = frozenset(".?!,/-+=)(%0123456789[]:;'\"")
    confusion_words = frozenset([
        'on', 'one', 'no', 'my', 'knot', 'but', 'chart', 'prom', 'pup',
        'much', 'lot', 'pan', 'herb', 'dude', 'to', 'wanna', 'unia', 'we',
    ])

    for line in sys.stdin:
        translated = [
            _translate_word(word, voc, exclude, confusion_words)
            for word in line.rstrip().split()
        ]
        print(''.join(item + ' ' for item in translated))


def check_letters(text, pl):
    """Return True when ``text`` contains none of the characters in ``pl``.

    Args:
        text: The string to inspect.
        pl: Container of characters to look for.

    Returns:
        False as soon as one character of ``text`` is found in ``pl``,
        True otherwise (including for an empty ``text``).
    """
    return not any(ch in pl for ch in text)


if __name__ == '__main__':
    # Dictionary-based mode, currently disabled. Only unpickle a vocabulary
    # file that comes from a trusted source.
    # voc = pickle.load(open('pl_en.pickle', 'rb'))
    # process_words(voc)
    pl_trans()