wmt-2020-pl-en/gru_attention.ipynb
2021-02-08 14:36:14 +01:00

602 lines
49 KiB
Plaintext

{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "gru_attention.ipynb",
"provenance": [],
"collapsed_sections": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "code",
"metadata": {
"id": "WtPfLDxTLoFn"
},
"source": [
"from __future__ import unicode_literals, print_function, division\r\n",
"from io import open\r\n",
"import unicodedata\r\n",
"import string\r\n",
"import re\r\n",
"import random\r\n",
"\r\n",
"import torch\r\n",
"import torch.nn as nn\r\n",
"from torch import optim\r\n",
"import torch.nn.functional as F\r\n",
"\r\n",
"# Use the CUDA device when torch reports one is available, otherwise the CPU.\r\n",
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")"
],
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "X3xChOaALwwA"
},
"source": [
"# Vocabulary indices of the \"SOS\" and \"EOS\" entries that every Lang starts with.\r\n",
"SOS_token = 0\r\n",
"EOS_token = 1\r\n",
"\r\n",
"\r\n",
"class Lang:\r\n",
"    \"\"\"Vocabulary of one language, built incrementally from sentences.\r\n",
"\r\n",
"    Maintains word -> index, word -> count and index -> word mappings.\r\n",
"    Indices 0 and 1 are pre-assigned to \"SOS\" and \"EOS\", so the first real\r\n",
"    word receives index 2.\r\n",
"    \"\"\"\r\n",
"\r\n",
"    def __init__(self, name):\r\n",
"        self.name = name\r\n",
"        self.word2index = {}\r\n",
"        self.word2count = {}\r\n",
"        self.index2word = {0: \"SOS\", 1: \"EOS\"}\r\n",
"        self.n_words = 2  # SOS and EOS are already part of the vocabulary\r\n",
"\r\n",
"    def addSentence(self, sentence):\r\n",
"        \"\"\"Register every token of `sentence`, splitting on single spaces.\"\"\"\r\n",
"        for token in sentence.split(' '):\r\n",
"            self.addWord(token)\r\n",
"\r\n",
"    def addWord(self, word):\r\n",
"        \"\"\"Count a known word, or assign the next free index to a new one.\"\"\"\r\n",
"        if word in self.word2index:\r\n",
"            self.word2count[word] += 1\r\n",
"            return\r\n",
"        new_index = self.n_words\r\n",
"        self.word2index[word] = new_index\r\n",
"        self.index2word[new_index] = word\r\n",
"        self.word2count[word] = 1\r\n",
"        self.n_words = new_index + 1"
],
"execution_count": 3,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "1rra860ILy48"
},
"source": [
"# Turn a Unicode string to plain ASCII, thanks to\r\n",
"# https://stackoverflow.com/a/518232/2809427\r\n",
"#\r\n",
"# Letters without a canonical (NFD) decomposition are not split into a base\r\n",
"# letter plus combining mark, so dropping the marks leaves them untouched.\r\n",
"# They need an explicit ASCII replacement instead.\r\n",
"_NO_DECOMPOSITION_TO_ASCII = {\r\n",
"    '\\u0142': 'l',  # LATIN SMALL LETTER L WITH STROKE\r\n",
"    '\\u0141': 'L',  # LATIN CAPITAL LETTER L WITH STROKE\r\n",
"}\r\n",
"\r\n",
"\r\n",
"def unicodeToAscii(s):\r\n",
"    \"\"\"Return `s` with diacritics removed.\r\n",
"\r\n",
"    The string is NFD-normalised and every combining mark (category 'Mn') is\r\n",
"    dropped. Characters listed in _NO_DECOMPOSITION_TO_ASCII are replaced by\r\n",
"    their ASCII counterpart; all other characters are kept as they are.\r\n",
"    \"\"\"\r\n",
"    return ''.join(\r\n",
"        _NO_DECOMPOSITION_TO_ASCII.get(c, c)\r\n",
"        for c in unicodedata.normalize('NFD', s)\r\n",
"        if unicodedata.category(c) != 'Mn'\r\n",
"    )\r\n",
"\r\n",
"\r\n",
"# Lowercase, trim, and remove non-letter characters\r\n",
"def normalizeString(s):\r\n",
"    \"\"\"Return a lower-cased, ASCII-folded, space-separated version of `s`.\r\n",
"\r\n",
"    The characters . ! ? are kept and separated from the preceding text by a\r\n",
"    space; every run of other non-letter characters becomes one space. The\r\n",
"    result carries no leading or trailing whitespace, so splitting it on ' '\r\n",
"    does not produce empty tokens at either end.\r\n",
"    \"\"\"\r\n",
"    s = unicodeToAscii(s.lower().strip())\r\n",
"    # Put a space in front of sentence punctuation so it becomes its own token.\r\n",
"    s = re.sub(r\"([.!?])\", r\" \\1\", s)\r\n",
"    # Collapse each run of characters outside a-z, A-Z and . ! ? into one space.\r\n",
"    s = re.sub(r\"[^a-zA-Z.!?]+\", r\" \", s)\r\n",
"    # A replaced run at the start or end (e.g. a bracket) leaves a stray space.\r\n",
"    return s.strip()"
],
"execution_count": 4,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "sEcey4mxL3We"
},
"source": [
"def readLangs(in_path='in_40k.tsv', out_path='exp_40k.tsv', max_lines=100):\r\n",
"    \"\"\"Read the parallel corpus and return empty vocabularies plus sentence pairs.\r\n",
"\r\n",
"    Line i of `in_path` is paired with line i of `out_path`, and both sides\r\n",
"    are passed through normalizeString.\r\n",
"\r\n",
"    Args:\r\n",
"        in_path: text file with one input sentence per line.\r\n",
"        out_path: text file with one output sentence per line.\r\n",
"        max_lines: keep only the first `max_lines` lines of each file;\r\n",
"            None keeps every line.\r\n",
"\r\n",
"    Returns:\r\n",
"        (input_lang, output_lang, pairs): two empty Lang objects named 'in'\r\n",
"        and 'out', and a list of [input, output] normalised sentence pairs.\r\n",
"    \"\"\"\r\n",
"    print(\"Reading lines...\")\r\n",
"\r\n",
"    # Read each file with an explicit encoding and close it straight away.\r\n",
"    with open(in_path, encoding='utf-8') as f:\r\n",
"        linesIn = f.read().strip().split('\\n')[:max_lines]\r\n",
"    with open(out_path, encoding='utf-8') as f:\r\n",
"        linesOut = f.read().strip().split('\\n')[:max_lines]\r\n",
"\r\n",
"    # Pair the lines by position and normalise both sides; zip() stops at the\r\n",
"    # shorter list if the files differ in length.\r\n",
"    pairs = [[normalizeString(a), normalizeString(b)] for a, b in zip(linesIn, linesOut)]\r\n",
"\r\n",
"    # The vocabularies are returned empty; the caller fills them.\r\n",
"    input_lang = Lang('in')\r\n",
"    output_lang = Lang('out')\r\n",
"    return input_lang, output_lang, pairs"
],
"execution_count": 5,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "59dvTVlsL9dK"
},
"source": [
"# Exclusive upper bound on the number of space-separated tokens per sentence.\r\n",
"MAX_LENGTH = 80\r\n",
"\r\n",
"\r\n",
"def filterPair(p):\r\n",
"    \"\"\"Return True when both p[0] and p[1] have fewer than MAX_LENGTH tokens.\"\"\"\r\n",
"    if len(p[0].split(' ')) >= MAX_LENGTH:\r\n",
"        return False\r\n",
"    return len(p[1].split(' ')) < MAX_LENGTH\r\n",
"\r\n",
"\r\n",
"def filterPairs(pairs):\r\n",
"    \"\"\"Return a new list with, in order, the pairs that filterPair accepts.\"\"\"\r\n",
"    return list(filter(filterPair, pairs))"
],
"execution_count": 6,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "4pKs9H5_ST8P",
"outputId": "2d07edf9-1bbd-4676-9577-411dc09f28b7"
},
"source": [
"def prepareData(lang1, lang2, reverse=False):\r\n",
"    \"\"\"Load the corpus, drop over-long pairs and build both vocabularies.\r\n",
"\r\n",
"    `lang1`, `lang2` and `reverse` are accepted but not used by this function.\r\n",
"\r\n",
"    Returns:\r\n",
"        (input_lang, output_lang, pairs): the two Lang objects filled from\r\n",
"        the kept pairs, and the list of kept pairs.\r\n",
"    \"\"\"\r\n",
"    src_vocab, tgt_vocab, all_pairs = readLangs()\r\n",
"    print(\"Read %s sentence pairs\" % len(all_pairs))\r\n",
"\r\n",
"    kept_pairs = filterPairs(all_pairs)\r\n",
"    print(\"Trimmed to %s sentence pairs\" % len(kept_pairs))\r\n",
"\r\n",
"    print(\"Counting words...\")\r\n",
"    for pair in kept_pairs:\r\n",
"        src_vocab.addSentence(pair[0])\r\n",
"        tgt_vocab.addSentence(pair[1])\r\n",
"\r\n",
"    print(\"Counted words:\")\r\n",
"    for vocab in (src_vocab, tgt_vocab):\r\n",
"        print(vocab.name, vocab.n_words)\r\n",
"    return src_vocab, tgt_vocab, kept_pairs\r\n",
"\r\n",
"\r\n",
"# Build the notebook-level vocabularies and sentence pairs. prepareData does\r\n",
"# not use the language codes or the flag passed here.\r\n",
"input_lang, output_lang, pairs = prepareData('pl', 'en', True)\r\n",
"#print(random.choice(pairs))"
],
"execution_count": 8,
"outputs": [
{
"output_type": "stream",
"text": [
"Reading lines...\n",
"[['naprawde wazne jest by wzrost gospodarczy nie powodowa automatycznie proporcjonalnego zwiekszonego zuzycia energii .', 'it is really important that growth should not automatically generate a proportionate rise in energy consumption .'], [' bg pani przewodniczaca panie premierze ! rok bedzie pierwszym w ktorym unii europejskiej beda przewodzic dwa kraje z europy srodkowej i wschodniej wegry oraz polska .', ' bg madam president prime minister will be the first year in which the european union will be headed by two countries from central and eastern europe hungary and poland .'], ['w dodatku odsetek ludzi w wieku ponad lat wzrosnie z w roku do w roku .', 'in addition the proportion of people aged over will rise from . in to . in .'], ['na pismie . sv w sprawozdaniu stwierdzono ze w wiekszosci panstw cz onkowskich spo eczenstwo starzeje sie co obciazy systemy zabezpieczenia spo ecznego i systemy emerytalne .', 'in writing . sv this report observes that in most member states the population is getting older and that the social security and pension systems will therefore be put under strain .'], ['oswiadczenia pisemne art . 
regulaminu ', 'written statements rule '], ['jestesmy na przyk ad za przeprowadzeniem wspolnych badan z zakresu bezpieczenstwa jadrowego ale obawiamy sie ze wiele punktow sprawozdania wyraza zbyt mocne poparcie dla kwestgii energii jadrowej .', 'we are in favour of common research into nuclear safety for example but we feel that in several cases the report is far too pro nuclear energy .'], ['kolejna kwestia wspomniana w trakcie debaty by a kwestia sprzeciwu wobec protekcjonizmu .', 'another point that was mentioned during the debate was the issue of resistance to protectionism .'], ['s uzba zewnetrzna musi w pe ni dotrzymywac kroku komisji .', 'the external service must be wholly in step with the commission .'], ['z zadowoleniem przyjmujemy propozycje wysuniete celem zagwarantowania usprawnienia krajowych ram budzetowych i zachecenia panstw cz onkowskich do podejmowania bardziej wywazonych decyzji budzetowych w przysz osci .', 'we welcome the proposal put forward to ensure improvements in national fiscal frameworks and to encourage member states to make better fiscal decisions in the future .'], ['po pierwsze nie sadze aby umniejszanie grekow w sposob w jaki uczyni to pan pose soini by o wartosciowe czy nawet w asciwe .', 'first of all i do not believe that belittling the greeks in the manner that mr soini did is very useful or even professionally appropriate .'], ['dopuscilismy na przyk ad do spadku naszych wolnych mocy produkcyjnych o ok . w skali roku a to powoduje niepewnosc .', 'we have for example allowed our spare capacity to fall by around every year and that is creating insecurity .'], ['na pismie . chce podziekowac sprawozdawcy za wspania a prace .', 'in writing . 
i would like to thank the rapporteur for his excellent work .'], ['chcia abym sie odniesc przede wszystkim do misji obserwacyjnych w afryce ze wzgledu na szczegolna wspo prace ue z panstwami afryki karaibow i pacyfiku .', 'i would like to speak with particular reference to observation missions in africa because there is a special partnership between the eu and african caribbean and pacific countries .'], ['g osowa am za przyjeciem przedmiotowej rezolucji .', 'i voted in favour of this resolution .'], ['wniosek komisji ktory w stylu wielkiego brata stwierdza ze nalezy dostarczac owoce sezonowe podkreslajac roznorodnosc owocow tak aby dzieci mog y odkrywac ich smaki jest ca kowicie absurdalny .', 'the committee s proposal which in a big brother like manner states that seasonal fruit should be distributed giving preference to a varied range of fruits so as to enable children to discover different tastes is completely ridiculous .'], ['w kazdym razie przyjelismy to do wiadomosci i wezmiemy je pod uwage .', 'we have in any case taken note of them and shall take them into consideration .'], ['chcia abym skomentowac tresc w zakresie czterech czy pieciu konkretnych spraw .', 'i would like to comment on the content in relation to four or five particular issues .'], ['chce rowniez podziekowac pani komisarz za jej zaangazowanie i wsparcie w tym okresie . dziekuje takze jej s uzbom .', 'i also wish to thank the commissioner for her involvement and support throughout this period and i thank her services as well .'], ['dauha napedzi swiatowa gospodarke i zapobiegnie narastaniu protekcjonizmu .', 'doha would boost the world economy and prevent protectionism from picking up .'], ['w programie sztokholmskim przyjetym przez rade europejska w dniach grudnia r . 
wzywa sie panstwa cz onkowskie do prowadzenia polityki imigracyjnej charakteryzujacej sie elastycznoscia w celu wspierania rozwoju i wynikow gospodarczych unii .', 'the stockholm programme adopted by the european council of and december calls on member states to adopt immigration policies marked by flexible arrangements to support the development and economic performance of the union .'], ['nalezy wyraznie powiedziec nie czynimy wyjatku dla zadnego z sektorow naszego przemys u .', 'let us make it clear we are not making an exception for any sector of our industry .'], ['komisja europejska podobnie jak dwa komitety konsultacyjne jest przygotowana do prowadzenia dyskusji z parlamentem i rada ministrow w trakcie prac nad stworzeniem wspolnego systemu .', 'the european commission is ready for discussions with parliament and the council of ministers as well as the two consultative committees in working towards the development of a shared system .'], ['moja komisja nie mia a nic wspolnego z ta rezolucja .', 'my committee was not involved in the resolution at all .'], ['w scisle okreslonych okolicznosciach niektore produkty przeznaczone dla zwierzat z rodziny koniowatych nie beda musia y miec ustanowionych wartosci ndp lecz w ich przypadku wymagane bedzie przestrzeganie szesciomiesiecznego okresu wycofania .', 'in clearly defined circumstances certain products used for equidae will not have to have mrls but will have to respect a six month withdrawal period .'], ['klauzule o zachowaniu tajemnicy zawodowej zosta y rozszerzone i oprocz s uzb komisji objeto nimi takze cz onkow grup ekspertow a zatem zamiast w kierunku wiekszej przejrzystosci dazymy do wiekszej tajemniczosci .', 'professional secrecy clauses have been extended beyond commission staff to include members of expert groups so that instead of moving towards more transparency we are moving towards more secrecy .'], ['komunizm to zbrodnia przeciwko ludzkosci .', 'communism is a crime against humanity .'], 
['pracownicy nie beda musieli pracowac d uzej niz godzin w tygodniu obliczane jako srednia dla trzech miesiecy lub godzin tygodniowo usrednione za trzy miesiace kiedy nieaktywna czesc dyzuru uzna sie za czas pracy .', 'employees will not be required to work in excess of hours a week averaged over three months or hours a week averaged over three months when the inactive part of on call time is regarded as working time .'], ['teraz jest juz pewne to uznany fakt ze kryzys juz teraz pociaga za soba konsekwencje dla realnej gospodarki oraz ze przed nami jeszcze trudniejsze czasy .', 'it is now certain it is a recognised fact that the crisis is already having consequences for the real economy and that we have more difficult times ahead .'], ['uznaje to za owoc wysoce konstruktywnej wspo pracy zapoczatkowanej miedzy obydwoma instytucjami z chwila rozpoczecia analizy przedmiotowego dokumentu .', 'it sees in this the fruits of some highly constructive cooperation initiated between the two institutions from the very beginning of the examination of this text .'], ['w jaki sposob znajduje to odzwierciedlenie w reformie budzetu ?', 'how is this reflected in the budget reform ?'], ['to by ogolnie dobry test ktory przynios parlamentowi doskona e rezultaty .', 'overall it has been a good test which has brought excellent results for parliament .'], ['nadszed czas do zawarcia umowy .', 'it is time to do a deal .'], ['w bilansie na r . znaleziono z oty srodek pomiedzy finansowaniem z jednej strony polityk zewnetrznych takich jak pomoc dla kosowa i palestyny a z drugiej strony projektu galileo ktory scislej wiaze ze soba panstwa cz onkowskie .', 'a balance has been struck for between financing on the one hand for external policies such as support for kosovo and palestine and on the other for galileo a project that binds the member states closer together .'], ['chcia bym rowniez odpowiedziec na komentarze pos a matsakisa . 
podzielam jego zdanie na temat alkoholu .', 'i should also like to respond to what mr matsakis said i share his view about alcohol .'], ['sadze ze juz panstwo zauwazyli ze komisja zawsze zdecydowanie podkresla potrzebe utrzymania nalezytego poziomu finansowania badan i innowacji w sektorze edukacji poniewaz uwazamy ze w ten sposob mozemy ochronic i poprawic konkurencyjnosc europy oraz przygotowac naszych przysz ych badaczy przysz ych pracownikow wysoce konkurencyjnych sektorow do osiagania lepszych wynikow w przysz osci .', 'i think you have already noticed that there is always a strong voice from the commission side highlighting the need to keep an adequate level of financing for research and innovation in the educational sectors because we believe this is how we will preserve and improve our competitive edge and prepare our future researchers our future workers in highly competitive areas for better performance in the future .'], ['w zwiazku z pilnymi kwestiami mamy ca e mnostwo spraw do rozpatrzenia miedzy innymi solidarnosc z ofiarami burz w brazylii gdzie zgine o ponad osob .', 'with regard to urgent matters we have a whole host of issues here including in brazil solidarity with the victims of the storms which have left more than people dead .'], ['jutro w komisji opublikujemy sprawozdanie w sprawie realizacji strategii tematycznej w zakresie przeciwdzia ania produkcji odpadow i ich recyklingowi .', 'tomorrow we will publish in the commission the report on the implementation of a thematic strategy on prevention and recycling of waste .'], ['szacuje sie ze przez lat dyktatury w wiezieniach znalaz o sie ponad piec milionow osob ponad tysiecy zmar o na skutek tortur natomiast ostatnio zamordowano zosta o ponad osob .', 'it is estimated that during years of dictatorship more than five million people have been imprisoned more than tortured to death and recently more than killed .'], ['musimy nadal wspierac jak to czynilismy przez wiele lat mobilnosc w unii 
europejskiej i musimy zapewnic istnienie uczciwej konkurencji miedzy roznymi rodzajami transportu .', 'we need to carry on as we have been for a number of years in encouraging this mobility in the european union and we need to ensure that there is fair competition between the various modes of transport .'], ['po pierwsze sprzeciwiamy sie prawodawstwu stanowiacemu ze zdrowie kobiet kszta cenie i prawa rozrodcze nie podlegaja panstwom cz onkowskim a leza w gestii ue .', 'firstly we oppose any legislation that assumes that women s health education and reproductive rights are the responsibility not of member states but of the eu .'], ['panie przewodniczacy ! ca a ta sprawa wywo a a mnostwo problemow w szczegolnosci w sektorze rolniczym w irlandii .', 'mr president this whole matter has caused a lot of problems particularly in the agricultural sector in ireland .'], ['musimy szerzej odpowiedziec na pytanie jak ? .', 'we need to have a wider answer to the question of how ? .'], ['chcia bym pogratulowac prezydencji szwedzkiej i przede wszystkim komisarzowi rehnowi jego pracy .', 'i would like to compliment the swedish presidency and above all commissioner rehn on their work .'], ['jesli przeniesiemy prawo do decydowania w tych kwestiach na ue to jaki bedzie kolejny ruch ?', 'if we hand over the right to decide on these matters to the eu what is the next step ?'], ['potrzeba nam nowej konstytucji a nie tylko nowej nazwy .', 'we need a new constitution not just a new name .'], ['w imieniu grupy verts ale . pani przewodniczaca w imieniu mojej grupy z zadowoleniem przyjmuje to porozumienie .', 'on behalf of the verts ale group . 
madam president on behalf of my group i would also like to welcome this agreement .'], ['maroni to zdemaskowa .', 'maroni exposed this .'], ['kwestia ta zyska a juz uwage we flandrii i niebawem ma zyskac uwage wszystkich rzadow europy .', 'this area had already received attention in flanders and is now set to receive attention from all the governments of europe .'], ['nalezy do ozyc wszelkich staran aby zatrzymac i ukarac zabojcow .', 'every effort must be made to detain and punish the murderers .'], ['postanowienie to odpowiada na w oski postulat wprowadzenia prawdziwej regionalnej klauzuli ochronnej majacej zastosowanie tylko do pewnych regionow unii europejskiej .', 'this provision takes up the italian request to introduce a genuine regional safeguard clause applicable only in certain regions of the european union .'], ['w zwiazku z tym ue musi umiescic turystyke w strefie przybrzeznej na liscie swoich politycznych priorytetow .', 'in this respect the eu must include coastal tourism in the list of its political priorities .'], ['bedziemy wspierac oszczednosc energii wydajnosc energetyczna i energie odnawialna . 
ebi jest takze liderem rozwoju metodologii bardzo z ozonej z powodu trudnosci technicznych pozwalajacej na bardziej precyzyjny pomiar emisji dwutlenku wegla we wszystkich projektach ktore finansuje .', 'we are going to continue to support energy saving energy efficiency and renewable energies and we are also spearheading the development of a methodology an inevitably complex one given the technical difficulties to evaluate more precisely the carbon footprint of all the projects that we finance .'], ['na samym poczatku chcia abym wyraznie powiedziec ze ue w pe ni popiera stabilizacje i normalizacje sytuacji w gruzji oraz reformy demokratyczne w tym kraju .', 'right at the start i would like to make it clear that the eu is giving its full support to the stabilisation and normalisation of georgia and to democratic reforms in the country .'], ['komisja zdecydowanie zobowiaza a sie wspierac w asciwe ramy dwustronnej wspo pracy miedzy regionami .', 'the commission has made a firm commitment to promote the appropriate bilateral regional cooperation frameworks .'], ['faktem jest ze poziom kontroli tych instalacji stanowi bardzo wazny czynnik i powinien byc podwyzszany a kontrole powinny byc przeprowadzane z wieksza czestotliwoscia .', 'it is a fact that the level of control of these installations is a very important factor and as such it should be increased and carried out at shorter intervals .'], ['jednym z istotnych celow polityki powinno byc stworzenie warunkow ramowych umozliwiajacych ochrone miejsc pracy w niemczech nawet w czasie kryzysu .', 'one important policy objective must be to create framework conditions to protect jobs in germany even in times of crisis .'], ['niektorym moze sie to wydawac kosztowne ale rowne traktowanie jest koniecznoscia podobnie jak w przypadku osob dotknietych inna forma niepe nosprawnosci tak abysmy mogli szanowac samych siebie oraz wartosci europejskiego spo eczenstwa .', 'this may seem costly to some but receiving equal treatment is 
an absolute must as it is for those suffering from other disabilities so that we can respect ourselves and the values of european society .'], ['pozwola panstwo ze bardziej ogolnie odniose sie do pakietow pobudzajacych aktywnosc gospodarcza przyjetych przez unie i panstwa cz onkowskie od konca ubieg ego roku stanowiacych g owny wk ad we wsparcie polityki zatrudnienia .', 'more generally let me recall that the stimulus packages that the union and the member states have adopted since the end of last year constitute major contributions to supporting employment .'], ['prawie dzieci w europie cierpi na zaburzenia dys dzieci takie sa zazwyczaj niezauwazane przez systemy edukacyjne ktore to systemy nazbyt czesto przypisuja ich porazke w nauce niepowiazanym przyczynom .', 'almost of europe s children suffer from dys problems children who are usually invisible to our education systems which all too frequently blame their academic failure on unrelated causes .'], ['jednak faktycznie wykorzystane srodki budzetowe sa mniejsze od tej kwoty i ich wysokosc stale maleje od roku .', 'however the actual budget used has been smaller than that amount and has been steadily decreasing since .'], ['w artykule traktatu o unii europejskiej uznano spojnosc terytorialna jako jeden z celow ue .', 'article of the treaty on european union acknowledges territorial cohesion as an objective of the eu .'], ['msp stanowia podmiotow w gospodarce ue i zapewniaja dwie trzecie miejsc pracy .', 'smes account for more than of the eu economy and two thirds of its jobs .'], ['za uje jednak ze sprawozdanie to nie zosta o sporzadzone wspolnie przez komisje spraw zagranicznych i komisje ds . 
rozwoju biorac pod uwage fakt ze przewodniczacy tych dwoch komisji razem przewodnicza grupie parlamentu europejskiego odpowiedzialnej za obserwacje wyborow .', 'however i regret that the report was not drafted jointly by the committee on foreign affairs and the committee on development in view of the fact that the chairmen of these two committees jointly chair the european parliament s election observation group .'], ['z tego powodu uwazam ze w zasadzie powinnismy posuwac do przodu prace nad umowa o wolnym handlu pomiedzy ue a japonia .', 'for this reason i think we should actually be pressing ahead with an eu japan free trade agreement .'], ['ze wszystkich tych wzgledow musimy usprawnic mechanizmy wspierania adopcji miedzynarodowej umotywowanej ochrona dzieci ujednolicajac poszczegolne wymogi i procedury okreslone w ustawodawstwie panstw cz onkowskich w szczegolnosci stosujac art . europejskiej karty praw dziecka i uwzgledniajac art . karty praw podstawowych .', 'for all these reasons we must foster mechanisms to promote an international adoption instrument inspired by protecting children harmonising the different requirements and procedures laid down in the legislation of the member states in particular applying article of the european charter of children s rights and taking into consideration article of our charter of fundamental rights .'], ['pragne zauwazyc ze dopoki nie zapewnimy m odym ludziom mozliwosci zdobycia upragnionych kwalifikacji podejmowanie dzia an majacych sk onic ich do wyboru takiego zawodu bedzie jedynie rodzic w nich frustracje .', 'i would suggest that in working to attract young people to this profession you will only frustrate them until we provide them with the opportunity to gain these skills .'], ['ponadto zwracam sie do rady z prosba o rozwazenie na ozenia celowych sankcji co chcia aby uczynic grupa socjalistyczna ale nie mamy zadnego poparcia w tej kwestii na przyk ad zakaz podrozy zamrozenie aktywow .', 'i am also inviting the 
council to consider targeted sanctions which is what the socialist group would have liked to have done but we did not get any support for it travel bans perhaps the freezing of assets .'], ['moje wystapienie ma na celu zapewnic by nie zapomniano takze o tym wymiarze .', 'i am interceding in order to ensure that it is not forgotten .'], ['rada europejska wspiera ponadto pe ne wdrozenie planu dzia an komisji dotyczacego inicjatywy small business act ktora zosta a przyjeta przez rade w dniu grudnia r .', 'the european council also supports full implementation of the commission action plan on the small business act initiative that was adopted by the council on december .'], ['jednak chce zobaczyc ten kompromis jako podstawe aby w przysz osci unia europejska mog a dzia ac lepiej .', 'however i want to see this compromise used as the basis so that in future the european union can do it better .'], ['sa wyrazem obaw rzadu rumunskiego dotyczacych finansowej stabilnosci transgranicznej opieki zdrowotnej oraz skali jaka moze ona przybrac poniewaz jak panstwo doskonale wiedza niektore panstwa cz onkowskie maja bardzo ma y dochod narodowy .', 'they conveyed the romanian government s concern about the financial stability of cross border healthcare and the proportions it may assume because as you are very well aware some member states have a very small national income .'], ['i jak pan powiedzia panie ministrze rok mia byc rokiem powrotu rokiem ponownego pobudzenia europy spo ecznej .', 'and as you said minister was supposed to be a come back year the year in which social europe was re energised .'], [' el pani przewodniczaca ! 
traktuje problem walki z chorobami neurodegeneracyjnych a w szczegolnosci choroba alzheimera niezwykle powaznie .', ' el madam president i take the problem of combating neurodegenerative diseases and especially alzheimer s extremely seriously .'], ['nie powinnismy sie bac debat na zasadnicze tematy i pragne panu podziekowac panie urzedujacy przewodniczacy rady za panskie wystapienie poniewaz polityka gospodarcza i spo eczna stanowia powod do takiej zasadniczej debaty debaty istotnej dla naszych obywateli debaty ktora wymaga naszej odpowiedzi w krotkiej sredniej i d ugiej perspektywie czasowej .', 'we should not fear major debates and i wish to thank you mr president in office of the council for having made your contribution because economic and social policy is the major debate the one that is of most concern to our citizens the one that demands our response in the short medium and long term .'], ['z tego wzgledu powinnismy wykorzystac wieksza czesc z tych miliardow jako fundusz gwarancyjny aby zdobyc czy miliardow inwestycji publicznych i prywatnych .', 'therefore we should use most of the eur billion as guarantee funds to leverage eur or billion of public and private investment .'], ['sprawozdanie s usznie zajmuje sie obawami obywateli unii a zw aszcza obywateli panstw nadba tyckich dotyczacymi oddzia ywania na srodowisko planowanego gazociagu .', 'the report addresses creditably the concerns of union s citizens especially citizens of the states surrounding the baltic about the environmental impact of the planned gas pipeline .'], ['obecnie ue wspo pracuje z unia afrykanska i innymi organizacjami regionalnymi aby wzmocnic ich potencja rozwiazywania problemow w dziedzinie ochrony srodowiska i zmian klimatu .', 'at present the eu is working with the african union and other regional organisations to strengthen their capacity to address environment and climate change issues .'], ['panie przewodniczacy ! 
wznowienie walk we wschodniej czesci konga osmiesza prawa cz owieka i ucisza demokracje .', 'mr president renewed combat in the eastern part of the congo is making a mockery of human rights and is silencing democracy .'], ['nie by am za rozszerzeniem zakresu lecz zorientowa am sie ze wiekszosc zmierza w asnie w tym kierunku .', 'i was not in favour of expanding the scope but then i saw that a majority was moving in that direction .'], ['powierzchnia i produkcja waza po w przypadku tej czesci koperty ktora konkretnie dotyczy promocji w krajach trzecich co uwazam za nowa i bardzo istotna polityke perspektywiczna dla sektora wina .', ' for area and for production for that part of the envelope which is specifically related to promotion in third countries which i consider to be a new and very important forward looking policy for the wine sector .'], [' przed g osowaniem nad poprawka ', ' before the vote on amendment '], ['zaproponowa a ona parlamentowi rozsadne rozwiazania problemu przenoszenia wysokoemisyjnej produkcji przemys owej do krajow trzecich wprowadzi a kryteria jakosciowe oraz limit korzystania z kompensacji emisji oraz mechanizmow czystego rozwoju oraz stara a sie utrzymac na poziomie mniej wiecej swobode panstw cz onkowskich w korzystaniu z przychodow z aukcji przydzia ow .', 'she has proposed to this parliament sensible solutions to the problems of carbon leakage in industry she has introduced quality criteria and a limit on the use of off setting and clean development mechanisms and she has tried to keep to around the freedom of member states to use the revenues generated from auctioning allowances .'], ['wszystkie te zezwolenia sa stosowane w ue .', 'each of these applies within the eu .'], ['dobrze ze sprawami konsumenckimi zajmuje sie teraz ta sama komisja ale byc moze szkoda ze nie skupiamy sie juz na rynku wewnetrznym oraz prawie cywilnym i handlowym .', 'it is good that consumer affairs are now dealt with by the same committee but it is perhaps a 
shame that we have lost that focus on the internal market and civil and commercial law .'], ['zrobmy wszystko by go dotrzymac poniewaz sa to rzeczywiste problemy i prawdziwi ludzie a my musimy sobie z tym poradzic juz teraz .', 'let us be able to live up to it because these are real problems and real people and we have to deal with them now .'], ['na pismie el aby ugruntowac dominujaca pozycje ue na rynku energii elektrycznej i gazu ziemnego poprzez inwestycje kapita owe na wielka skale komisja przygotowuje obecnie pakiet pieciu wnioskow dla trzeciego pakietu legislacyjnego .', 'to complete the dominance of the eu electricity and natural gas markets by large scale capital interests the package of five proposals for the third legislative bundle is now being prepared by the commission .'], [' de panie przewodniczacy ! zrobimy wyjatek i poprzemy te propozycje .', ' de mr president we shall make an exception and support that proposal .'], ['moim zdaniem to wazne ze rezolucja ta jest wynikiem prac wielu grup parlamentu .', 'i think it was very important for us to make this a cross group resolution .'], ['ochrona praw cz owieka i ludzkiej godnosci nie ma racji bytu gdy s aba polityka zwieksza ryzyko terroryzmu i marazmu spo ecznego .', 'safeguarding human rights and individual dignity is completely at odds with weak policies that encourage the threat of terrorism and social malaise .'], ['na zakonczenie pragne podziekowac naszemu pos owi sprawozdawcy za znakomita prace .', 'finally i should like to thank our rapporteur for his excellent work .'], ['nalezy podkreslic iz plan prac opracowano w wyniku szczego owych dyskusji z innymi instytucjami zawierajac w nim rowniez zagadnienia omawiane ostatnio podczas debaty o globalizacji na nieformalnym spotkaniu rady europejskiej w lizbonie .', 'it should be stressed that the work plan was elaborated in the light of detailed discussions with other institutions including the issues discussed recently in the debate on globalisation 
at the council s informal meeting in lisbon .'], ['nie akceptuje tego .', 'i do not accept that .'], [' oklaski ', ' applause '], ['przystapienie ue do europejskiej konwencji praw cz owieka bedzie oznaczac ze ochrona praw podstawowych zostanie uzupe niona i wzmocniona ze obywatele w naszych panstwach cz onkowskich beda mieli lepsza ochrone w odniesieniu do dzia an ue oraz ze praktyka prawna w dziedzinie praw cz owieka zostanie lepiej sharmonizowana w dwoch europejskich trybuna ach w hadze i strasburgu .', 'the eu s accession to the european convention will mean that the protection of fundamental rights will be supplemented and strengthened that citizens in our member states will have better protection in relation to the eu s activities and that legal practice in the area of human rights will be harmonised better in the two european courts in the hague and strasbourg .'], ['taka zasade przyjeto w dniu marca r .', 'this is the principle of march .'], ['uwazam ze zawsze gdy zostaje zawarta umowa nalezy nalegac na zachowanie zasady wzajemnosci i wierze ze tego rodzaju umowy moga s uzyc jako podstawa na ktorej inni moga sie wzorowac .', 'i augur that whenever an agreement is concluded insistence be made upon the principle of reciprocity and i believe that it is these type of agreements that can serve as the foundation for this to happen elsewhere .'], ['komisja twierdzi ze idealna sytuacja by oby gdyby konsumenci ue mogli miec te same podstawowe prawa niezaleznie od tego gdzie sie znajduja w unii i gdzie dokonali zakupow .', 'the commission says that the ideal situation would be if eu consumers could have the same basic rights wherever they were in the union and wherever they did their shopping .'], ['do solidarnosci zobowiaza o sie wiele panstw cz onkowskich komisja i inne panstwa i grecja wyraza wdziecznosc tym ktorzy podjeli bezzw oczne dzia ania na rzecz walki z kleska zywio owa spowodowana czynnikami ktorych nie sposob by o przewidziec .', 'solidarity has been 
pledged by many member states the commission and other countries and greece is grateful to those who have taken swift action against a natural disaster arising from conditions beyond anything we could have imagined .'], ['niestety na tym etapie podczas ostatniego g osowania niewiele mozemy juz zrobic w tej sprawie .', 'unfortunately there is not much we can do at this stage of the issue in the final vote .'], ['rezolucja by aby w asciwa dopiero po nadchodzacych wyborach na ukrainie .', 'a resolution would only make sense after the forthcoming elections in ukraine .']]\n",
"Read 100 sentence pairs\n",
"Trimmed to 100 sentence pairs\n",
"Counting words...\n",
"Counted words:\n",
"in 1155\n",
"out 877\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "1lEImRDtSYK1",
"outputId": "0d345a19-5f00-40cf-98fe-ebb846ef9a74"
},
"source": [
"input_lang.n_words"
],
"execution_count": 9,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"1155"
]
},
"metadata": {
"tags": []
},
"execution_count": 9
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "jbMReYBuMBUy"
},
"source": [
"class EncoderRNN(nn.Module):\r\n",
" def __init__(self, input_size, hidden_size):\r\n",
" super(EncoderRNN, self).__init__()\r\n",
" self.hidden_size = hidden_size\r\n",
"\r\n",
" self.embedding = nn.Embedding(input_size, hidden_size)\r\n",
" self.gru = nn.GRU(hidden_size, hidden_size)\r\n",
"\r\n",
" def forward(self, input, hidden):\r\n",
" embedded = self.embedding(input).view(1, 1, -1)\r\n",
" output = embedded\r\n",
" output, hidden = self.gru(output, hidden)\r\n",
" return output, hidden\r\n",
"\r\n",
" def initHidden(self):\r\n",
" return torch.zeros(1, 1, self.hidden_size, device=device)"
],
"execution_count": 10,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "FTGPW7-AMC5R"
},
"source": [
"class AttnDecoderRNN(nn.Module):\r\n",
" def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):\r\n",
" super(AttnDecoderRNN, self).__init__()\r\n",
" self.hidden_size = hidden_size\r\n",
" self.output_size = output_size\r\n",
" self.dropout_p = dropout_p\r\n",
" self.max_length = max_length\r\n",
"\r\n",
" self.embedding = nn.Embedding(self.output_size, self.hidden_size)\r\n",
" self.attn = nn.Linear(self.hidden_size * 2, self.max_length)\r\n",
" self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)\r\n",
" self.dropout = nn.Dropout(self.dropout_p)\r\n",
" self.gru = nn.GRU(self.hidden_size, self.hidden_size)\r\n",
" self.out = nn.Linear(self.hidden_size, self.output_size)\r\n",
"\r\n",
" def forward(self, input, hidden, encoder_outputs):\r\n",
" embedded = self.embedding(input).view(1, 1, -1)\r\n",
" embedded = self.dropout(embedded)\r\n",
"\r\n",
" attn_weights = F.softmax(\r\n",
" self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)\r\n",
" attn_applied = torch.bmm(attn_weights.unsqueeze(0),\r\n",
" encoder_outputs.unsqueeze(0))\r\n",
"\r\n",
" output = torch.cat((embedded[0], attn_applied[0]), 1)\r\n",
" output = self.attn_combine(output).unsqueeze(0)\r\n",
"\r\n",
" output = F.relu(output)\r\n",
" output, hidden = self.gru(output, hidden)\r\n",
"\r\n",
" output = F.log_softmax(self.out(output[0]), dim=1)\r\n",
" return output, hidden, attn_weights\r\n",
"\r\n",
" def initHidden(self):\r\n",
" return torch.zeros(1, 1, self.hidden_size, device=device)"
],
"execution_count": 11,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "LuTQI2G3MEpk"
},
"source": [
"def indexesFromSentence(lang, sentence):\r\n",
" res = []\r\n",
" for word in sentence.split(' '):\r\n",
" if word not in lang.word2index:\r\n",
" res.append(random.randrange(len(lang.word2index)))\r\n",
" else:\r\n",
" res.append(lang.word2index[word])\r\n",
" return res\r\n",
"\r\n",
"\r\n",
"def tensorFromSentence(lang, sentence):\r\n",
" indexes = indexesFromSentence(lang, sentence)\r\n",
" indexes.append(EOS_token)\r\n",
" return torch.tensor(indexes, dtype=torch.long, device=device).view(-1, 1)\r\n",
"\r\n",
"\r\n",
"def tensorsFromPair(pair):\r\n",
" input_tensor = tensorFromSentence(input_lang, pair[0])\r\n",
" target_tensor = tensorFromSentence(output_lang, pair[1])\r\n",
" return (input_tensor, target_tensor)"
],
"execution_count": 22,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "XRaHJN_5MGzk"
},
"source": [
"teacher_forcing_ratio = 0.5\r\n",
"\r\n",
"\r\n",
"def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):\r\n",
" encoder_hidden = encoder.initHidden()\r\n",
"\r\n",
" encoder_optimizer.zero_grad()\r\n",
" decoder_optimizer.zero_grad()\r\n",
"\r\n",
" input_length = input_tensor.size(0)\r\n",
" target_length = target_tensor.size(0)\r\n",
"\r\n",
" encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)\r\n",
"\r\n",
" loss = 0\r\n",
"\r\n",
" for ei in range(input_length):\r\n",
" encoder_output, encoder_hidden = encoder(\r\n",
" input_tensor[ei], encoder_hidden)\r\n",
" encoder_outputs[ei] = encoder_output[0, 0]\r\n",
"\r\n",
" decoder_input = torch.tensor([[SOS_token]], device=device)\r\n",
"\r\n",
" decoder_hidden = encoder_hidden\r\n",
"\r\n",
" use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False\r\n",
"\r\n",
" if use_teacher_forcing:\r\n",
" # Teacher forcing: Feed the target as the next input\r\n",
" for di in range(target_length):\r\n",
" decoder_output, decoder_hidden, decoder_attention = decoder(\r\n",
" decoder_input, decoder_hidden, encoder_outputs)\r\n",
" loss += criterion(decoder_output, target_tensor[di])\r\n",
" decoder_input = target_tensor[di] # Teacher forcing\r\n",
"\r\n",
" else:\r\n",
" # Without teacher forcing: use its own predictions as the next input\r\n",
" for di in range(target_length):\r\n",
" decoder_output, decoder_hidden, decoder_attention = decoder(\r\n",
" decoder_input, decoder_hidden, encoder_outputs)\r\n",
" topv, topi = decoder_output.topk(1)\r\n",
" decoder_input = topi.squeeze().detach() # detach from history as input\r\n",
"\r\n",
" loss += criterion(decoder_output, target_tensor[di])\r\n",
" if decoder_input.item() == EOS_token:\r\n",
" break\r\n",
"\r\n",
" loss.backward()\r\n",
"\r\n",
" encoder_optimizer.step()\r\n",
" decoder_optimizer.step()\r\n",
"\r\n",
" return loss.item() / target_length"
],
"execution_count": 13,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "O67rLt62MJST"
},
"source": [
"import time\r\n",
"import math\r\n",
"\r\n",
"\r\n",
"def asMinutes(s):\r\n",
" m = math.floor(s / 60)\r\n",
" s -= m * 60\r\n",
" return '%dm %ds' % (m, s)\r\n",
"\r\n",
"\r\n",
"def timeSince(since, percent):\r\n",
" now = time.time()\r\n",
" s = now - since\r\n",
" es = s / (percent)\r\n",
" rs = es - s\r\n",
" return '%s (- %s)' % (asMinutes(s), asMinutes(rs))"
],
"execution_count": 14,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "gRBh9zz-MLjh"
},
"source": [
"def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100, learning_rate=0.01):\r\n",
" start = time.time()\r\n",
" plot_losses = []\r\n",
" print_loss_total = 0 # Reset every print_every\r\n",
" plot_loss_total = 0 # Reset every plot_every\r\n",
"\r\n",
" encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)\r\n",
" decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)\r\n",
" training_pairs = [tensorsFromPair(random.choice(pairs))\r\n",
" for i in range(n_iters)]\r\n",
" criterion = nn.NLLLoss()\r\n",
"\r\n",
" for iter in range(1, n_iters + 1):\r\n",
" training_pair = training_pairs[iter - 1]\r\n",
" input_tensor = training_pair[0]\r\n",
" target_tensor = training_pair[1]\r\n",
"\r\n",
" loss = train(input_tensor, target_tensor, encoder,\r\n",
" decoder, encoder_optimizer, decoder_optimizer, criterion)\r\n",
" print_loss_total += loss\r\n",
" plot_loss_total += loss\r\n",
"\r\n",
" if iter % print_every == 0:\r\n",
" print_loss_avg = print_loss_total / print_every\r\n",
" print_loss_total = 0\r\n",
" print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),\r\n",
" iter, iter / n_iters * 100, print_loss_avg))\r\n",
"\r\n",
" if iter % plot_every == 0:\r\n",
" plot_loss_avg = plot_loss_total / plot_every\r\n",
" plot_losses.append(plot_loss_avg)\r\n",
" plot_loss_total = 0\r\n",
"\r\n",
" showPlot(plot_losses)"
],
"execution_count": 15,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "Qqkc5IsEMOfW"
},
"source": [
"def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):\r\n",
" with torch.no_grad():\r\n",
" input_tensor = tensorFromSentence(input_lang, sentence)\r\n",
" input_length = input_tensor.size()[0]\r\n",
" encoder_hidden = encoder.initHidden()\r\n",
"\r\n",
" encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)\r\n",
"\r\n",
" for ei in range(input_length):\r\n",
" encoder_output, encoder_hidden = encoder(input_tensor[ei],\r\n",
" encoder_hidden)\r\n",
" encoder_outputs[ei] += encoder_output[0, 0]\r\n",
"\r\n",
" decoder_input = torch.tensor([[SOS_token]], device=device) # SOS\r\n",
"\r\n",
" decoder_hidden = encoder_hidden\r\n",
"\r\n",
" decoded_words = []\r\n",
" decoder_attentions = torch.zeros(max_length, max_length)\r\n",
"\r\n",
" for di in range(max_length):\r\n",
" decoder_output, decoder_hidden, decoder_attention = decoder(\r\n",
" decoder_input, decoder_hidden, encoder_outputs)\r\n",
" decoder_attentions[di] = decoder_attention.data\r\n",
" topv, topi = decoder_output.data.topk(1)\r\n",
" if topi.item() == EOS_token:\r\n",
" decoded_words.append('<EOS>')\r\n",
" break\r\n",
" else:\r\n",
" decoded_words.append(output_lang.index2word[topi.item()])\r\n",
"\r\n",
" decoder_input = topi.squeeze().detach()\r\n",
"\r\n",
" return decoded_words, decoder_attentions[:di + 1]"
],
"execution_count": 16,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "u_aEPNTQMRQc"
},
"source": [
"hidden_size = 256\r\n",
"encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)\r\n",
"attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1).to(device)\r\n",
"\r\n",
"trainIters(encoder1, attn_decoder1, 25000, print_every=20)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "sZH-wZjyRd9V"
},
"source": [
"evaluate(encoder1, attn_decoder1, \"Co tam u ciebie\")"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "GgeEWwJAZAfE"
},
"source": [
"def evaluateAndShow(input_sentence):\r\n",
" output_words = evaluate(\r\n",
" encoder1, attn_decoder1, input_sentence)\r\n",
" return \" \".join(output_words[0])"
],
"execution_count": 36,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"id": "OvpxV5Sz19Wg",
"outputId": "758386a3-7365-4297-bd2c-809b5732ef6b"
},
"source": [
"evaluateAndShow(\"Co tam u cbie\")"
],
"execution_count": 37,
"outputs": [
{
"output_type": "execute_result",
"data": {
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
},
"text/plain": [
"'let us be able to live up to it because these are real problems and real people and we have to deal with them now . <EOS>'"
]
},
"metadata": {
"tags": []
},
"execution_count": 37
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "tak-qrYjyjws"
},
"source": [
"temp = open('in.tsv', 'r').readlines()\r\n",
"data = []\r\n",
"for sent in temp:\r\n",
" data.append(sent.replace('\\n',''))\r\n",
"\r\n",
"f=open('out.tsv','w+')\r\n",
"for sent in data:\r\n",
" f.write(evaluateAndShow(sent).replace('<EOS>', '') + '\\n')\r\n",
"\r\n",
"f.close()"
],
"execution_count": 38,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "qpuTVdo12O5y"
},
"source": [
""
],
"execution_count": null,
"outputs": []
}
]
}