From a3b403ac35ec8d15391de55b8947bcc4f589be87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Jaworski?= Date: Wed, 24 Oct 2018 14:45:36 +0200 Subject: [PATCH 1/6] pytania odpowiedzi --- .../pytania_odpowiedzi_logistyka.cfg | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 cat/versions_available/pytania_odpowiedzi_logistyka.cfg diff --git a/cat/versions_available/pytania_odpowiedzi_logistyka.cfg b/cat/versions_available/pytania_odpowiedzi_logistyka.cfg new file mode 100644 index 0000000..6ca35e0 --- /dev/null +++ b/cat/versions_available/pytania_odpowiedzi_logistyka.cfg @@ -0,0 +1,10 @@ +dir@#@pytania_odpowiedzi_logistyka +concordia_host@#@localhost +concordia_port@#@8800 +tmid@#@12 +desc@#@Wyszukiwarka pytań +enjoy@#@Wybierz przykładowe pytanie: +prompt@#@Wprowadź pytanie (po polsku): +suggestion@#@chciałbym zakupić samochód specjalistyczny +suggestion@#@czy są jakieś zlecenia od spedytorów z terminala? +suggestion@#@potrzebuję oprogramowania do zarządzania korporacją taksówkarską From 53d2f47b57ac13dd0d286ef061812632b9ecd93a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Jaworski?= Date: Wed, 5 Dec 2018 23:07:49 +0100 Subject: [PATCH 2/6] versions cleanup --- cat/versions_available/jrc_enes.cfg | 2 +- cat/versions_enabled/emea_plen.cfg | 1 - cat/versions_enabled/europarl_sample.cfg | 1 - cat/versions_enabled/icd_filtered_plen.cfg | 1 - cat/versions_enabled/icd_plen.cfg | 1 - cat/versions_enabled/logofag_enpl.cfg | 1 - cat/versions_enabled/logofag_plen.cfg | 1 - cat/versions_enabled/opus_medicine_plen.cfg | 1 - cat/versions_enabled/tmrepository_enhr.cfg | 1 - tests/addLemmatizedTM.sh | 4 ++-- 10 files changed, 3 insertions(+), 11 deletions(-) delete mode 120000 cat/versions_enabled/emea_plen.cfg delete mode 120000 cat/versions_enabled/europarl_sample.cfg delete mode 120000 cat/versions_enabled/icd_filtered_plen.cfg delete mode 120000 cat/versions_enabled/icd_plen.cfg delete mode 120000 cat/versions_enabled/logofag_enpl.cfg delete mode 120000 cat/versions_enabled/logofag_plen.cfg delete mode 120000 cat/versions_enabled/opus_medicine_plen.cfg delete mode 120000 cat/versions_enabled/tmrepository_enhr.cfg diff --git a/cat/versions_available/jrc_enes.cfg b/cat/versions_available/jrc_enes.cfg index 07f8e46..ae3c864 100644 --- a/cat/versions_available/jrc_enes.cfg +++ b/cat/versions_available/jrc_enes.cfg @@ -1,7 +1,7 @@ dir@#@jrc_enes concordia_host@#@concordia.vm.wmi.amu.edu.pl concordia_port@#@8800 -tmid@#@6 +tmid@#@1 desc@#@Welcome to the interactive Concordia demo. The system finds the longest matches of the pattern sentence in its translation memory. This translation memory is 200 000 sentences taken from English-Spanish corpus of European Law. Please enter an English sentence in the field below and press Enter (or use the search button). This instance of Concordia works best with law sentences, but is very likely to output some results for any English sentence. You can also use predefined samples, simply use the link "show/hide samples" and apply one of the sample sentences. After the search, click on the highlighted fragments to see their context. enjoy@#@Enjoy your work with the system! prompt@#@Enter search pattern (English sentence): diff --git a/cat/versions_enabled/emea_plen.cfg b/cat/versions_enabled/emea_plen.cfg deleted file mode 120000 index 6081551..0000000 --- a/cat/versions_enabled/emea_plen.cfg +++ /dev/null @@ -1 +0,0 @@ -../versions_available/emea_plen.cfg \ No newline at end of file diff --git a/cat/versions_enabled/europarl_sample.cfg b/cat/versions_enabled/europarl_sample.cfg deleted file mode 120000 index c90ed2e..0000000 --- a/cat/versions_enabled/europarl_sample.cfg +++ /dev/null @@ -1 +0,0 @@ -../versions_available/europarl_sample.cfg \ No newline at end of file diff --git a/cat/versions_enabled/icd_filtered_plen.cfg b/cat/versions_enabled/icd_filtered_plen.cfg deleted file mode 120000 index e6dd218..0000000 --- a/cat/versions_enabled/icd_filtered_plen.cfg +++ /dev/null @@ -1 +0,0 @@ -../versions_available/icd_filtered_plen.cfg \ No newline at end of file diff --git a/cat/versions_enabled/icd_plen.cfg b/cat/versions_enabled/icd_plen.cfg deleted file mode 120000 index 447fa8d..0000000 --- a/cat/versions_enabled/icd_plen.cfg +++ /dev/null @@ -1 +0,0 @@ -../versions_available/icd_plen.cfg \ No newline at end of file diff --git a/cat/versions_enabled/logofag_enpl.cfg b/cat/versions_enabled/logofag_enpl.cfg deleted file mode 120000 index c1c7f55..0000000 --- a/cat/versions_enabled/logofag_enpl.cfg +++ /dev/null @@ -1 +0,0 @@ -../versions_available/logofag_enpl.cfg \ No newline at end of file diff --git a/cat/versions_enabled/logofag_plen.cfg b/cat/versions_enabled/logofag_plen.cfg deleted file mode 120000 index 370b68b..0000000 --- a/cat/versions_enabled/logofag_plen.cfg +++ /dev/null @@ -1 +0,0 @@ -../versions_available/logofag_plen.cfg \ No newline at end of file diff --git a/cat/versions_enabled/opus_medicine_plen.cfg b/cat/versions_enabled/opus_medicine_plen.cfg deleted file mode 120000 index 8461a8f..0000000 --- a/cat/versions_enabled/opus_medicine_plen.cfg +++ /dev/null @@ -1 +0,0 @@ -../versions_available/opus_medicine_plen.cfg \ No newline at end of file diff --git a/cat/versions_enabled/tmrepository_enhr.cfg b/cat/versions_enabled/tmrepository_enhr.cfg deleted file mode 120000 index de131be..0000000 --- a/cat/versions_enabled/tmrepository_enhr.cfg +++ /dev/null @@ -1 +0,0 @@ -../versions_available/tmrepository_enhr.cfg \ No newline at end of file diff --git a/tests/addLemmatizedTM.sh b/tests/addLemmatizedTM.sh index 8cb99b0..b581e3c 100755 --- a/tests/addLemmatizedTM.sh +++ b/tests/addLemmatizedTM.sh @@ -1,7 +1,7 @@ #!/bin/sh -CORPUS_NAME="europarl_sample" +CORPUS_NAME="jrc_enes" SRC_LANG_ID=2 -TRG_LANG_ID=1 +TRG_LANG_ID=4 ./addAlignedLemmatizedTM.py $CORPUS_NAME ../mgiza-aligner/corpora/$CORPUS_NAME/src_final.txt $SRC_LANG_ID ../mgiza-aligner/corpora/$CORPUS_NAME/trg_final.txt $TRG_LANG_ID ../mgiza-aligner/corpora/$CORPUS_NAME/aligned_final.txt From f799164d64e9dda82d8f2d2e6e7b159536dcc9ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Jaworski?= Date: Wed, 5 Dec 2018 23:16:46 +0100 Subject: [PATCH 3/6] French lemmatizer --- .../LemmaGenSockets/LemmatizerListener.cs | 20 +++--------------- .../bin/Debug/LemmaGenSockets.exe | Bin 7168 -> 7168 bytes .../bin/Debug/LemmaGenSockets.pdb | Bin 15872 -> 15872 bytes ...gnTimeResolveAssemblyReferencesInput.cache | Bin 7207 -> 7123 bytes ...emmaGenSockets.csproj.FileListAbsolute.txt | 11 ++++++++++ ...ckets.csprojResolveAssemblyReference.cache | Bin 13598 -> 31050 bytes .../obj/Debug/LemmaGenSockets.exe | Bin 7168 -> 7168 bytes .../obj/Debug/LemmaGenSockets.pdb | Bin 15872 -> 15872 bytes concordia-server/lemmatizer_facade.cpp | 2 ++ 9 files changed, 16 insertions(+), 17 deletions(-) diff --git a/LemmaGenSockets/LemmaGenSockets/LemmatizerListener.cs b/LemmaGenSockets/LemmaGenSockets/LemmatizerListener.cs index 77a321f..fea4e9e 100644 --- a/LemmaGenSockets/LemmaGenSockets/LemmatizerListener.cs +++ b/LemmaGenSockets/LemmaGenSockets/LemmatizerListener.cs @@ -21,6 +21,7 @@ namespace LemmaGenSockets lemmatizersDict.Add("pl", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.Polish)); lemmatizersDict.Add("en", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.English)); lemmatizersDict.Add("hr", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.Serbian)); + lemmatizersDict.Add("fr", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.French)); } public LemmatizerListener() @@ -52,13 +53,7 @@ namespace LemmaGenSockets private string lemmatizeWord(string languageCode, string word) { - if (word.StartsWith("ne_") || word == "i" || word == "o" || word == "do") - { - return word; - } string[] parts = word.Split(wordInnerSeparator); - - string result = ""; if (parts.Length == 2) { string firstPart = parts[0]; @@ -67,20 +62,11 @@ namespace LemmaGenSockets firstPart = lemmatizersDict[languageCode].Lemmatize(firstPart); } string secondPart = lemmatizersDict[languageCode].Lemmatize(parts[1]); - result = firstPart + "-" + secondPart; + return firstPart + "-" + secondPart; } else { - result = lemmatizersDict[languageCode].Lemmatize(word); - } - - if (result == "" || result.Contains(" ")) - { - return word; - } - else - { - return result; + return lemmatizersDict[languageCode].Lemmatize(word); } } diff --git a/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaGenSockets.exe b/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaGenSockets.exe index 1a297bbb9fb7b9739c026a740f903468cec5b142..2a75edab9c36843d285d1146243494d5811faa3a 100644 GIT binary patch delta 1655 zcmZ9NYitx%6vzMf&SQ7B+bz>!x7%)^3+1u23oM$Lg%Sf&mD-Ah(g1P8Beg(_cIp;2 zhUn4{fH8uW@k6ajg7U#4k?0D66cSUCS`#&*#z??dqS4?7Ni@bZr1d|yMA6CK-<;Py z_netKclT%aXJ2|wf2}!~8+f5w+%+(~De~h|;34o@1za-*<~ZT)K$!ZeB|x<|N2a|k zVxd^+^@~C>o}q)+S`G;V8*72hC4ffF1WZqqXS`Oai)~)JTo;!61fb^~pf}+S%H`gd za-pd7Zpzsr;U%eI&t3Y@oOMHvt(E1d*DWoEy`+hmnm_GT>2om| z!6-u`co#lTC=M}&iS6AXd#x~m9l;vT?4m?H69vmn8 z@Q%n}BV7FC^;=PAg1eYs#Xr||%;yOjwP^moW6Zm3IdS{;=v&hc&{mwVMr84WaJeX6 zEww}vMlT)(j862ByOAe%`V`QT?AEI;n>XXJkv~2fI(Dh?$c5b3i^iW>;kA}+j93A4 zRzybl)009LN6LUbQ`6oN`-uw2oflRnGeCw~k&%3~Ty*we}#q;Iu X;;3_~7@0X?$ZV!)SN$diGJ(GV*6$)I delta 1895 zcmZ9NYitx%6vzMf&SSf;q0{MODFqfwX<{V5`s=uuuv#)IcdkwqmDjw?u+* zTO))-RLVrdBN{a%1cC->kcxyDLJSi9P(sjzk{H4VP$Lo(V}um{b6YDoyYsu}oO{l> z_uPADrnjcIX76sj>R3y2|DGaoqyOk?Yi||s0(dP1hK>GdPGmh0p}wXPDDtMsa<5sG zid$Zv$iy$u*}sU1DCN*AY`&9>jR;4#_42ha^A9=(p|ZIgER;VYGETq_lovqm5RBE_LYi&Eoy5j5 z9vh+hjULvf_Wu0=lPAj>Za#x~Ecbbl7^(jZ>sBHRN)ULZ}&J@zmYN zd>11(oTx<{I`B5D5Uj>@xiujt7$}?0*37qqE*CUZHiHNUQj7hGl~T?d$w((;hsrf1 zOcXfb#9$!*p?@G_Pu~@^-auXD1*hb7P zrvR?;Y`^6&k(z}M9z1ZOu%iLDg2u8+&oVj|3^P~p>pWmA<#*o2jpfAHs^*6sl0IjX7Ov9O!sqaLSn)HaFtJ{qVd0a&B@@^{&Q+YRSgY*MV>9iI%Cc6m4ZXCyjALXsPKzq6#H;wt ze--o+f?M(5W$}69fBgExs}ANZ$mN6aKl|9J8+TMf$-|*zT?h1mUBR!U(U@*cr3dCm zVj@uMcBWgCode6GVNv6Dt$TKBPwSRsdOKirVGG%gPI5C+{#nuDK%f)tSjTTjlDEsg zjqqNxre`O@+kSfS;+u;PUD|ZyK;?!Z?}W9oAQA{pwk*k?C8Zu0(&7tSS_3%~J``&? zEC18xPtJdAidcS{FYUAqtZ1xj?7cqs$}bgfE!+3)+4)<8&;9DJPRG2KI=8LomG;hb zcgw0|Hzyl!O!sU|y74(x)sMF{q}-17wsg05KYKVjIQI4P2e|CrwI_)S1JRr&F<|BX QZOEHd19ssZ(O(t#2iv+t%K!iX diff --git a/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaGenSockets.pdb b/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaGenSockets.pdb index 46bce95c1627c42c9138d89ebcff6e407c85f1c2..228518ad40aba400d67204129c9382528636cd05 100644 GIT binary patch delta 1987 zcma)-TTGlq6vt;iSXiK3wv@dudt(cQ-Ev!QrD%yjk(!ojO|&2vw@ZN{;8IJ{Q1`=V ziJ@ATV>B9w#F)~kv6nSXHR+4-!K6MkS`#0Lt!*$dY3NpJVjo2RX8{vTDjqodn>lCZ zd}q$gH=jDDj-5|THBRr*wo4&I$6pPuONBX2Cx7lZS99a+#i%vatKE_^(AL*`$#egw z-HwA9AGdxs>^dI1Q2(h+ol`IeGN(i%zC}5u*E*{?Ov{e;;m}ZjFmQCFJuoyF7z!Qg z>-LQVL&t(4+EwPYiT?i@QD<<{C;Bw&24S3ZSFfQj%XHQaqScx>$;bHzG6jt%2`*5? zFcfJmnKEjVk6f64|CdwODbv{KTY#?sYPnS+w;#97w4=@QI0pNI&^2kHaic-Qo|fp0 zaYD)ndCYeFHkv1uS%tncx1eLQ?BmX$<&5m49BZxYjPAE)N_4ty0qrDab53;6n@u-t zIyp)AYz8?^DY^S(l%CC@ML+b&s`d-E{RV#*w3B{t6`3p; z=|;~A=7Bb_2+Spu+l^w@xh-KQY+M8vm;o-{T~>K{n5S+AF;6vvTR{hy4`NQ7u*Jvo znPB^N5N)*>+y*v)gP=I`sYEBj9fEQLq`@1tOa~ zQIEz8FklVpAc!@nuYwr7TKzg|^As5>Q2z{}6HWir|2py-r7IpwxCLW5$tS=~;C|2m zV*T-b!66*?q2Go+*ADeiu`q!IR6IYPY!ldxJ|`IfTfsvh*N_);0PIDd4|E^c1@?o` zFb;=M!!Qhhr$L@TAQq;+4Ps&HJK!+*E_fK621DRE@F@5N_!9UPI0oJVUk2}hh^_t( zs`&pC90!-dE-*>r2%%yszh0QW^Fv@u0Og7sol?XPXqB zCAM?q60zI~@5xqKZhH7dly!+xd*1%-xC0n&YZi!Y@U)B5#j>ECo delta 2290 zcmb7`TWnNC7=UMX+p?{+v|RSq-R^En3s{Ov=?xm>B4C9`5wrnPlv-+SDHP}y6)QbU zV@Ndiw~L*648Buq{IYYAYy`q*i=JA5@Q0SS_Lmb(C)z7V87_~`t!!MgtFyhOIo4%*$|Gi_ds6Y1``){g-dzuP??`!Pdm`pQ)gv>< z5x!cSA2xceFO%IrXQz~W90!t6gK6?~_Iq8f>+0L@J8oanCT~X6EcQ+bk@;fk_&nDcJ0aFBvAq z^Gm$Zmd+LjO>`sw?J9h6mpjI4l4wpO#7t+J#%ju=P)wVWisT&*O#MJ z=g>)8&#E%q2q(f9pa*V-a+R8uikHDQny1$Mrb?<@iL(atNJ<6b)!7*s@_bJ>5(d?HG_$eH4-&h}jybXh<=U(Aj|P_~0?}JKDp^Y#$M;`;UdHu<(VDnZK1F-f zp&WJi`AIA$SMVHtw$Kz;L`=!VT+>jIle;4W6)l>c{4ScS6#?^3MWO9*wj$u51XnAn zdS_86J~{HR+j%=J8EQ4_kmx0LvW>OHP`0E@YOHMty;MEf#@gh5rtT&$@PkIBM%b4m zCJn#)%^o}XF(1{4Y%W#?921;oxYF(WzyA?#m|)esas7h(9@@J)huZR0PVV$2ZFIGJ snut>RI6aeINOzgJSp)qwu4(mbn@^a!LzY5mADCc`M_))()_j%zA1zVd>i_@% diff --git a/LemmaGenSockets/LemmaGenSockets/obj/Debug/DesignTimeResolveAssemblyReferencesInput.cache b/LemmaGenSockets/LemmaGenSockets/obj/Debug/DesignTimeResolveAssemblyReferencesInput.cache index 5a81c4dc12747361a1c97f55fc667b95fa9dbe9f..41637e9d042a6b29ded8888bf72e00a06e5f2927 100644 GIT binary patch delta 284 zcmZ2(aoK!=6Qk+mKo-r7ITqZq1zuJ$1x5K;smUe9G0FLP$@xVonTfi^sYPX}MUxZx zBqu-Pejw{kP!$KSxU35SRnn7_d4*(s2&&r2D_V`q8PUK7GBJBLl IY?6x100y^bEdT%j delta 383 zcmca?zT9Gh)8vnwS{rjLxHW6CtYTd9lS^|`^Gb?i!ZM3X6LS=TOG;BR^A(H?3{7JK zit@8klS_&xALRb28BBs|#mPT-_%#Da&>=b5mRCeGfT}o3+<O$;>rBxy>{X@T@7agzo}`V(pbCG?!uHcdj>Q!vmH+5|BDz9-G- zy_H04Yxl&}(VXME@7|fY@7+7!yYId?Z)BhF`F!u-;9njMevoZzW077>Ka-!)Yca5YPCD;I#za0={oxNWU@}8KWk?lUS$r&;*R8|oRvF; zR>b2@B#S0w!o7B8XV%^kci3uk3P^zDT`vvd0&7jmj`rA*Y$_eh+f(12N{rZPCq11? zyHio8FP$2TMV)j#1^uzTP5>5Vj7@fY)XDgKQ>OUp>ZaD0s%y2YYfgzauP)O6()z`h z%w(&qxu9l%qz+^{OK0GpVSo(oS~{>I6&X!9$!um|Q!F!T$D8`IqtRHZslBzWYXI%t z1;=Ct);NiTy~;`Ury{!@h0mSn-)W~u`qIu0JlU*lXe1ks#^VMZ6YE||J;P}`(bNOi zXPVBtX7R%F2R1lEPTEOEoTlZOjFZ6Qbut6q6CCKl_0H9)^zMQ1x|JIX+*oJ0E8JeV zFx)L`m(|syx=Su!IxylkJCaI9Qt4>Sz98eI$DH)L*s(WZpDUG^P-0I?n-du{CLY@% ztFcf`WKych17vd|E-RXfimt$ej(QJzEcc)@C-$H{2Q@%p-vlTmqzcdOG-w^0On*Iy~}-43fh+gVUNsU3mIk2K-EsAleB=;v7!au@U10z zN&>ck8GuZ_n13|H?M!xUDjFM#Inf3?mj>)vhk?H#>WnzaDAEID1r3>qoy|IFnLSKP zk0!IRgp<#)P8)Lb1m5Ng1PtLnt$BPqILtWFa6ZwHHF!e+^Stnh+^}UUZ{fe&52vqv zZT^pX%~mjG8%aBe(E(zOwIh}7YF*U6$g1N5`0YE#@AtzwY`E{k3(RG`Pxo4uWpv8M zcA~KztsQMkEJ2{~Y3|Lgf4j|jw%6GHde8YftS|3dF8TP zzgrE)yqx3;5MvF9W*vj{scPL?*;vIo4kp^OTL*!8B}pHMu^vRVPH^Di4PKDl0LJVm z*$86%9*A}W2Pxj*b(T$F%ny=m1~EPaqTW!)0qs=v-|x%DD*ofZOY6Lzy#r441*XuK~(D+Vi2tJ7JAerGHN}(6|zN(?5;@H!nP$HolfiGC3VbEPAD;AhBnR4 zs7X`3o$1e}W69x$Sf)Rg7>PT~y5{V(Gg#q?IGIc;y*wS+8OvhP2+Mb?lRcxHW%D94 zmPm}sg|&vXldwldoM=xvm6#$>OJH{1R;&r-7vi*BO$hOb@Z3>YtHXknla58?njL2K z+uOEmIgv!JxoP~>6T2w6bt|7E^DT21h_M@_Pp+huS}-dtgUTm-JgcC|(RiHNNrFs2 zee~uO{0p4Wla@_P;5IW!k^*UDBFjTC`;+2zx>{&&XSl6B+_88u{?mf+wL@!1({gNcC!^H(Qd4p`phgyBePQ;;zAJF6dKu zJ`{}WK~@(U?M|sHX#B^ZG060;5@&xLx0{~;X=IMbLol(D?Y;p8<3^B)arRBn6lB6y ziL*b6+s&IvZUJdz>c>McUy_af6bi_y~pz%{xf$1ZiZR$wM%cls4Rjg7JBf2@5E9Ls^hXY9(&} z0&X|I2-3(*mxtH_BAfh&C>UP?nHY?J8JdDjJ1gF!iAn@+_~A4IB|N)EF+yd~|kW9c(o9demA8`}|!$Fh5j z!JfE1oC!%qh5JP?=o3@q5%l}(r{(j7WWl#i_~0rMeOmpLaB_GE7j&VQCXl5+XYU}w*t2Z$N2tyaL`vD*oR(m zC6rMi&iJgrI+QAMu3iJVz*^`$9-r&CPG@`S;-LROCo`N^6m4y&@3u$m2zsF};Hxj1 zke66@mvF>=^V=v(cz*|lRW7_QU)tjC7HAp2Tfl>Ge%FJ?w@8%|z6YR8EYBk@ zun)|oc?KtZmbe5QAxJ2Qi%}qUrpr^D{SXa2%zQ;u-~;59XaZ50ca`S0&qMX*?miC{yppNgeB}}fqh^)@(fPE z>UMCR)oox3R&xU7S$&X~;}Dz?N%NjYL0bJ|iaAG;;%8j(vlMfVB(44@iVGo4iD5s% znavYuy7q?Ce4eeCpEo(M56nEx{<*6Cb2R%oAuG0@Gc0N7Arz$jhfxrmb4l?LSNsCS zgs~L=n=Ae)#rr^{TYiSZ=>@mAGcmrli?n>F4;MLfm*~&)lFw=ZknOG=?u+H zAH%K1Zu&T+((sp2kZwB4Dmc57;#XYps}$b=BK`3i#RY#@=6^vTW5Iu;u*UVnNyY*l z8m~j6n5x=rX*Qdx&GnkiH{n*#W-lOcJ}8a*H44(^H(3Q|h*JEPD}I~eTU_xmSNsme zoHI&SFll^uX!vo33XKUL{UtQG?kCzQasfaH&A>##L^$9fjh3GD1-~uq^QzZ>Y>K%G5I`9dS^P@_=@VC zqndYKR=x9*<{i$1J?|U@!$WXlE8Q>ynx%KnWEFR~;+d{^7R8+W%B$b&ir+^uC%@8Z zXMrmkY-U5K4mOqI#v5$TX5B|rXB^g?aY%K>ziQ678!h&n;X#G7Woh1AXqL{H$0|5~ zmg4!Y_&ka^f0kCCPq7+Bym58`&ZvV;rKWq>kYH$LE1ps9|FLHO)2jUkHT%C@(*FIf zq3yH?jM>2|?s3JPuDFZhdtLEDSGn}Zu1B&wL0^WJ(K{Hq>y z?9(SQ>p)|CGm+sAlU8AIkHc1QX&GJdQ{s=h`+WHf_0?)O2|gQpkz;U zBf4d7!X@HsN*@tlRTxV=q;{AG^Bo4bLF&Q@tJh)NK_|e8YYkNHM*k{YUK;>n>fJ5= zmbn!ygSusIBb4q_&?D|u7)#uvLTSGSB`$IlLy3zWH7Hdvl%haP$xPEShX|%GDEJX~ zD~u&RuY&0=4NP1bD*{u9%VIqI;s7XzXO71W_}3ir`M$dV)iEDp920@ZL2JmXV&ugE zFC~a2z~+KjlDPY<0yS~F!dT)nD(-I6aL4sa4|i^E9pE}<4N6rEr7RF*EY;jS!hP5ij+O+_c61G0#q3t@N z?Ro`ZVxPiT;-f0suG7%Q7Z4uWh$S8YzKEznsfwZW2_VJ`TQ_(>x{*NIqhLo|qcE1Z zS_RUW2Bc>JnIa$wzCvQy8h->1K~h6l6(j7^fRYldZuKyB8!>j30xL11FqTNE7)xpx zI}FeiVJvW@1|@f)p*pwp+yTUR!Rk&ANOuuPF$Ftfr@~lbSOwCM2Ba5C0*UXLYcZ*c zW73y^nDKA)EOS3`WGm1S+ZDzVgDQ@;X*l|6DI5uI2q-z|y<7uWbw{fE0VQS7djM>1 zVc|hy>_ZBy#Ab!D#0OQ3ZPGBty&uJc-YYdIRX3Er0mPJ99etQ!TCd8}__-27Jq zQWXQ~aUjOc*Prlk^dxb#M1hW2tT2{Xq~d6yhNE{%;Ye_oX30U1n=ESpt73rt10Jw4 z==}(6ZqR!QCDEq9O0+7BC0bOBg*A+EL#`LA_-2MjfE#mbP^w}mJ&OliCSQM!U^-vH zk2p_ZEHPgN(>x7K4W+;&xN*8mx0_hUjtPYgX(8^z{)82bFjHl@I{owECoGcrovd_OchEqG$`>4i()A8 zON$zmsu)Ty;Q^N!1&}GCP!gsBD^ag7mI$gCGc=6N!>u01ytJJkN!6fK#ZY<+57=9)dK*j* zNXJkTLcxykDU2oFd0Y8#^B5R@xXFp0CHOgA5s*YPq$N{!ew|iBSQR5IfX}UzU{wb; zhcN>s@hf1*{TUDOzY1fC|4}jahK4bI$ykK3KxYj~)%{M8|Hq6MtoRexIUr3VkX}=; zBVJV)OT3~2>8J*zu984nSOZdZ1L-|LjJvR2_!@8qar7Swbi}_aj3s`d;^;*UNBmg2 zC?JLSQ8W*~;77|Phd+LETpRwXc=$UDpengy^?sbl#jn}K-3toT#1VzD#92){KXpoxAqQfn$>yaX0a`QDVm@iVyK)B zet@m(NS4tN8ZEkXtlXviK2q0uUAaqdD7y5mC+I6Zav+)1LZD#0w;L*#gq9=0xb{Mao|+r~GI+9ltEn@kSXPA+8PUwj3*WDVO;4OS#m?-ZQ>Sgt&;K zU&_UtQkM$O&UvE9%bZB@@C#0)N_J8v$a*zQluK0;V)~QQ#`LG8HjY`kHjZ5-wJ{>) Mv@s%;(l+(~0O}XSCIA2c literal 13598 zcmeHMTXP&o72aOWu6D+<)^-Tk5W)l-aMLmW}K+E@Q8=9__ZIk!E+q z%&cM=SK(Z^BvgXo2?A6BFOWbLm!gWxQ{V}xqKd!|fOmM|7r^)R-e$C0VA)n7RO+qy zwoh+Q_v!Ah&*?rJ6R}wAUr79B>WTwiIBOR?*LUZG%!!Ka6f+ksf7#DWxSs4WFGQB(ZabzKY3VvqF@)rYZ&XLqw?*M3o|5&vX z6^t3zDo$F3!1X$7<4H}pr4`GQ-X1nO;TGkr=dRjC>7`hZwQoudBl44%ET|Bv&d&+egC#>arVMo0kmFt-~|G6~2pjpa_dg-S`513!1k z_A8cSW`j!6cFlpl{^1<-UBblpxfxk1S^aqqj&2H@(oJn`FP9wEk%5j@H?yOY zR$yfaSs9$V5y&#Pp|AShg}(Ef**E#og-(i_*@+q;dm`sAZwB4fWObwUGY_Eknz|MRaY{EK=#LRT3P>IV7^?$t z|HFMF10zO)6Z^lPoOnEr$)V!0hZep+H1*z;VHn9nN?D#(2Kxs4j~WRQV5=(26MzA? z*-(~5-Kxrde7&;VP+L{m$JZ;%jlWfuePX?`G*DYr*(cX2D`-e*E`R*e#j5#u3W9!^ z&;v+54G5bLL8HqT{_^+Bua9@b-ee%?y@WnMvL6tBAwdJZRi|S>Dcjt_f{uiv`|^3S zIvs-$^dZ79AbA8386BO@Rvq1lQnop|1fAnnRd#fpvRy|}aISGF3twkBqPx4zj*M8t zh2bOpM+b*w-{{eVi8eRnka;rbw*0WOPFa37@a*zJm+fcm(uyPL?GG(lK5kHDsC z+$${F0WNsBYEPFZD?I(Pl5Uqu6?F;j@?^MNAcZ1 z(03ORqWXM4kH$J__U}jKDZH*v1NPE)FbNu56OVoxkURr8-kj>D-W%q}3|i`>Ay@j| zBR|gKb^RRSlYqT+2}~NP{a^NP7pf%kv;s!H5p+v8vnu}9m9GWprpxoLo@Z$5gyqXr zzIMat&NCu)+*xovJ6J63kBXD7H{q58&vo|YVWQmf0=q*bY}T^9hwE*qI8tEG*^V8o zCG(Sxwcw{!L(TU^XJ<@wsMHycr@Cu)r>4bl!wi+@?S+bGF~V0jR=6Y8vuhD$@9nLF ztyZyRXvgkWpPK%W8EDf%*xIY{VdIy?3To;ow^z-=rb9j;$01J$Sj`jBTE{*jz z!hpT4VZh#c7_e8h+zbOY`W+MWUM5RE4PcB@zXA~SXC|eq09BkrM$p5Vc=T%kRr~@n;yHlw z)Olp~wNl>Duj2>Bbqks6o5^)W@;US(zFp&`b{oRCY65>Kr<}}eynk-oasDaza2zpKvkG=#@#mmSDdQLSTJY^)5_cZhp@@fjo$YdYj6x0Ki zx{HRz9gKv#QM(P{D>VT>VAO9yYK-~~2+dK`Gq;YKuGYk(R{&~wtH=mCUlWi1B0v@2 zKt`Mfs8O%6xXA)Tzlj1h^v@!5`2h|+66Dlxp>^>}ZOGbf2roy*|5AATwhs%{HMTwy)MYW@)iL2ca8okCTJjp zMl-P8P9^c3Qwh#=wNuHLu)u9i^lSKmo7B)>2Z~B)>xdvE7{U+lBd@lP@D5DO6=2Ig z(&$8XNLBx1bsHXa?_hyb_5WoJ{i_hGRQeinq7-6}a6*D1mcvw9icl$sS6ivXgU1e) zwlS5yjs=b!9=?HsDw*CwPRI~{L@^{7q7Ww2T!c(KrnZnNeI2Dtalr#4-ShgS@w~Ug zw{6U~?_hyLrt7;9t8Duoa^hNuU?LY13~@Egwkr{~@ovzHsB@!dLJ4D*XMEVmJxc*1Yd&pOL^k?M6(;@1Jo{(UO!(kph z72y#>|1CU9Gsemk7mTsCbAM^Z&Qh%Mi<`Mi2ZsLknqUl#RpWTVDb2_TQ(Q1I(zZ2T zXQQq0THCfYURIdmf|r%Ht?`5zZH*_*wyp6D5v|IzMBA!#0MV*+1Z}Hwi$|++tGBJn TRb+|_t|F`Ozn1U?Y1h92M0)Xo diff --git a/LemmaGenSockets/LemmaGenSockets/obj/Debug/LemmaGenSockets.exe b/LemmaGenSockets/LemmaGenSockets/obj/Debug/LemmaGenSockets.exe index 1a297bbb9fb7b9739c026a740f903468cec5b142..2a75edab9c36843d285d1146243494d5811faa3a 100644 GIT binary patch delta 1655 zcmZ9NYitx%6vzMf&SQ7B+bz>!x7%)^3+1u23oM$Lg%Sf&mD-Ah(g1P8Beg(_cIp;2 zhUn4{fH8uW@k6ajg7U#4k?0D66cSUCS`#&*#z??dqS4?7Ni@bZr1d|yMA6CK-<;Py z_netKclT%aXJ2|wf2}!~8+f5w+%+(~De~h|;34o@1za-*<~ZT)K$!ZeB|x<|N2a|k zVxd^+^@~C>o}q)+S`G;V8*72hC4ffF1WZqqXS`Oai)~)JTo;!61fb^~pf}+S%H`gd za-pd7Zpzsr;U%eI&t3Y@oOMHvt(E1d*DWoEy`+hmnm_GT>2om| z!6-u`co#lTC=M}&iS6AXd#x~m9l;vT?4m?H69vmn8 z@Q%n}BV7FC^;=PAg1eYs#Xr||%;yOjwP^moW6Zm3IdS{;=v&hc&{mwVMr84WaJeX6 zEww}vMlT)(j862ByOAe%`V`QT?AEI;n>XXJkv~2fI(Dh?$c5b3i^iW>;kA}+j93A4 zRzybl)009LN6LUbQ`6oN`-uw2oflRnGeCw~k&%3~Ty*we}#q;Iu X;;3_~7@0X?$ZV!)SN$diGJ(GV*6$)I delta 1895 zcmZ9NYitx%6vzMf&SSf;q0{MODFqfwX<{V5`s=uuuv#)IcdkwqmDjw?u+* zTO))-RLVrdBN{a%1cC->kcxyDLJSi9P(sjzk{H4VP$Lo(V}um{b6YDoyYsu}oO{l> z_uPADrnjcIX76sj>R3y2|DGaoqyOk?Yi||s0(dP1hK>GdPGmh0p}wXPDDtMsa<5sG zid$Zv$iy$u*}sU1DCN*AY`&9>jR;4#_42ha^A9=(p|ZIgER;VYGETq_lovqm5RBE_LYi&Eoy5j5 z9vh+hjULvf_Wu0=lPAj>Za#x~Ecbbl7^(jZ>sBHRN)ULZ}&J@zmYN zd>11(oTx<{I`B5D5Uj>@xiujt7$}?0*37qqE*CUZHiHNUQj7hGl~T?d$w((;hsrf1 zOcXfb#9$!*p?@G_Pu~@^-auXD1*hb7P zrvR?;Y`^6&k(z}M9z1ZOu%iLDg2u8+&oVj|3^P~p>pWmA<#*o2jpfAHs^*6sl0IjX7Ov9O!sqaLSn)HaFtJ{qVd0a&B@@^{&Q+YRSgY*MV>9iI%Cc6m4ZXCyjALXsPKzq6#H;wt ze--o+f?M(5W$}69fBgExs}ANZ$mN6aKl|9J8+TMf$-|*zT?h1mUBR!U(U@*cr3dCm zVj@uMcBWgCode6GVNv6Dt$TKBPwSRsdOKirVGG%gPI5C+{#nuDK%f)tSjTTjlDEsg zjqqNxre`O@+kSfS;+u;PUD|ZyK;?!Z?}W9oAQA{pwk*k?C8Zu0(&7tSS_3%~J``&? zEC18xPtJdAidcS{FYUAqtZ1xj?7cqs$}bgfE!+3)+4)<8&;9DJPRG2KI=8LomG;hb zcgw0|Hzyl!O!sU|y74(x)sMF{q}-17wsg05KYKVjIQI4P2e|CrwI_)S1JRr&F<|BX QZOEHd19ssZ(O(t#2iv+t%K!iX diff --git a/LemmaGenSockets/LemmaGenSockets/obj/Debug/LemmaGenSockets.pdb b/LemmaGenSockets/LemmaGenSockets/obj/Debug/LemmaGenSockets.pdb index 46bce95c1627c42c9138d89ebcff6e407c85f1c2..228518ad40aba400d67204129c9382528636cd05 100644 GIT binary patch delta 1987 zcma)-TTGlq6vt;iSXiK3wv@dudt(cQ-Ev!QrD%yjk(!ojO|&2vw@ZN{;8IJ{Q1`=V ziJ@ATV>B9w#F)~kv6nSXHR+4-!K6MkS`#0Lt!*$dY3NpJVjo2RX8{vTDjqodn>lCZ zd}q$gH=jDDj-5|THBRr*wo4&I$6pPuONBX2Cx7lZS99a+#i%vatKE_^(AL*`$#egw z-HwA9AGdxs>^dI1Q2(h+ol`IeGN(i%zC}5u*E*{?Ov{e;;m}ZjFmQCFJuoyF7z!Qg z>-LQVL&t(4+EwPYiT?i@QD<<{C;Bw&24S3ZSFfQj%XHQaqScx>$;bHzG6jt%2`*5? zFcfJmnKEjVk6f64|CdwODbv{KTY#?sYPnS+w;#97w4=@QI0pNI&^2kHaic-Qo|fp0 zaYD)ndCYeFHkv1uS%tncx1eLQ?BmX$<&5m49BZxYjPAE)N_4ty0qrDab53;6n@u-t zIyp)AYz8?^DY^S(l%CC@ML+b&s`d-E{RV#*w3B{t6`3p; z=|;~A=7Bb_2+Spu+l^w@xh-KQY+M8vm;o-{T~>K{n5S+AF;6vvTR{hy4`NQ7u*Jvo znPB^N5N)*>+y*v)gP=I`sYEBj9fEQLq`@1tOa~ zQIEz8FklVpAc!@nuYwr7TKzg|^As5>Q2z{}6HWir|2py-r7IpwxCLW5$tS=~;C|2m zV*T-b!66*?q2Go+*ADeiu`q!IR6IYPY!ldxJ|`IfTfsvh*N_);0PIDd4|E^c1@?o` zFb;=M!!Qhhr$L@TAQq;+4Ps&HJK!+*E_fK621DRE@F@5N_!9UPI0oJVUk2}hh^_t( zs`&pC90!-dE-*>r2%%yszh0QW^Fv@u0Og7sol?XPXqB zCAM?q60zI~@5xqKZhH7dly!+xd*1%-xC0n&YZi!Y@U)B5#j>ECo delta 2290 zcmb7`TWnNC7=UMX+p?{+v|RSq-R^En3s{Ov=?xm>B4C9`5wrnPlv-+SDHP}y6)QbU zV@Ndiw~L*648Buq{IYYAYy`q*i=JA5@Q0SS_Lmb(C)z7V87_~`t!!MgtFyhOIo4%*$|Gi_ds6Y1``){g-dzuP??`!Pdm`pQ)gv>< z5x!cSA2xceFO%IrXQz~W90!t6gK6?~_Iq8f>+0L@J8oanCT~X6EcQ+bk@;fk_&nDcJ0aFBvAq z^Gm$Zmd+LjO>`sw?J9h6mpjI4l4wpO#7t+J#%ju=P)wVWisT&*O#MJ z=g>)8&#E%q2q(f9pa*V-a+R8uikHDQny1$Mrb?<@iL(atNJ<6b)!7*s@_bJ>5(d?HG_$eH4-&h}jybXh<=U(Aj|P_~0?}JKDp^Y#$M;`;UdHu<(VDnZK1F-f zp&WJi`AIA$SMVHtw$Kz;L`=!VT+>jIle;4W6)l>c{4ScS6#?^3MWO9*wj$u51XnAn zdS_86J~{HR+j%=J8EQ4_kmx0LvW>OHP`0E@YOHMty;MEf#@gh5rtT&$@PkIBM%b4m zCJn#)%^o}XF(1{4Y%W#?921;oxYF(WzyA?#m|)esas7h(9@@J)huZR0PVV$2ZFIGJ snut>RI6aeINOzgJSp)qwu4(mbn@^a!LzY5mADCc`M_))()_j%zA1zVd>i_@% diff --git a/concordia-server/lemmatizer_facade.cpp b/concordia-server/lemmatizer_facade.cpp index 43b0aae..c853024 100644 --- a/concordia-server/lemmatizer_facade.cpp +++ b/concordia-server/lemmatizer_facade.cpp @@ -11,10 +11,12 @@ LemmatizerFacade::LemmatizerFacade() throw(ConcordiaException) { std::string plCode = "pl"; std::string enCode = "en"; std::string hrCode = "hr"; + std::string frCode = "fr"; _lemmatizersMap.insert(plCode, socketLemmatizer1); _lemmatizersMap.insert(enCode, socketLemmatizer1); _lemmatizersMap.insert(hrCode, socketLemmatizer1); + _lemmatizersMap.insert(frCode, socketLemmatizer1); } LemmatizerFacade::~LemmatizerFacade() { From 3c575cd596679295b91d11979e6914d0be219fa9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Jaworski?= Date: Sun, 30 Dec 2018 23:34:00 +0100 Subject: [PATCH 4/6] exceptions for lemmatizer - alpha --- .../LemmaGenSockets/LemmatizerListener.cs | 23 ++++++++++++++++++ .../bin/Debug/LemmaGenSockets.exe | Bin 7168 -> 7168 bytes .../bin/Debug/LemmaGenSockets.pdb | Bin 15872 -> 15872 bytes ...ckets.csprojResolveAssemblyReference.cache | Bin 31050 -> 13052 bytes .../obj/Debug/LemmaGenSockets.exe | Bin 7168 -> 7168 bytes .../obj/Debug/LemmaGenSockets.pdb | Bin 15872 -> 15872 bytes 6 files changed, 23 insertions(+) diff --git a/LemmaGenSockets/LemmaGenSockets/LemmatizerListener.cs b/LemmaGenSockets/LemmaGenSockets/LemmatizerListener.cs index fea4e9e..aaebab9 100644 --- a/LemmaGenSockets/LemmaGenSockets/LemmatizerListener.cs +++ b/LemmaGenSockets/LemmaGenSockets/LemmatizerListener.cs @@ -53,6 +53,29 @@ namespace LemmaGenSockets private string lemmatizeWord(string languageCode, string word) { + // exceptions + Dictionary> exceptions = new Dictionary>(); + + HashSet plExceptions = new HashSet(); + plExceptions.Add("i"); + plExceptions.Add("o"); + plExceptions.Add("do"); + exceptions.Add("pl", plExceptions); + + HashSet enExceptions = new HashSet(); + enExceptions.Add("d"); + exceptions.Add("en", enExceptions); + + HashSet languageExceptions; + if (exceptions.TryGetValue(languageCode, out languageExceptions)) + { + if(languageExceptions.Contains(word)) + { + return word; + } + } + + string[] parts = word.Split(wordInnerSeparator); if (parts.Length == 2) { diff --git a/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaGenSockets.exe b/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaGenSockets.exe index 2a75edab9c36843d285d1146243494d5811faa3a..a3ee5a9c6f09246b1daf7e7bea9141b1c03e214d 100644 GIT binary patch delta 2122 zcmZ9NYiv|S6vzKF_p!UT%Wd!7cDs~TE|ivSw_=Nr)JOy@k2Xf7EMThjQMJ*kaO-Z3 zwxZh-!6$0HAt;)t35oF;v56syAADmZ#%PSFL}G$TjT)aHdzCuO_9nZvQLj!w|ilJUZ+GlpZ7;c$r#yy0h00H_g0`dkMgWcZGCaRifS_DM9AVf6y`eN0ws0sRqOQ3ut~XC z)uoDD*UX@vWrxjbC=tuD){dR#YO)lvEd)2FM^%_Fb83#tl;b&V268rPQx&~87-vjSSD(%-;&zRnPY% zJC_|hb}Tgu-mH*Cbh3*h*Gh}psL9Hon`Chs?*MLP|F@F!BZZ+&*A~^9M-sp~_KoZF z*iX4i$*b4*tYCkT`WZ5M#cD3GUaHdiiLlNIP>1uisl zh6_R2N8G&2^zoMF!=`WA@QrDi#s)KI)Zq!z##yx3cwH)Xkv@K)r;pd5_@d@Jj9_Dn z{FIhby`cg;1#U50guo@nVv$BG&Jt?^HE#mH zFa?3vw6-m?QESu3$3}2A=k)7WOkjYTf$7a$ILWkzrn#(myJ-X}F?9&_|YcQnc zn>0r?-yrMw9n2$3s?ZafNqlWC!RL6?JdZrn>>+=YYf!C6>yI;!CA6ry)##S@q8{HG zoK?byGGe&H@)53kSU!QLa1i_O45z(`J>>gjKRzLUF2{#nI$`S`C|;Kz9J#V1 zx<9ec$&VIC1~)G13JX)KoLhynt}wcKaAd>4jWUV?6 zA$|&YysT?L8rMX(yNy0@cl3hFz z7{a^R8})9t10nqJ2{VA-X9XhOVDU5Nk8h0zD_g}qQx8=J+!gRgzF$>E-Sm3yMv~vy zBp0zZzaszMB|A6m-@Ksv=~c6L_pg5Li>X6ZapAAw7xzTFUdh4aytM4^h+Kc)lw19$ PB{z3Wd38UE$@bVkP%=Sa delta 1791 zcmZ9NYiv|S7>3`ObJ^X~?RL*@yWN&nwp7ZtUF)L8luMLLMVoRjRT?Woh>$|$Knof~ z*w&JpO3p=evPr;4=@nVhz^B!)t*s^%G4%m0u$>ey5lr z=SKY(#GX_#OAnt%*d#2hss&aT0R}Z2u>Jm6)?Y546BYg^@?dVC4ucC*?vjA%l>&}q zALvd_h@wEI19oC4%+@m%*y@$jnV1l9(={_?kh=s&7+#XrpjSbL|6{YuhqSdg*uzC^ z4W-K%61N)5|BcGgV(K8;;aUxtlC@CDyau?a6ii_M6hJsVGngV>sU``g{%!zzX?BW} z|JCxLXMs0;_dR#qcE-h+YczaIQ%Gq8@xk7<-4SPG`hP-JM z(p<1p6zG|ws=kwQ*@TQcu9Gp)Xrnw{)OaS4xXtZN*YSy@7UFgw>DAE^FQk>O?FQ1U z$kqvUa*y>m@tHx_a)Vx)a-C}>y?P4V29NbQ;5^~xd6Scq@nX2eUL$SkjDOflh=hO2 zYJcEUW~|)cJi zrJa&WGgzI#V)GBO*|?4bQXH_H0Q&I}lL?&D+TLWF)_%L;+XO=hbk!6hAERdAn%4H_ z(9QK)P$vQy#AO*078Yu)kjBR1f-kwkC=tBCoMwQOc>Xr|DJ_p0A+#BS_eL$Cf^<+% zMln-!mgaoTR?XZB-F0fNBg@dOsiuLxe0Gz6#H<5qxNH%e4Sj0!x+xt5|7j{-Zqbs_sC%g5y46FkUXW6 zo+nq!3*=EbrujL&XN|8Izl3d=XZJEW!q?M}S2(>ZhVUa#PZ8px32{`BQ!t0DMGM)8 zPV!#OC+6sGp?0+4NjloK<#EmRc$SvUI70T~J(0yKc=&~H-s~&f3-0@kXPoC>-I$xt zRZQonx;{kn{~d)RcQdfiyZ(G9?hM?@;BRn7@=Zn0i2VEEgCf5);t0R9WUBD|eI>Qg zr_cTQ@zCK5EeA%rzNsJEJ=-6lYn^|k?r?WnDIsFJ%@krC6ob_kZ+xt;-+w3CAO`Z6qA8K#pHi1v zthz8&0vO}p<ZeE8APz*G#rdJKa#WXzx#JPR{z_-4FArSWv{mH~x#%s5&L525S;B cne|u3NAju47LkvXj$5)hn|CXJ7lYZrKRr<(yjH6(LJ-ou~f ztQOcoWh;wKMlWhB_-(DW($<@t`J$Oyv)-7mDjoHvcj@|^-v!5gQD^tu&vTyVInVE$ z@A;jd9@K;56&cf#Y(;K3@V%OIe2&tta{f_s0lz=5srkMxMZ$NTr{fm}%UVBb&Y$jT z`S@qIFtZ|krE7k@HhJ;)@YS&!XM0*+U(x;f&ULd5pQy|lU$SV8&plB&ujYd3PWmsK zj$D3c>WVTsYa^yUuf*SZy>5;BLf$vZ3HwZWsK}x@^rjQ>naiQAc2rc*&d7}y^!qZ$ z{mGKK%KTBZJ`rJ<%gX-3YZq7~B{??15%Fm4*x`M#0|UML;|E1n6SsLw;|KbhmSiMz zh^OSl;;A|1)M0bwDM@|Nc=s#O?%2}Q5Q%=@5qTxy1i78k2bGgMrv zI?c{cXJm2V(Kwc()A)#g{X|YDSr7=Pc&2Rb2t9f@`;9j)oufvNW!Hab@}3bK&Qm#5C2CN1?o9AtYh?q_+%_8WC;HN8mD8 z3$x&(FduG&Yv3mM0IY-9(f}LbX1D{g4E-!@hCOip&)d3BBq1$0Uc%7|4=n_az{eQB z2Dicy*ak=8cK9x&sD2OHLLb5>7*D~Sa0WgJzl1jaHGGQkIk*e{2%m;mgeO0PaRWyO zyaji|+pr6+Wb1oihGa^X^w2lcOBZ?>j4bh4yPcV+E7`ow)t;Rr_sA+V#b?f!H6@Z~ zr|z7+Rqw)r+EJcm&3$k=L@AvMdtoU=v7t)X&v-rLerQfe24DlUn?+$c%ot$>AwBgG zQX`CwCp1c+yJ(aU&F?ul1Yd;1keHyj9)q@^fheyh;2ZE1JO)q0qwsxr9DWMlf>!mn z;kS^v`Z_!bZ^Bc&Ao_0%CVOg6LaX+xFu>M1Qk@Pdsx#mj7=$0dd^qJcl%H28c8t%h z>qv7Z{HC)q1l&WFXB~5E?OkqJq~5>cCU3cIeQEYSb+nmlRfT4}s@?I->O^##jqbI7 zUb}j6(NYrKX8je54oQu%L5clG%-$pWX-l@$dG^%r-1gd(q7n%EMDAXgrlhoXu>`dO z5n}e@*=A$rhYf{JjnB+)2>#DUzB*2brAeOzz RdGy@bhP`UG>Js`^{R^ZfE5rZ* delta 1238 zcmaKsTS!zv7{_O3w{6$LlyqCy)m=BN5OdQ)yQgF(6=G2k-OFxvv3o&SdyFo0fwYl= zFjy!uB8uGuQBV{;1@#coLl4sJp@#@FEa;`nZ_e&RKGes~`F-E_pEEPx%@h5-;R~7 zTkn>Bdow?>BKv0Z?cLGC1IfBuGOJgmN90SR9#As3lF7|ZCB;Vj}Qa5kI|=fFl-3s(yf5p_5_Nz}tO zI1e6&bhdmVOUK5|i>G%(Iu#>{Qrc6qDr^D$jJpVqgp1)AxCD-c97@MTxRSU6u7VUL znA~D+r;rA?p4cp~1vbNtkTHxNx0d2;C1Fmu6|RHZAWdwCakvAXhh}Fqq$sN!Qj~QO z?uM5j=Vj)5AVYR@Cj^(AK+2=8OGT&>pPCc?%f3GS}_|^ud#m zOJ$DJDuAc$r-RflwE zb(3=3>Uim(QP-M>V^f^ixKfl3ntZvFH;2$e^1m zyZEDz%4kxwRAyWjv$-;XHvBJ5zo-eRUZwkM{Qvo`Obw`#yb(e?;O$!4ai(@LN+Hec fO^{ZgmrbAJGnd}nFMqPGMamFgnq4Z{+$sM6&cpJ@ diff --git a/LemmaGenSockets/LemmaGenSockets/obj/Debug/LemmaGenSockets.csprojResolveAssemblyReference.cache b/LemmaGenSockets/LemmaGenSockets/obj/Debug/LemmaGenSockets.csprojResolveAssemblyReference.cache index 642f8d9c08a9ee81085e9fca0fceef6d27858ea8..08e454dd60c5e8bc92e0955212eca78ceee26490 100644 GIT binary patch literal 13052 zcmeHMTXP#p6;{im%S;qI&a#levN$Bb?#7WN+ln1;OmO5lwqtu8%cPcAMj1;jd+d?M z%#6ITDK?=B*j=iCg(sl6zW@sl6h#%6r!Ag((<8iyg;Bp?Iq1 zs87%7+v%^*>2nldfk5D2^xzjyEJ);&^G4pb9INQYuU8DS5MS1vEhnC~Y~J5@hue-} zmB*8*cru=dC;I!+71OQQe7wvnuC1H>eT$Vf)5zcAyUW%VFORP!hKi};NHLi#q!Rj& z9>QSf+K-XhbzEM`xVpL;3oY4W^FQ*h$by0hWxYbM96)h;3m$UBNg zNI;vj^um;$cP%?oTTe7?m9}-8+o#ZJ+A8ox+uAV-+>W9mW9)H-R^){5=w^jGfk0PR zAQUur%*acZ#YcENRMS{O6#^>>YF01 zIDOCEAHDp1c8M3c&C7Y-H{m$EM4IPLwxQhY6v^{fEPE>(U%0W{ot@*Ql75qyGgf|!lR%x%&h3@- z^P-a|b)p~1oJ$T(#x&N%Fe(j`Kayc;d-rn56a#5A(9Tf~G^x9K93;ct8~0sa#xZlm z*e7W0+)<8=U60WqlaILTfI|~Mz--|trz=#2Id{`YnZwR`(dKJp=3H@V^RCc%4|Tk8 zluH1t=8W>E#Av6Edb9=Z#;0Akdc1p&dVAm~#JWz?NOO;Cd(h)4*hRogeG24VAWwt5 z8{|D8XF$$^ycgtsAkTojALLn(4-g5VgNqG2>2Y+D6S@dfD;yAPaLm*lcit)(MS~Y& zdez?UE|7(d75FwU7sw8i`HeYw-F3Mw%yhS1DZ56A*Zj=MV%;F4Ss)k;vfxR$ufS95 zKOCO^)3l~(;XYzDve1e6P+}-~MGIk}|NF@*N|+|w?^hZQJ}<;;?nkJjVZX4l_(2-1 ztfC3tJ@@uMll(8!>N#T64}zR0623r0+ZiO~q!J^8BN_t$u$!_5;Yno;G62Lc^kKgc z0OS|?h+hZ*^9w!a6AA%In0^gj5<(K>ad4kB z4UYbGv8us1EnXb}Ne~GqiFh;!gXY)Zpb%jg@*h>Fo=GkVV_V42J=bViw2rg zw0QM0$Ow_}C=q!tVpphQp>ffdLnQ3Q{vxE25j~YpT~1yZ8s>@7Dx|5Rxg+*49daM=cQZn106CFIYnz%~*}11#TO85wp<^_g->kX>!<5eONc(4F8 zh(VMC?XH;CI5ks0N~9NV5D$Z)72fe06~Z4QG9e-{aTRaQTcB`1k$Qc;6Vy`#23*Wn z#MK9%?{#Xfra>l&^uiV3VX$pt(l@9Oo+7f?oV4#Qeuy>Pq>dvnp@&(+G&NUeKxT>b z!WiRWunA%fx2O=FBQn#vhC@t#o_dVH<{Snv3)EbFiAXOD7aj%B{x9bjVPGf+E1=~X ziJYEhQRH-Dxmwa^rd+ht9MUD%b%#fDwOej?4q1)~bKSBHccXO1%T8H#+A6!YWuDHF ziL%P;7>=?li@ISyUGGDLW3I7gn1;I>&P|#6y3->%YPM&QNPu;TClU-syK8o*Hj7ll z43&$ zo5~O94KES7iA>tqz`(%l*g!q6GvLqb4Ak>DwY*M)A86!t)C_f|{FG9LkESO^%_BR= zq2+RcsPd2y{uAm99nzP8ro2U^Ln+ET0yyQz)aQQm=WU9pTnoW=|9?6(*zK0|W;CsD z>v=j+0VNP^xzn`?TiI-E1g5lCx{?NrrssO;HO0OO1(OZYWOMP zURP1wG?@}ubM$5IkYM$sqttuUBl|8v03cHy67!uJLmvVf`!;bv9d@66iwb~?*nBoY zRR92<=9=Nz#Nvun5Nlqi0)wv;)f@F{1J&?;qI$Dly@hIcKcNcT5Y9GL;q!o!X38YE z^0+kP8wSZ0)xx~qq{TNB^_3b!A4pMOCa$5VFA>vJ6ufP#qToKo(kp0Buew8o!J&%k zSL)UGQ9WC)-mO>fp&Bk$DC*NxINho!Nkg9@iRH(wA)%UAF_X{As(I*9%>&{Zs(F=| zrfT5JTU7(+EOhWWDhSnl9z(!gi|Q}bt6!r+4@^0ppaS#jb8`w+g>&=zre)KQQO1w> z-$uYKZMu<#aE=dR1sKVnA-Cb+pOGc*>h?Mj`UeL7C%|AN zm;zJKX}65v@hyYyXti6$>$Jhq`rj~``UbHN=+)FO5@i+XWH47^H0H?1#I79^YD325 zF42}_qQHF|L#h71UI$)S-=qx=SMSA|`c-18K>8Y$SV_VTGbKi2TQZO~JwVD*(-t7H zdsGYC>3CZxCC>*f*)fxCAR!kQj~SWsI$PFoyeH3&s@O`;MX1 z!BF}hZE(X_eV>>rkbXcVwklzVWhF*qD>9JodVquop%qA*RC^4h4hGVXXoLM}8clr* zIQqB*9kwhn8q3HyTJqosA!Q4W7y`PsP7m?eF~B+)U_YY`e#cl@Q-4k(Y+eEwo0Axg z-ICEZ>p>f$_!hJ&h~|$$)xn_p6>YF|llnEWRR{PRDzO_9df24IXe=#5>AD9>$o#ZI z2^pYcD0MKD-lh$1oB!3+-_j>GF5!n=l^Bh^D1&Lt1ExJ{+5#qq++ABQh}7FL$~qWj zzoQM74A$?7ts1O9P>H1^Sg~P=(b$lTu|W^Ukfv?Hn1XceF_bzON`Isc{tpK0&j8aU z2|uh~Vl;M92Ga{3Fd@0$0;V41Q={|-f4P^vb9VS|Z!qLlVZxAKm!BSF%)Fw=oV682 z;=?P71V~#^gtlH$gt+ZQ8DbWE@c*;GaJ|F_|I>qOgST^BAKG>fSJ1W(oMdkwIMHqU L!0xo`T}o3+<O$;>rBxy>{X@T@7agzo}`V(pbCG?!uHcdj>Q!vmH+5|BDz9-G- zy_H04Yxl&}(VXME@7|fY@7+7!yYId?Z)BhF`F!u-;9njMevoZzW077>Ka-!)Yca5YPCD;I#za0={oxNWU@}8KWk?lUS$r&;*R8|oRvF; zR>b2@B#S0w!o7B8XV%^kci3uk3P^zDT`vvd0&7jmj`rA*Y$_eh+f(12N{rZPCq11? zyHio8FP$2TMV)j#1^uzTP5>5Vj7@fY)XDgKQ>OUp>ZaD0s%y2YYfgzauP)O6()z`h z%w(&qxu9l%qz+^{OK0GpVSo(oS~{>I6&X!9$!um|Q!F!T$D8`IqtRHZslBzWYXI%t z1;=Ct);NiTy~;`Ury{!@h0mSn-)W~u`qIu0JlU*lXe1ks#^VMZ6YE||J;P}`(bNOi zXPVBtX7R%F2R1lEPTEOEoTlZOjFZ6Qbut6q6CCKl_0H9)^zMQ1x|JIX+*oJ0E8JeV zFx)L`m(|syx=Su!IxylkJCaI9Qt4>Sz98eI$DH)L*s(WZpDUG^P-0I?n-du{CLY@% ztFcf`WKych17vd|E-RXfimt$ej(QJzEcc)@C-$H{2Q@%p-vlTmqzcdOG-w^0On*Iy~}-43fh+gVUNsU3mIk2K-EsAleB=;v7!au@U10z zN&>ck8GuZ_n13|H?M!xUDjFM#Inf3?mj>)vhk?H#>WnzaDAEID1r3>qoy|IFnLSKP zk0!IRgp<#)P8)Lb1m5Ng1PtLnt$BPqILtWFa6ZwHHF!e+^Stnh+^}UUZ{fe&52vqv zZT^pX%~mjG8%aBe(E(zOwIh}7YF*U6$g1N5`0YE#@AtzwY`E{k3(RG`Pxo4uWpv8M zcA~KztsQMkEJ2{~Y3|Lgf4j|jw%6GHde8YftS|3dF8TP zzgrE)yqx3;5MvF9W*vj{scPL?*;vIo4kp^OTL*!8B}pHMu^vRVPH^Di4PKDl0LJVm z*$86%9*A}W2Pxj*b(T$F%ny=m1~EPaqTW!)0qs=v-|x%DD*ofZOY6Lzy#r441*XuK~(D+Vi2tJ7JAerGHN}(6|zN(?5;@H!nP$HolfiGC3VbEPAD;AhBnR4 zs7X`3o$1e}W69x$Sf)Rg7>PT~y5{V(Gg#q?IGIc;y*wS+8OvhP2+Mb?lRcxHW%D94 zmPm}sg|&vXldwldoM=xvm6#$>OJH{1R;&r-7vi*BO$hOb@Z3>YtHXknla58?njL2K z+uOEmIgv!JxoP~>6T2w6bt|7E^DT21h_M@_Pp+huS}-dtgUTm-JgcC|(RiHNNrFs2 zee~uO{0p4Wla@_P;5IW!k^*UDBFjTC`;+2zx>{&&XSl6B+_88u{?mf+wL@!1({gNcC!^H(Qd4p`phgyBePQ;;zAJF6dKu zJ`{}WK~@(U?M|sHX#B^ZG060;5@&xLx0{~;X=IMbLol(D?Y;p8<3^B)arRBn6lB6y ziL*b6+s&IvZUJdz>c>McUy_af6bi_y~pz%{xf$1ZiZR$wM%cls4Rjg7JBf2@5E9Ls^hXY9(&} z0&X|I2-3(*mxtH_BAfh&C>UP?nHY?J8JdDjJ1gF!iAn@+_~A4IB|N)EF+yd~|kW9c(o9demA8`}|!$Fh5j z!JfE1oC!%qh5JP?=o3@q5%l}(r{(j7WWl#i_~0rMeOmpLaB_GE7j&VQCXl5+XYU}w*t2Z$N2tyaL`vD*oR(m zC6rMi&iJgrI+QAMu3iJVz*^`$9-r&CPG@`S;-LROCo`N^6m4y&@3u$m2zsF};Hxj1 zke66@mvF>=^V=v(cz*|lRW7_QU)tjC7HAp2Tfl>Ge%FJ?w@8%|z6YR8EYBk@ zun)|oc?KtZmbe5QAxJ2Qi%}qUrpr^D{SXa2%zQ;u-~;59XaZ50ca`S0&qMX*?miC{yppNgeB}}fqh^)@(fPE z>UMCR)oox3R&xU7S$&X~;}Dz?N%NjYL0bJ|iaAG;;%8j(vlMfVB(44@iVGo4iD5s% znavYuy7q?Ce4eeCpEo(M56nEx{<*6Cb2R%oAuG0@Gc0N7Arz$jhfxrmb4l?LSNsCS zgs~L=n=Ae)#rr^{TYiSZ=>@mAGcmrli?n>F4;MLfm*~&)lFw=ZknOG=?u+H zAH%K1Zu&T+((sp2kZwB4Dmc57;#XYps}$b=BK`3i#RY#@=6^vTW5Iu;u*UVnNyY*l z8m~j6n5x=rX*Qdx&GnkiH{n*#W-lOcJ}8a*H44(^H(3Q|h*JEPD}I~eTU_xmSNsme zoHI&SFll^uX!vo33XKUL{UtQG?kCzQasfaH&A>##L^$9fjh3GD1-~uq^QzZ>Y>K%G5I`9dS^P@_=@VC zqndYKR=x9*<{i$1J?|U@!$WXlE8Q>ynx%KnWEFR~;+d{^7R8+W%B$b&ir+^uC%@8Z zXMrmkY-U5K4mOqI#v5$TX5B|rXB^g?aY%K>ziQ678!h&n;X#G7Woh1AXqL{H$0|5~ zmg4!Y_&ka^f0kCCPq7+Bym58`&ZvV;rKWq>kYH$LE1ps9|FLHO)2jUkHT%C@(*FIf zq3yH?jM>2|?s3JPuDFZhdtLEDSGn}Zu1B&wL0^WJ(K{Hq>y z?9(SQ>p)|CGm+sAlU8AIkHc1QX&GJdQ{s=h`+WHf_0?)O2|gQpkz;U zBf4d7!X@HsN*@tlRTxV=q;{AG^Bo4bLF&Q@tJh)NK_|e8YYkNHM*k{YUK;>n>fJ5= zmbn!ygSusIBb4q_&?D|u7)#uvLTSGSB`$IlLy3zWH7Hdvl%haP$xPEShX|%GDEJX~ zD~u&RuY&0=4NP1bD*{u9%VIqI;s7XzXO71W_}3ir`M$dV)iEDp920@ZL2JmXV&ugE zFC~a2z~+KjlDPY<0yS~F!dT)nD(-I6aL4sa4|i^E9pE}<4N6rEr7RF*EY;jS!hP5ij+O+_c61G0#q3t@N z?Ro`ZVxPiT;-f0suG7%Q7Z4uWh$S8YzKEznsfwZW2_VJ`TQ_(>x{*NIqhLo|qcE1Z zS_RUW2Bc>JnIa$wzCvQy8h->1K~h6l6(j7^fRYldZuKyB8!>j30xL11FqTNE7)xpx zI}FeiVJvW@1|@f)p*pwp+yTUR!Rk&ANOuuPF$Ftfr@~lbSOwCM2Ba5C0*UXLYcZ*c zW73y^nDKA)EOS3`WGm1S+ZDzVgDQ@;X*l|6DI5uI2q-z|y<7uWbw{fE0VQS7djM>1 zVc|hy>_ZBy#Ab!D#0OQ3ZPGBty&uJc-YYdIRX3Er0mPJ99etQ!TCd8}__-27Jq zQWXQ~aUjOc*Prlk^dxb#M1hW2tT2{Xq~d6yhNE{%;Ye_oX30U1n=ESpt73rt10Jw4 z==}(6ZqR!QCDEq9O0+7BC0bOBg*A+EL#`LA_-2MjfE#mbP^w}mJ&OliCSQM!U^-vH zk2p_ZEHPgN(>x7K4W+;&xN*8mx0_hUjtPYgX(8^z{)82bFjHl@I{owECoGcrovd_OchEqG$`>4i()A8 zON$zmsu)Ty;Q^N!1&}GCP!gsBD^ag7mI$gCGc=6N!>u01ytJJkN!6fK#ZY<+57=9)dK*j* zNXJkTLcxykDU2oFd0Y8#^B5R@xXFp0CHOgA5s*YPq$N{!ew|iBSQR5IfX}UzU{wb; zhcN>s@hf1*{TUDOzY1fC|4}jahK4bI$ykK3KxYj~)%{M8|Hq6MtoRexIUr3VkX}=; zBVJV)OT3~2>8J*zu984nSOZdZ1L-|LjJvR2_!@8qar7Swbi}_aj3s`d;^;*UNBmg2 zC?JLSQ8W*~;77|Phd+LETpRwXc=$UDpengy^?sbl#jn}K-3toT#1VzD#92){KXpoxAqQfn$>yaX0a`QDVm@iVyK)B zet@m(NS4tN8ZEkXtlXviK2q0uUAaqdD7y5mC+I6Zav+)1LZD#0w;L*#gq9=0xb{Mao|+r~GI+9ltEn@kSXPA+8PUwj3*WDVO;4OS#m?-ZQ>Sgt&;K zU&_UtQkM$O&UvE9%bZB@@C#0)N_J8v$a*zQluK0;V)~QQ#`LG8HjY`kHjZ5-wJ{>) Mv@s%;(l+(~0O}XSCIA2c diff --git a/LemmaGenSockets/LemmaGenSockets/obj/Debug/LemmaGenSockets.exe b/LemmaGenSockets/LemmaGenSockets/obj/Debug/LemmaGenSockets.exe index 2a75edab9c36843d285d1146243494d5811faa3a..a3ee5a9c6f09246b1daf7e7bea9141b1c03e214d 100644 GIT binary patch delta 2122 zcmZ9NYiv|S6vzKF_p!UT%Wd!7cDs~TE|ivSw_=Nr)JOy@k2Xf7EMThjQMJ*kaO-Z3 zwxZh-!6$0HAt;)t35oF;v56syAADmZ#%PSFL}G$TjT)aHdzCuO_9nZvQLj!w|ilJUZ+GlpZ7;c$r#yy0h00H_g0`dkMgWcZGCaRifS_DM9AVf6y`eN0ws0sRqOQ3ut~XC z)uoDD*UX@vWrxjbC=tuD){dR#YO)lvEd)2FM^%_Fb83#tl;b&V268rPQx&~87-vjSSD(%-;&zRnPY% zJC_|hb}Tgu-mH*Cbh3*h*Gh}psL9Hon`Chs?*MLP|F@F!BZZ+&*A~^9M-sp~_KoZF z*iX4i$*b4*tYCkT`WZ5M#cD3GUaHdiiLlNIP>1uisl zh6_R2N8G&2^zoMF!=`WA@QrDi#s)KI)Zq!z##yx3cwH)Xkv@K)r;pd5_@d@Jj9_Dn z{FIhby`cg;1#U50guo@nVv$BG&Jt?^HE#mH zFa?3vw6-m?QESu3$3}2A=k)7WOkjYTf$7a$ILWkzrn#(myJ-X}F?9&_|YcQnc zn>0r?-yrMw9n2$3s?ZafNqlWC!RL6?JdZrn>>+=YYf!C6>yI;!CA6ry)##S@q8{HG zoK?byGGe&H@)53kSU!QLa1i_O45z(`J>>gjKRzLUF2{#nI$`S`C|;Kz9J#V1 zx<9ec$&VIC1~)G13JX)KoLhynt}wcKaAd>4jWUV?6 zA$|&YysT?L8rMX(yNy0@cl3hFz z7{a^R8})9t10nqJ2{VA-X9XhOVDU5Nk8h0zD_g}qQx8=J+!gRgzF$>E-Sm3yMv~vy zBp0zZzaszMB|A6m-@Ksv=~c6L_pg5Li>X6ZapAAw7xzTFUdh4aytM4^h+Kc)lw19$ PB{z3Wd38UE$@bVkP%=Sa delta 1791 zcmZ9NYiv|S7>3`ObJ^X~?RL*@yWN&nwp7ZtUF)L8luMLLMVoRjRT?Woh>$|$Knof~ z*w&JpO3p=evPr;4=@nVhz^B!)t*s^%G4%m0u$>ey5lr z=SKY(#GX_#OAnt%*d#2hss&aT0R}Z2u>Jm6)?Y546BYg^@?dVC4ucC*?vjA%l>&}q zALvd_h@wEI19oC4%+@m%*y@$jnV1l9(={_?kh=s&7+#XrpjSbL|6{YuhqSdg*uzC^ z4W-K%61N)5|BcGgV(K8;;aUxtlC@CDyau?a6ii_M6hJsVGngV>sU``g{%!zzX?BW} z|JCxLXMs0;_dR#qcE-h+YczaIQ%Gq8@xk7<-4SPG`hP-JM z(p<1p6zG|ws=kwQ*@TQcu9Gp)Xrnw{)OaS4xXtZN*YSy@7UFgw>DAE^FQk>O?FQ1U z$kqvUa*y>m@tHx_a)Vx)a-C}>y?P4V29NbQ;5^~xd6Scq@nX2eUL$SkjDOflh=hO2 zYJcEUW~|)cJi zrJa&WGgzI#V)GBO*|?4bQXH_H0Q&I}lL?&D+TLWF)_%L;+XO=hbk!6hAERdAn%4H_ z(9QK)P$vQy#AO*078Yu)kjBR1f-kwkC=tBCoMwQOc>Xr|DJ_p0A+#BS_eL$Cf^<+% zMln-!mgaoTR?XZB-F0fNBg@dOsiuLxe0Gz6#H<5qxNH%e4Sj0!x+xt5|7j{-Zqbs_sC%g5y46FkUXW6 zo+nq!3*=EbrujL&XN|8Izl3d=XZJEW!q?M}S2(>ZhVUa#PZ8px32{`BQ!t0DMGM)8 zPV!#OC+6sGp?0+4NjloK<#EmRc$SvUI70T~J(0yKc=&~H-s~&f3-0@kXPoC>-I$xt zRZQonx;{kn{~d)RcQdfiyZ(G9?hM?@;BRn7@=Zn0i2VEEgCf5);t0R9WUBD|eI>Qg zr_cTQ@zCK5EeA%rzNsJEJ=-6lYn^|k?r?WnDIsFJ%@krC6ob_kZ+xt;-+w3CAO`Z6qA8K#pHi1v zthz8&0vO}p<ZeE8APz*G#rdJKa#WXzx#JPR{z_-4FArSWv{mH~x#%s5&L525S;B cne|u3NAju47LkvXj$5)hn|CXJ7lYZrKRr<(yjH6(LJ-ou~f ztQOcoWh;wKMlWhB_-(DW($<@t`J$Oyv)-7mDjoHvcj@|^-v!5gQD^tu&vTyVInVE$ z@A;jd9@K;56&cf#Y(;K3@V%OIe2&tta{f_s0lz=5srkMxMZ$NTr{fm}%UVBb&Y$jT z`S@qIFtZ|krE7k@HhJ;)@YS&!XM0*+U(x;f&ULd5pQy|lU$SV8&plB&ujYd3PWmsK zj$D3c>WVTsYa^yUuf*SZy>5;BLf$vZ3HwZWsK}x@^rjQ>naiQAc2rc*&d7}y^!qZ$ z{mGKK%KTBZJ`rJ<%gX-3YZq7~B{??15%Fm4*x`M#0|UML;|E1n6SsLw;|KbhmSiMz zh^OSl;;A|1)M0bwDM@|Nc=s#O?%2}Q5Q%=@5qTxy1i78k2bGgMrv zI?c{cXJm2V(Kwc()A)#g{X|YDSr7=Pc&2Rb2t9f@`;9j)oufvNW!Hab@}3bK&Qm#5C2CN1?o9AtYh?q_+%_8WC;HN8mD8 z3$x&(FduG&Yv3mM0IY-9(f}LbX1D{g4E-!@hCOip&)d3BBq1$0Uc%7|4=n_az{eQB z2Dicy*ak=8cK9x&sD2OHLLb5>7*D~Sa0WgJzl1jaHGGQkIk*e{2%m;mgeO0PaRWyO zyaji|+pr6+Wb1oihGa^X^w2lcOBZ?>j4bh4yPcV+E7`ow)t;Rr_sA+V#b?f!H6@Z~ zr|z7+Rqw)r+EJcm&3$k=L@AvMdtoU=v7t)X&v-rLerQfe24DlUn?+$c%ot$>AwBgG zQX`CwCp1c+yJ(aU&F?ul1Yd;1keHyj9)q@^fheyh;2ZE1JO)q0qwsxr9DWMlf>!mn z;kS^v`Z_!bZ^Bc&Ao_0%CVOg6LaX+xFu>M1Qk@Pdsx#mj7=$0dd^qJcl%H28c8t%h z>qv7Z{HC)q1l&WFXB~5E?OkqJq~5>cCU3cIeQEYSb+nmlRfT4}s@?I->O^##jqbI7 zUb}j6(NYrKX8je54oQu%L5clG%-$pWX-l@$dG^%r-1gd(q7n%EMDAXgrlhoXu>`dO z5n}e@*=A$rhYf{JjnB+)2>#DUzB*2brAeOzz RdGy@bhP`UG>Js`^{R^ZfE5rZ* delta 1238 zcmaKsTS!zv7{_O3w{6$LlyqCy)m=BN5OdQ)yQgF(6=G2k-OFxvv3o&SdyFo0fwYl= zFjy!uB8uGuQBV{;1@#coLl4sJp@#@FEa;`nZ_e&RKGes~`F-E_pEEPx%@h5-;R~7 zTkn>Bdow?>BKv0Z?cLGC1IfBuGOJgmN90SR9#As3lF7|ZCB;Vj}Qa5kI|=fFl-3s(yf5p_5_Nz}tO zI1e6&bhdmVOUK5|i>G%(Iu#>{Qrc6qDr^D$jJpVqgp1)AxCD-c97@MTxRSU6u7VUL znA~D+r;rA?p4cp~1vbNtkTHxNx0d2;C1Fmu6|RHZAWdwCakvAXhh}Fqq$sN!Qj~QO z?uM5j=Vj)5AVYR@Cj^(AK+2=8OGT&>pPCc?%f3GS}_|^ud#m zOJ$DJDuAc$r-RflwE zb(3=3>Uim(QP-M>V^f^ixKfl3ntZvFH;2$e^1m zyZEDz%4kxwRAyWjv$-;XHvBJ5zo-eRUZwkM{Qvo`Obw`#yb(e?;O$!4ai(@LN+Hec fO^{ZgmrbAJGnd}nFMqPGMamFgnq4Z{+$sM6&cpJ@ From dea43086183b7cf8a4744cf44ee9420a080d0081 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Jaworski?= Date: Mon, 31 Dec 2018 00:50:24 +0100 Subject: [PATCH 5/6] repaired lemmatizer --- concordia-server/socket_lemmatizer.cpp | 39 ++++++++++++++++++++------ concordia-server/socket_lemmatizer.hpp | 3 ++ 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/concordia-server/socket_lemmatizer.cpp b/concordia-server/socket_lemmatizer.cpp index 0cd6aee..137f4ab 100644 --- a/concordia-server/socket_lemmatizer.cpp +++ b/concordia-server/socket_lemmatizer.cpp @@ -1,6 +1,9 @@ #include "socket_lemmatizer.hpp" +#include + #include "config.hpp" + #include SocketLemmatizer::SocketLemmatizer(int port) throw(ConcordiaException) : @@ -79,20 +82,38 @@ bool SocketLemmatizer::_send_data(std::string data) std::string SocketLemmatizer::_receive(int size=512) { char buffer[size]; - std::string reply; + std::string reply = ""; //Receive a reply from the server - if(recv(_sock , buffer , sizeof(buffer) , 0) < 0) { - throw ConcordiaException("Receive failed"); + bool dataAvailable = true; + while (dataAvailable) { + int amountReceived = recv(_sock , buffer , sizeof(buffer) , 0); + if (amountReceived < 0) { + throw ConcordiaException("Lemmatizer: recv failed"); + } else if (amountReceived == 0) { + dataAvailable = false; + } else { + buffer[amountReceived] = '\0'; + reply += buffer; + } } - reply = buffer; return reply; } std::string SocketLemmatizer::lemmatizeSentence(std::string languageCode, std::string sentence) { - _connect(); - _send_data(languageCode+sentence+LEMMATIZER_DELIMITER); - std::string reply = _receive(512); - _disconnect(); - return reply.substr(0,reply.find(LEMMATIZER_DELIMITER)); + for (int i=0;i<5;i++) { + try { + _connect(); + _send_data(languageCode+sentence+LEMMATIZER_DELIMITER); + std::string reply = _receive(512); + _disconnect(); + return reply.substr(0,reply.find(LEMMATIZER_DELIMITER)); + } catch (std::exception & e) { + _logger.logString("Problem with lemmatization of the sentence", sentence); + _logger.log("Waiting 2 seconds and retrying..."); + sleep(2); + } + } + + throw ConcordiaException("Can not lemmatize sentence: "+sentence); } diff --git a/concordia-server/socket_lemmatizer.hpp b/concordia-server/socket_lemmatizer.hpp index 4f5e9e9..13d5949 100644 --- a/concordia-server/socket_lemmatizer.hpp +++ b/concordia-server/socket_lemmatizer.hpp @@ -9,6 +9,7 @@ #include +#include "logger.hpp" class SocketLemmatizer { public: @@ -34,6 +35,8 @@ private: int _sock; struct sockaddr_in _server; + + Logger _logger; }; #endif From c800fa7b574dddd974366ced08ce5e42f7238cb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Jaworski?= Date: Mon, 31 Dec 2018 11:13:16 +0100 Subject: [PATCH 6/6] lemmatizer fixed --- .../LemmaGenSockets/LemmatizerListener.cs | 20 +++++++++- .../bin/Debug/LemmaGenSockets.exe | Bin 7168 -> 7168 bytes .../bin/Debug/LemmaGenSockets.pdb | Bin 15872 -> 15872 bytes ...ckets.csprojResolveAssemblyReference.cache | Bin 13052 -> 13122 bytes .../obj/Debug/LemmaGenSockets.exe | Bin 7168 -> 7168 bytes .../obj/Debug/LemmaGenSockets.pdb | Bin 15872 -> 15872 bytes tests/lemmatizer-test/.gitignore | 2 + tests/lemmatizer-test/test.sh | 10 +++++ tests/lemmatizer-test/test_corpus.py | 36 ++++++++++++++++++ tests/lemmatizer-test/tokenize.sh | 7 ++++ 10 files changed, 73 insertions(+), 2 deletions(-) create mode 100644 tests/lemmatizer-test/.gitignore create mode 100755 tests/lemmatizer-test/test.sh create mode 100755 tests/lemmatizer-test/test_corpus.py create mode 100755 tests/lemmatizer-test/tokenize.sh diff --git a/LemmaGenSockets/LemmaGenSockets/LemmatizerListener.cs b/LemmaGenSockets/LemmaGenSockets/LemmatizerListener.cs index aaebab9..28afaf9 100644 --- a/LemmaGenSockets/LemmaGenSockets/LemmatizerListener.cs +++ b/LemmaGenSockets/LemmaGenSockets/LemmatizerListener.cs @@ -54,6 +54,12 @@ namespace LemmaGenSockets private string lemmatizeWord(string languageCode, string word) { // exceptions + if (word.StartsWith("ne_")) + { + return word; + } + + Dictionary> exceptions = new Dictionary>(); HashSet plExceptions = new HashSet(); @@ -76,6 +82,7 @@ namespace LemmaGenSockets } + string result = ""; string[] parts = word.Split(wordInnerSeparator); if (parts.Length == 2) { @@ -85,11 +92,20 @@ namespace LemmaGenSockets firstPart = lemmatizersDict[languageCode].Lemmatize(firstPart); } string secondPart = lemmatizersDict[languageCode].Lemmatize(parts[1]); - return firstPart + "-" + secondPart; + result = firstPart + "-" + secondPart; } else { - return lemmatizersDict[languageCode].Lemmatize(word); + result = lemmatizersDict[languageCode].Lemmatize(word); + } + + if (result == "") + { + return word; + } + else + { + return result; } } diff --git a/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaGenSockets.exe b/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaGenSockets.exe index a3ee5a9c6f09246b1daf7e7bea9141b1c03e214d..dfb65386f4a15752946443f376222a3041a5cdd9 100644 GIT binary patch delta 1940 zcmZ9NduS9#9LK*iJG*=D+}>U;mw3^;CYpf2cP-P?=`=f z`R(3t`*8b*d-)%?N^ehOcS^^4*ddp(CjFIGAmOHX)?LGz*oa$Xh2(`9P|q?VhmOWZ zpd$ciG&2ChEk;_)D|na{dYJTp1(BZw3^+A_&o(S0>xHn*_Bmu8jFZkcSWEPq`6lUA zNAeUipQ(_iKa!0+1r3t11Egc0ivBxrqF+TxIsLIkQ=@zm=SwkLM z#w3HgRip~@DNFh(G6p=!`%~#G8%PeuYZ8NWBQmiiyv}42Ntonwrbwy%wA`^J96D;+ zWz?Tph$IB?9-R~OS-Rn*rNwKen`ty+me--wzBFBjyrGq}rjsSP`eA{q!_km2P-FWu zr_ES}-Dyz)vA7a!!;CxANMn!X<(6TYX^px?WmMT6x@CD&y5(`G?R--#J?>DiQTe(v zQ;xJupuJ;sbkq*Q3YyL=Ql@9ShxLu~NL;3zquMW!H?)L)gXlz)W z!wAV?DKA~!wTMPRS?9Tnjk#>LTQnvYPH6@vbIIFyORK^qStFv%Pnv^@I~4<5YA!4J zZ^euv*nZMqgJiGLEMXsW13Sb$q95(FjWt@E76KzV0$c185u8{2MAR3AfqLy8kD*Vr zX>~YGG|)*J1K)DVg~SN%i+Tz>1IZJL7bt~+-TXRfnzVY758)p5GeGS_52K&l`Vy^G zbUvE>!sSe(Ei&vDGg*OQgEp7NFcr_T6-gOb#J^Mm!x=T(6W^+4H^*&5Sil(8sWpn| zqnU>9s3(Cvc!$c>ai6*pD8bN{F&%cJvZ*Wv5^7e8@6w)U^yvU|8$7jzyP6kUHAdeJ zxmGRFhh}0JGZg12b}Du$u2fv3*hj2JzmhjAZd3f8Scd>|NE2n|qGABI#XMZWQL%v7 zA-W1QJm4#cm-tRZl~ZDzIm{!COcBL0z7GkEXfpUbc?R%1KS&!L;GdGl&_BZwT*i+$ zMzQB`l6aB04p)hzoDr{xUy0rPCcG?;JH)T~2nw=_e<;Ov&e;{5<)&hgdD#su!cL+G zHz}+|i<5kjuC9oaRBkOhib-r5_1G$+jkUxH*iLM~8^jFu6K5&TRa~g_&*3oXb4s&T zauL?!OH#dt>%>9)$y(8kFVM)6wA8y0!?K$ypH@LHQiYJZbnzxjpVY z)}|HlBrbJ3eKtE&UhTWa_1sWjeyH3MoXP{vK=0zsTl!w_AKC`@g{qWcZo&rix?8HA z3CGrd5E@wC)p5M3@!VGvUOMM~Q`OzmWL8y27V}VbWI1~hq52RHan3^okz_pN``=Yp z0iGo1Ho`VDlFVi{$SV}m!Fl=R@JB3r<5si$u*g3-BAN)?0aoP}<#zq>bLzLY zaL17cS2vbUe7?7wjCfe&?D(IjjJhV}WGNM67u=P0S2+}Yp!2_Gl&!kEtki1$2enT! Aga7~l delta 1814 zcmZ9MU2IfE6vzKFclPe??RL9&Z@b-=B3s(hExSOeLQ230LJN(N6c!|4qai#PFy)rE ziBgE$8bO0WU9SX16C@!q#E*!5X+$4TK}-k_5(A>4F=A4q1iv2q3izKXR&aLnoBy11 zX6BsPnH_E)Zhzq!+0j|rcZV1%9orzXfe_81pXLE|LA}fe8%47i4n{;V(=#8Yts*5t zxY7W`Vt~bw4cLM3T1UHOLKLfoj)04SYQS*@(qvhABFP5?IoSBlDn9T@+-Ad zx$3#C>FdIaxRLxra8>G%7uB3hXgXt3^p?40`D(~r%tl%M9kjYppXobZ^mePNJ~U}( zKtIo?&vmj@I(sjiyL|y=;({?%UtGYrY=%LxNapw1z(759dAho8V5Xj_m{!#10XY;i z9KTVk_Ay*OuuY*c$?xv_BS7Qde}6H;j-JUg8B^R#%A>q^&l!&!^Jwpr1v?(+w7c%UIS z;?Jd=<%ZHe?+?=t`K`L=HnH}lH*VbU%HfuWeIAsPZ3`}kw%r|L2@OvGkF)<<>%w6F z*6oiB=xPU}z&iE~>kBwTozQya`mPo1FLHcWu*Y63+JcjIQ}Hd!Mw8V15kpo5&gvXd zDL~7shPw@8Qd=$>{j*`t5Mn>=m!bKvv2==q(ng<}Aj{CsGFDqTD-J=OL_|HUlK9y0 zMU^Zn8%b+IrtpBuS+zJ!+E_@7jSr>fax#glD#>Xlp!tsBDK25-N%;jWbykx4HB9ir z0=yM$f*)P_Vyj6ln;Kn^dStOgfyY#fC<<({+C&Pou~2kp^u!VT!W0Bfn9<&JuNh6; z^bKN>5a=;`j9@cI7KVBKfW3H@>4q@I8wix}m%K-WaNdk2SF!Q+q%S>SbqGFvh+s>k zrg+7QqSq39_j<1y(m@NEzd~fN%b28up_TW7En-t_Zb%|Uj#}O52d`G@7e=uA9N)E|C$WP>TLnWf(D=R9FV*|!HYo=8} z{Wb*5NzL5VrPMe_HgC{TJ)NCV` z+!}Jz_3r2q=dv8#SKcUt6BU`@+lmE=@wLnL%zpj!>i4cL?Jb<0*&JjO>#W9bEGQ;w z+NQ^)jLUdjI#tOI8Lvv-FXAD?NJdHz2~Vhuka0UBJmGcuHA3M`uIhq2bbKU8+{@7xhtblXNT{s zQk}ZSALM(j|FG)S7yRppE6UfaUgxp$q^savclJUFeW^0+X(3DKir|^zC%Te*yZ4Cr zCFJ$!4}-xQ;*34OPOHHZHF#AGJ*!e< zDs|pkR;H|>Nh?)irT$XGQMJ!&4R>1mLt;suM5NYhwox=kkA}*Y#;oiL()@VK&g7o1 zo}E1po8dBXdDHA-Jdb^`g_!<2bgFooN-vc(GoI5URjJ}Jk4QI_HB^r3s_K=QIb&4j zQE7ELtG8PNjVv-vs`yKo-@~z-Pd#;WG>F)yd>hWiE5BZ|ug564xyn{YAS zi7S!CZN{V-*Wp^+gfVg!=4UgpNFX4LfmvI9g!+UTP zH{zRkFFKgO571oiEV8)$5pKrwtS|Qwe3_f@4c<@wEpEZzkY?MzBfGVWxD_23z^xd> zZ5YDsSdTmKR(uH8pjkME^v;gsBiM?MVp}yqH$gl0V1X1$u`FRZ!MWm<3jK3UOoeoK zQFx#sQ_I%lxw!Jbb93he#a!nqBNw@D_Bm3SXT?me;qwV~bL?hK6DF|;pTY(BG%mz` zk-?jhj@Zk&S&|~)nUG;@Li6kH!h_`WR1P66k;6z!GUM&|0y(V_T4-;@5j1Uo5zTcq zGAA>iSZJ|r+Dwb>SMXIck1u(i;B|Zr&)`vf52@K7;~V%r@(kFQ@g$n|ox;Cy9IxR4 z%;TrOh50yv1vrUicp7iSDO1mMWtsE-;(h9!+D#sn)|+| zE8_0b(m5s<$7+*A`53Q0kv@g~tjn+0znm*O^KkIH%o3sPDznL>L z=iWJ&i|*20SMvO&4UIJ&N<{9opWb%2Yg*HV#piEDm-FgWr!GS>g( z%T3AsMIV2?YR1+2OUjqw4UjH z9?(AH6bTpmOdwpT{AOmjO0}Eqp;^nK%&1nShgJ6>)l;R?^PHX~PS5Zp<)nA0^cB_H z=cIoSal|WesV%nmd*roOk&sQBZTTYJ0y7pa4-9bMURK!I0;3`q0^bs8A#$Cy%gk4i zbU?GbiO6Ci>rBJcS(!DO$V4Im_u$lxPU<`feq*Tk31)85qXc!69R4c7p}wS*nuS+ zeLecbZ~CSu&AsW-)NBdJcq!&2MCEr@j#{YuYYRu~B!lg4#Bs>^br3gW z6;g-pnYe@TtGE-XyL^iEXpcbMWDnzHiVpS36qWjD&YM}l`KYF7&L^7R4cLvJU=P}T z)KT}NO=ysM>H$26r?3xC;{iO2hww5UMqA4xxaKw;HtUCY5}zWQ^&iOQObrLo*6=g* za&%slE<}=QAD+WW_&Jv0kh^_GzdF9+*0tKUd^KR!)|IOPcURpx=lF8}iP}PU+UpDR zik|YPYulE5TkEzK^B`Vfj>VTdnQlDQ*=%P&u$N358_D&QbT->~G#8MU=QyBz^dEEf z>&Sf>@~?gNt>@l2pT<9viZ|jBnUGDYq-t(1Kj-zvn6sagZFbIlm#9z)^GhQ5KmYFL OlsZoDI5+<0i05x;DFYq= diff --git a/LemmaGenSockets/LemmaGenSockets/obj/Debug/LemmaGenSockets.csprojResolveAssemblyReference.cache b/LemmaGenSockets/LemmaGenSockets/obj/Debug/LemmaGenSockets.csprojResolveAssemblyReference.cache index 08e454dd60c5e8bc92e0955212eca78ceee26490..58d7cf59276ad0e10d9e5aee84f9d2140ef4dd42 100644 GIT binary patch delta 304 zcmey9dMIr}CnKZ4<}OAiHkN8_PTS2B*zFiKK`f7p9$Z{pY^n?l3|wU3TbH5X&t=58@b zM#kLDOT`r#8EZBlmr!Ek;g16u4g^e$36meFN^EA9Z)9U!Ik{Cel8ZkVBE`V8`JSo> z6XV9spVg<6GJ-E2I*i;!97`Vzz^bGWj4U7#f zxfsQO0>VJd08%vhJBK1z-((q12xGG&ryV22sAeuGYw}TUsH#^`#%6w=YA(jW&D~;> zj6iCsxFU!;E}_K6#F#L-&q#bTk9-pwW8ma|)kr4Be6R?&+Ha5|c8%#QK#BP}K#4lA zgtV?5P-JtIAuFRK<0_!bf`OO?hT7-=yvUIOW5 GU;mw3^;CYpf2cP-P?=`=f z`R(3t`*8b*d-)%?N^ehOcS^^4*ddp(CjFIGAmOHX)?LGz*oa$Xh2(`9P|q?VhmOWZ zpd$ciG&2ChEk;_)D|na{dYJTp1(BZw3^+A_&o(S0>xHn*_Bmu8jFZkcSWEPq`6lUA zNAeUipQ(_iKa!0+1r3t11Egc0ivBxrqF+TxIsLIkQ=@zm=SwkLM z#w3HgRip~@DNFh(G6p=!`%~#G8%PeuYZ8NWBQmiiyv}42Ntonwrbwy%wA`^J96D;+ zWz?Tph$IB?9-R~OS-Rn*rNwKen`ty+me--wzBFBjyrGq}rjsSP`eA{q!_km2P-FWu zr_ES}-Dyz)vA7a!!;CxANMn!X<(6TYX^px?WmMT6x@CD&y5(`G?R--#J?>DiQTe(v zQ;xJupuJ;sbkq*Q3YyL=Ql@9ShxLu~NL;3zquMW!H?)L)gXlz)W z!wAV?DKA~!wTMPRS?9Tnjk#>LTQnvYPH6@vbIIFyORK^qStFv%Pnv^@I~4<5YA!4J zZ^euv*nZMqgJiGLEMXsW13Sb$q95(FjWt@E76KzV0$c185u8{2MAR3AfqLy8kD*Vr zX>~YGG|)*J1K)DVg~SN%i+Tz>1IZJL7bt~+-TXRfnzVY758)p5GeGS_52K&l`Vy^G zbUvE>!sSe(Ei&vDGg*OQgEp7NFcr_T6-gOb#J^Mm!x=T(6W^+4H^*&5Sil(8sWpn| zqnU>9s3(Cvc!$c>ai6*pD8bN{F&%cJvZ*Wv5^7e8@6w)U^yvU|8$7jzyP6kUHAdeJ zxmGRFhh}0JGZg12b}Du$u2fv3*hj2JzmhjAZd3f8Scd>|NE2n|qGABI#XMZWQL%v7 zA-W1QJm4#cm-tRZl~ZDzIm{!COcBL0z7GkEXfpUbc?R%1KS&!L;GdGl&_BZwT*i+$ zMzQB`l6aB04p)hzoDr{xUy0rPCcG?;JH)T~2nw=_e<;Ov&e;{5<)&hgdD#su!cL+G zHz}+|i<5kjuC9oaRBkOhib-r5_1G$+jkUxH*iLM~8^jFu6K5&TRa~g_&*3oXb4s&T zauL?!OH#dt>%>9)$y(8kFVM)6wA8y0!?K$ypH@LHQiYJZbnzxjpVY z)}|HlBrbJ3eKtE&UhTWa_1sWjeyH3MoXP{vK=0zsTl!w_AKC`@g{qWcZo&rix?8HA z3CGrd5E@wC)p5M3@!VGvUOMM~Q`OzmWL8y27V}VbWI1~hq52RHan3^okz_pN``=Yp z0iGo1Ho`VDlFVi{$SV}m!Fl=R@JB3r<5si$u*g3-BAN)?0aoP}<#zq>bLzLY zaL17cS2vbUe7?7wjCfe&?D(IjjJhV}WGNM67u=P0S2+}Yp!2_Gl&!kEtki1$2enT! Aga7~l delta 1814 zcmZ9MU2IfE6vzKFclPe??RL9&Z@b-=B3s(hExSOeLQ230LJN(N6c!|4qai#PFy)rE ziBgE$8bO0WU9SX16C@!q#E*!5X+$4TK}-k_5(A>4F=A4q1iv2q3izKXR&aLnoBy11 zX6BsPnH_E)Zhzq!+0j|rcZV1%9orzXfe_81pXLE|LA}fe8%47i4n{;V(=#8Yts*5t zxY7W`Vt~bw4cLM3T1UHOLKLfoj)04SYQS*@(qvhABFP5?IoSBlDn9T@+-Ad zx$3#C>FdIaxRLxra8>G%7uB3hXgXt3^p?40`D(~r%tl%M9kjYppXobZ^mePNJ~U}( zKtIo?&vmj@I(sjiyL|y=;({?%UtGYrY=%LxNapw1z(759dAho8V5Xj_m{!#10XY;i z9KTVk_Ay*OuuY*c$?xv_BS7Qde}6H;j-JUg8B^R#%A>q^&l!&!^Jwpr1v?(+w7c%UIS z;?Jd=<%ZHe?+?=t`K`L=HnH}lH*VbU%HfuWeIAsPZ3`}kw%r|L2@OvGkF)<<>%w6F z*6oiB=xPU}z&iE~>kBwTozQya`mPo1FLHcWu*Y63+JcjIQ}Hd!Mw8V15kpo5&gvXd zDL~7shPw@8Qd=$>{j*`t5Mn>=m!bKvv2==q(ng<}Aj{CsGFDqTD-J=OL_|HUlK9y0 zMU^Zn8%b+IrtpBuS+zJ!+E_@7jSr>fax#glD#>Xlp!tsBDK25-N%;jWbykx4HB9ir z0=yM$f*)P_Vyj6ln;Kn^dStOgfyY#fC<<({+C&Pou~2kp^u!VT!W0Bfn9<&JuNh6; z^bKN>5a=;`j9@cI7KVBKfW3H@>4q@I8wix}m%K-WaNdk2SF!Q+q%S>SbqGFvh+s>k zrg+7QqSq39_j<1y(m@NEzd~fN%b28up_TW7En-t_Zb%|Uj#}O52d`G@7e=uA9N)E|C$WP>TLnWf(D=R9FV*|!HYo=8} z{Wb*5NzL5VrPMe_HgC{TJ)NCV` z+!}Jz_3r2q=dv8#SKcUt6BU`@+lmE=@wLnL%zpj!>i4cL?Jb<0*&JjO>#W9bEGQ;w z+NQ^)jLUdjI#tOI8Lvv-FXAD?NJdHz2~Vhuka0UBJmGcuHA3M`uIhq2bbKU8+{@7xhtblXNT{s zQk}ZSALM(j|FG)S7yRppE6UfaUgxp$q^savclJUFeW^0+X(3DKir|^zC%Te*yZ4Cr zCFJ$!4}-xQ;*34OPOHHZHF#AGJ*!e< zDs|pkR;H|>Nh?)irT$XGQMJ!&4R>1mLt;suM5NYhwox=kkA}*Y#;oiL()@VK&g7o1 zo}E1po8dBXdDHA-Jdb^`g_!<2bgFooN-vc(GoI5URjJ}Jk4QI_HB^r3s_K=QIb&4j zQE7ELtG8PNjVv-vs`yKo-@~z-Pd#;WG>F)yd>hWiE5BZ|ug564xyn{YAS zi7S!CZN{V-*Wp^+gfVg!=4UgpNFX4LfmvI9g!+UTP zH{zRkFFKgO571oiEV8)$5pKrwtS|Qwe3_f@4c<@wEpEZzkY?MzBfGVWxD_23z^xd> zZ5YDsSdTmKR(uH8pjkME^v;gsBiM?MVp}yqH$gl0V1X1$u`FRZ!MWm<3jK3UOoeoK zQFx#sQ_I%lxw!Jbb93he#a!nqBNw@D_Bm3SXT?me;qwV~bL?hK6DF|;pTY(BG%mz` zk-?jhj@Zk&S&|~)nUG;@Li6kH!h_`WR1P66k;6z!GUM&|0y(V_T4-;@5j1Uo5zTcq zGAA>iSZJ|r+Dwb>SMXIck1u(i;B|Zr&)`vf52@K7;~V%r@(kFQ@g$n|ox;Cy9IxR4 z%;TrOh50yv1vrUicp7iSDO1mMWtsE-;(h9!+D#sn)|+| zE8_0b(m5s<$7+*A`53Q0kv@g~tjn+0znm*O^KkIH%o3sPDznL>L z=iWJ&i|*20SMvO&4UIJ&N<{9opWb%2Yg*HV#piEDm-FgWr!GS>g( z%T3AsMIV2?YR1+2OUjqw4UjH z9?(AH6bTpmOdwpT{AOmjO0}Eqp;^nK%&1nShgJ6>)l;R?^PHX~PS5Zp<)nA0^cB_H z=cIoSal|WesV%nmd*roOk&sQBZTTYJ0y7pa4-9bMURK!I0;3`q0^bs8A#$Cy%gk4i zbU?GbiO6Ci>rBJcS(!DO$V4Im_u$lxPU<`feq*Tk31)85qXc!69R4c7p}wS*nuS+ zeLecbZ~CSu&AsW-)NBdJcq!&2MCEr@j#{YuYYRu~B!lg4#Bs>^br3gW z6;g-pnYe@TtGE-XyL^iEXpcbMWDnzHiVpS36qWjD&YM}l`KYF7&L^7R4cLvJU=P}T z)KT}NO=ysM>H$26r?3xC;{iO2hww5UMqA4xxaKw;HtUCY5}zWQ^&iOQObrLo*6=g* za&%slE<}=QAD+WW_&Jv0kh^_GzdF9+*0tKUd^KR!)|IOPcURpx=lF8}iP}PU+UpDR zik|YPYulE5TkEzK^B`Vfj>VTdnQlDQ*=%P&u$N358_D&QbT->~G#8MU=QyBz^dEEf z>&Sf>@~?gNt>@l2pT<9viZ|jBnUGDYq-t(1Kj-zvn6sagZFbIlm#9z)^GhQ5KmYFL OlsZoDI5+<0i05x;DFYq= diff --git a/tests/lemmatizer-test/.gitignore b/tests/lemmatizer-test/.gitignore new file mode 100644 index 0000000..b87b494 --- /dev/null +++ b/tests/lemmatizer-test/.gitignore @@ -0,0 +1,2 @@ +differences.log +corpora/ diff --git a/tests/lemmatizer-test/test.sh b/tests/lemmatizer-test/test.sh new file mode 100755 index 0000000..1e4aa0b --- /dev/null +++ b/tests/lemmatizer-test/test.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +./test_corpus.py corpora/A_en.txt en >> differences.log +./test_corpus.py corpora/B_en.txt en >> differences.log +./test_corpus.py corpora/C_en.txt en >> differences.log +./test_corpus.py corpora/D_en.txt en >> differences.log +./test_corpus.py corpora/A_fr.txt fr >> differences.log +./test_corpus.py corpora/B_fr.txt fr >> differences.log +./test_corpus.py corpora/C_fr.txt fr >> differences.log +./test_corpus.py corpora/D_fr.txt fr >> differences.log diff --git a/tests/lemmatizer-test/test_corpus.py b/tests/lemmatizer-test/test_corpus.py new file mode 100755 index 0000000..8c986bb --- /dev/null +++ b/tests/lemmatizer-test/test_corpus.py @@ -0,0 +1,36 @@ +#!/usr/bin/python3 + +import unittest +import json +import requests +import sys + + + +def lemmatizeSentence(lang, sentence): + data = { + 'operation': 'lemmatize', + 'languageCode':lang, + 'sentence':sentence + } + + address = 'http://localhost:8800' + response = requests.post(address, data=json.dumps(data)) + return response.json()['lemmatizedSentence'] + +corpus_file_path = sys.argv[1] +lang = sys.argv[2] + + +line_count = 0 +with open(corpus_file_path) as corpus_file: + for line in corpus_file: + line_count += 1 + orig = line.rstrip() + lemmatized = lemmatizeSentence(lang,orig) + if len(orig.split()) != len(lemmatized.split()): + print("Different length in:") + print(orig) + print(lemmatized) + if line_count % 1000 == 0: + sys.stderr.write("Done %d lines\n" % line_count) diff --git a/tests/lemmatizer-test/tokenize.sh b/tests/lemmatizer-test/tokenize.sh new file mode 100755 index 0000000..442a2f8 --- /dev/null +++ b/tests/lemmatizer-test/tokenize.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +for corpus_file in `ls /mnt/storage/rjawor_storage/copycat_corpus/cleaned/*txt` +do + a=`basename $corpus_file` + concordia-sentence-tokenizer -c /home/rjawor/concordia-server/concordia.cfg < $corpus_file > corpora/$a +done