From 329421b0c17509d596cc73b58048c32c09350476 Mon Sep 17 00:00:00 2001 From: rjawor Date: Thu, 17 Jan 2019 12:20:36 +0100 Subject: [PATCH] two step search in cat --- cat/concordia_gate.php_pattern | 2 +- cat/favicon.ico | Bin 11078 -> 15086 bytes cat/index.html_pattern | 4 + cat/js/cat.js | 85 ++++++++++++++++----- cat/publish.py | 2 +- cat/versions_available/europarl_sample.cfg | 2 +- cat/versions_enabled/europarl_sample.cfg | 1 + cat/versions_enabled/jrc_enes.cfg | 1 - cat/versions_enabled/stocznia_plen.cfg | 1 - db/recreateDb.sh | 4 +- startAll.sh | 9 +++ tests/addAlignedLemmatizedTM.py | 1 + tests/addLemmatizedTM.sh | 4 +- 13 files changed, 89 insertions(+), 27 deletions(-) create mode 120000 cat/versions_enabled/europarl_sample.cfg delete mode 120000 cat/versions_enabled/jrc_enes.cfg delete mode 120000 cat/versions_enabled/stocznia_plen.cfg create mode 100755 startAll.sh diff --git a/cat/concordia_gate.php_pattern b/cat/concordia_gate.php_pattern index d3ccbf3..4318f09 100644 --- a/cat/concordia_gate.php_pattern +++ b/cat/concordia_gate.php_pattern @@ -2,7 +2,7 @@ $url = 'http://@concordia_host@:@concordia_port@'; $intervalsArray = array(); -$data = array("operation" => $_POST["operation"],"tmId" => intval($_POST["tmId"]),"pattern" => $_POST["pattern"],"intervals" => $intervalsArray); +$data = array("operation" => $_POST["operation"],"tmId" => intval($_POST["tmId"]),"limit" => intval($_POST["limit"]),"offset" => intval($_POST["offset"]),"pattern" => $_POST["pattern"],"intervals" => $intervalsArray); // use key 'http' even if you send the request to https://... $options = array( diff --git a/cat/favicon.ico b/cat/favicon.ico index 1f57aac3b1204f6bbf67e7db6aca94b6defaf844..b1d2d2797dc5ba7b62ef222bdc6b1d530e99d3a4 100644 GIT binary patch literal 15086 zcmeI232;@_8OLuxgCer2Qxye9>(uJlTC3F-E86OycC;#Wuy!12vEo!|b*f@r3W_42 zVo^jk0TEe55K!5dAOZmakuB^5gdh+?B1uSIl6U+2U2-FYMMZ2K=e?PebKkw^obUhr zzvZ0!y#zs8P&2sj!hpVMF#Mb#_(c!|O`BGJuO9^KxmTaw=l2{F1f3ahE@PMyw5EAK z`uuQjm-?GxAjLq6ffNHN22u>{&p;T4HHwOgj^DLwSHs-g+|zgL*wL8Qgm&i6ojZR} zP*6~}w6yft{b&9Ez7-Z0rf=WA{j4opw%o9Btu3Wit(TWu-Rxe+^ ze1nbaby-FknE``~8hK-Ux>c8y1ZlJ$iIFZrr$V{P^*aPP1pv4i_(8T(o-i z>Y1N@`e_?{@8qMY2Yffm&dz=cu0NG$c@^*LufHDl>(?*r+qZAnzkmPe`nAfjgA7Z1Zc}*-w`0eBS;0bN&sV?fQJts_<5b^TbYp+GNQ-=3^ zhVGd;IXS=nIy#W=PC`yEY+0(Wid%nGp1dwwwk-VMgAXFE^dn`#SHv-~9L}3JFZ}4E zk3!~0bHwU%aeSB`*>u1J}e)aCV?}o-0&$IO>^7EJGDX=#L`yh0$(7whT zz7xu+aKGjRS>LNb#j@)WX{YPwaq8-(RToiqGjB%xG z$C=aMY%4n4dN@0<@6^WDJ;?_Z`nSGF&W^Av8+}&v?Ah}^;`dze%+P1`EqN2OSe2!G zWsB3CB$vj#wvl_s%Z>NdU4D>LjxQ$FMLk@{I5XZ{&zRxaX7cQ{2h+j5lbrtGoH=t! zo#T8t7e8E=l4AyT@811v=MtaSj~X?qNdDzm-jyqU?O{H$x5=aV#vH1S@}LgR^`6+A zM|@`?YZ!JYHm3Abu!P1$JkGdAw`HGw_SyN>bs)E2O-|n?XZp5rCMI%1F*rKF*9q|) zVxfoJxhI(?%`f_oa^zEda;WZNkvn5ZU7RNlVY{&O`-`#bedv&}haRV>+CW%j1hTqtUjPD_RkPohS<1D2i6F|7U6j1 zl~*D^GA^AX2gZmo&fL3|(V|6*H0*f~Iu)5S#AN(9SDQHxn{RNeA9Q*`4BFeV>ZmR7 z^U3UCRes~x7hZVb4s4P%KJ^{_MxSu5*x@3|_xc|kMY@PzTe!A(o$Fqx9O5X76Jwb1 zZI$8Y#TQ>pCqKP_4oQ7cJ(Q;pYB%KNIF8sl1>0mgKen^+XG~(lA~4(6lZc{&bGRD-~b@XX{ikK;5{vTpF_W0NZ_-BqWBp<|QRYH z5&r|t2l}sL`r;2!=-x0rlkNq}r-ApX;hc(Z` z=2>#6JYz{8(pQyH=?kmyf$Ls+>7`rXAfe5P;i1^=d~|8|%rnoN_x$tE*Kkep(9`C6 zbC{g#SN5)N4v{0V7z6Hs*eQu`J>XnqoQcf_lKT^KB==%cUv&8#o?APv`iJcGJ@9k?{rA^I=EKBDiSeSX#caIGnZ9HW z(>BOo52m)f>z-2mOoc!E2LI2M2eF%T+#j58oXDd-<9GA5J$kN!^D)THH)hNY#-4h6 z7H50{n|H-F#~+CNi#zzlaoDFTc1|eYx$sMf_ z#qzA(-8=OWIhJp2>KcCc+TJsQe7X0jpR$ZOv73{@UV=Vd;B~(|{srp&ukf+j;C}-A zyY*MsFmEY8KA($EJ@kdj{kIUG_I-Vuv1vY)SAAO_7PCGWpN$=39&sO#XUEF9c2<`D z?%s>cJbd&)u%rX^!*9vi3v;xwfgNIbJ)thVy##=n>)fx&EcE8gKT@FM;zB&pr3t0cXrZBR%vJ_^MC-{Tq5L z$0kXAN;x($$(#F;`1N;j7_)L|Glty5?N8WeT7vBn`0UB`F#Kc!F~YHA;w1cHJRA=txAee2 z9%TQ%3C!o=-zR>X_>Oe#+itt924{)%cJ10lt#jwj>8)C|ip~{Bx@Kx#ih&dZDF#vu zq!{=y`vTE&FW#56M0Q_Bzmaq;DPsNxGm(5R}_fc_>Mz zr&3wsX^2yQK5y4`sQ72o*R_8<8dlY3_`M@_BA(addBz#(L0#r$*LgfRgLU#*`Bc|U zE8n|*a9JcTk}D3x_(wbuzU)f8;EwP!u!GOT{Ca!!I=X!Me_BmxSJJBfPphh`%gxj$ z#XyRIuf;$znXEw#t^u{QAM@^b74MC=@a}s%?T=jlo|;xO>Y-=zzZ`XlX0paN|F{nI z;nR@Oiucogyaz6zj`bOJykg#|%6X?s(kghzDxqP1n9pSUU z+J`z@3sHu>@80MW)j_Pk*{p-utYurv@}1nV)pJd0`*)<<2_x{LLFYjZxU)HkXPSoiS0_Ru!!!q{BuIDe^* zp}f329sFH*B`=ppb%BFC@OMLx-@o(HADSCeYG9@ zZNfh7@WJJ-X}wpxKF1a-@rO%XpBisdbXp;om9lq&^G2T`D-90%i(4HWBSvdS@$Z=a z{-M3C@rlbCpnQX`h@8+IeJ{c%mgy^U7uO`=c#<;EwI!UF`CJ}}wfX#p)4*rc8|vZD z^W{{#$SbiC)z8J??^6E$WpC|Vy|t6IIAkRl^E=l8^EJfER_k2q?|S%FUhCGaYl6GK z^(1Rd+6dgm_(+>d`DZY8oqp^*utmO)zNPR{U_8XXhsup(^Z|Vy`JeOqO7wdU>(#?H zGmH;qQ6H+{x*syqs6ltOmgTe9N2qD#fc*yQL5e~r@(ON4$lYwm-t0OjR#Ms$#qfhB0)@8-O_y^!} zE*xdC_SMv>np6L&gD?Gv7~HJQ{0+%oKQw0T^%wMfhJ4bPHTo-C-&6)ZwFY}!s_gg! zcg^ta_4=PaA`fy2o;~1L%k#EySUVoi^R}!}t`De_`9S{djSp;7!m|PBx&Zx>KC>Q+ ze{F{Q>*ICy?cwqU{B)!1>Eqg596*Uj*?X6X5I{?Fj-XPu2s&k75!A}B6V$WBK~9a?1h{`l6-fcxRlR3F?a4q}ZIM-*L zE$r>B;ARgp_ORYP%>Ot0dogQefoBfqlBD~hxWTi5_p$Rh)4k$;i0ol-brSPNqPOek zac>U&W9;Q;aoyK5k9x}|_~(IjFs&54?=v^ryOG}nn~ecy1?PbZ_SluI2eh$y?){!O zI9s_!1#9?jK8pRY$-ZvOnW7;$A7-y!fnH_Ej`(-)mV2<5a&Ia91MF!Bt=MtaOk<5k zoYihZr^o1ffbBnEc@!)+at1lAapT5c%OCf}_+7l-TO+kzN^Q3}_19X|Wby1wwCv4) Upx8Qd?Y(^JvdIp6$Hw#i3x+GKy8r+H literal 11078 zcmeHN1z01=wmxIXz%a;Q!9s8gWC#*>Aqj*ega}C>KwLTrIxx7CWZm7}-QC^Y-QD$e zhs%B}J2SiQ-QBz2zDIpuCskc_PM!Mubft3s1F#9U0-Fu!cMO!K0SEyAk6&q@69eEy z?tKp+5peSOg1-w%U>gD^BS z1jEC_uxHO67#SIX(a}-ZyLT^)jg7(h_&7{VOu*#iBuq_B!SwVr%*@Qd?CdPe&CS95 z{5&iyEWqO8A}lQ}!SeDl1cN~cg+dSxhvA@u4uXRZJ{S%;bIO7aB^UO2hth3I7v(G*o&N=5CIQQIh;k@(CgY(ZnA1=7y z0=V$P3*n-RE`p0Mz8EgK;kxUtgX^!q9&Whd2DtIY8{wv#Zi1U{z8P-0nAAcO4c;X3o^2sORsi&TTr=NZro_XdOc=p+6;koCYgXf=r9$t9i1$gnr z7vZIsUV@ijei>eQhJOHS#7tm(64{ znCQizGngz6Pt>e3*aD-o2QnhKFwtk#$wgcaqoJm>C_gtlD9aTlAGX*Nkp7~#`A+$K=R14B8q)xLrQ!B= z8)BAMWzpztN$0>C1xsUg1-?Xw_-y)I>cqG)lV?!pR=2lGcpPQ}O&Ag@oA6Csrf6B- z;X>Nvo9HxFlWAan6@%qTrxI6Z@q4-(2~*)&Bwc6{rrElsJPNV0fkC5_t@?Ixnvf;a z=_kdOfJq%gr!gg6!~X^`IPFzuvYmdHA;}aD1t-WfBdcVfr#UOPq^hO{zllntNP5~u zNjz@*uxq@jJ77_zP-rY!@7^WM4?+tA`V67V=V(hFM=@EnXX->*es@!5U8~7rF)5_E zc@!2~E@zP0Ew0%KkEYY_Y)x*YaTLxe48!Hoo-BbAakStfZ%448xjnJRuTPiRk-mY! zfdN-DF^k6ORI=mP;+~1+#a@%y?`nyq()rrHIdu9NZwb@pwYO%^V2&LcP1X<_EZ!uA z-Pb=jgr1`Z1G2n2_UUP45-Ye?!Dn~aId(k*W+kU8d}6w6Kr9& zrOKT|zQybB9~c})58;De-X>;qT{|^bU>FI8gNsIq4Kay#&^hV=YW`qKm&avH(ho<_ z2$#1C)0?fSMi1(}eqR99Fg&0ur^#`x?06w2A70+$YPEZmHC1etADywR)q~h7YLuRt zh)1O=>vLo}iglpRYcqG*kwMe|M^%}EA(igr8}~-WbPRR6-NMZR!*Dp{%I@m-w$2#JzZS9+0j3^GyQ&{c+zpGb+Q(M$YTIDYP=&(~K z&7n8~UhR&_HQVs`DAyb?tG6&{46aOXADEb13WgRYqQza-F0E{*+#t_mnTpgNr%5iz zpgH|sUBcw5@evC~#<-n6i!zx+qci!<+FoR6e0qfE8w>bZX*fl*5MR|H$S~+y1!SDi z)92Rim|8PCreOkU(z#u2+!zk3fhSSud+gn+#yS$QktbxaQp+{+Xh{=~ol&Co1#I&8 z6<3W+z48&7GdQIdM5ioG;?QYy7Edf|q_XKWR2ww|N7b~(Q(4R`Jj)tD%vAh7e#1gx z>oiC2bZF#Nv9(-u(_*GpW^Rv-OTrNvRo#jt8YvY|HTeBj?JoJ6dDm9^JbsSJWz)7( z2|Mvrf>5PG_iw4f=yb|sae1WzgAcJ8>NCgI%B-yi!>)c8S7&o|YBl`I8k^UT{Qw~d zqOW4e)K;ITSI~62iVpLXSwuaZ?zD^!( znh~$pD3!K%IU>KFZkNk$=``wO)ftuU-=IyzM%HnovL<4D`PUF(K;Ev>YwVnTL$8ZNJZP();F>}y9j6!isB@%Rcd zg-Wgl&%!xwITwZNGizAtC3Wsl?hpj{GEk7OoO$yKwZv3tjXC; zg$c!_ySC@@b4mqffub_LSmH+9f;?5R%7au>D(cJC`VMVFG@iDt&fdvYm8F&I5ywtN zEdyO{OgukFsn+NXx>RCL0@>=-vE&K40;F0c+e)z_NZwXvolaWT($PUK#^D<5h=(C+ z+O9=NeCeh_p#vdAbF=VeEkwO`H!TatcO&L3QCl7wVW6mpNyS22BDg4s6Q^iz%P=O# zX*{OgyW0|b5X9RW=ka3 zXXlruWy|Q<1T|T?f*;bwk+8R96Uypng}Ft9ZNNKStc~K>? z(Yf66m~AyGZtO3;SjC_qiv(1Wx=p2OZV|h-dQ?)A^0D z@wN4dJ7f8UvAEKz&8(s=MM;%4$x*y2Vm3cNCAp%6omaghp|Bxk7oX27O5cX7Ns8GL zos^!;$L6ffNsGec)6z1EB27M@OC^;R6_eQ!Ykv7yHSd=j`>gz@m}C1ano0yU=yjld z7R%B3qq#Y&c{%&a$@#r}oIfiU2g}3xZ|30qUjEI2a&I>BZvIm*-jM&R5itH2i{ZC( z8B}5xrM`jAm+RJa>LtYVn5~=9l8{x#)~xoZsuFi(ljv*~eK$TaF1bXyqOc_+tAvzO zQC42rP+mqRXKu%Funt8cshCbn$jjZdHL)};okS%?r*n-)%`Qrj$cW1;Phl{#qRQgR zlw@iYuGv^!OQ0LI+jDZHB5`wOLVBiFU0V}fttL>m@r;7x=(mhvJ^&bKEDB18ZYA~QA(Onds@jgiE%`h zjGtbVoSd3nm{?!LkE~2XClX@fayCUlZ0_c51-tWGR~OtwCYS9*GcNd9cuE1$xQd^_ osF$(W4fSMF9kI6E*s(f1@~zdN|9RACb#?TwdjIssU+tg&0*25}b^rhX diff --git a/cat/index.html_pattern b/cat/index.html_pattern index 9b5ff1d..19f8b43 100644 --- a/cat/index.html_pattern +++ b/cat/index.html_pattern @@ -45,6 +45,10 @@
+ + +
+
diff --git a/cat/js/cat.js b/cat/js/cat.js index 610edb7..adbd362 100644 --- a/cat/js/cat.js +++ b/cat/js/cat.js @@ -1,3 +1,69 @@ +var pageLimit = 20; + +function secondStepSearch(caller) { + var fullSearchRequest = { + operation: 'fullSearch', + tmId: currentTmId, + limit: pageLimit, + offset: $('#current-offset').val(), + pattern:caller.innerHTML + } + + $.ajax({ + url: '/cat/concordia_gate.php', + type: 'post', + dataType: 'json', + success: function (data) { + presentFullSearchResults(data); + }, + data: fullSearchRequest + }); + +} + +function presentFullSearchResults(data) { + var pattern = $('#current-pattern').val(); + var offset = parseInt($('#current-offset').val()); + var begin = offset+1; + var end = offset + pageLimit; + var totalCount = parseInt(data['result']['totalCount']); + if (end > totalCount) { + end = totalCount; + } + + var result = 'Showing results '+begin+' - '+end+' of '+totalCount+'
'; + + for (j=0;j0: shutil.copytree('js', root_dir+'/js') shutil.copytree('css', root_dir+'/css') shutil.copytree('images', root_dir+'/images') -shutil.copy('favicon.ico', root_dir+'/favicon.ico') +shutil.copy('favicon.ico', '/var/www/html/favicon.ico') config = dict() diff --git a/cat/versions_available/europarl_sample.cfg b/cat/versions_available/europarl_sample.cfg index d6a4004..99cafcf 100644 --- a/cat/versions_available/europarl_sample.cfg +++ b/cat/versions_available/europarl_sample.cfg @@ -1,7 +1,7 @@ dir@#@europarl_sample concordia_host@#@localhost concordia_port@#@8800 -tmid@#@2 +tmid@#@1 desc@#@Welcome to the interactive Concordia demo. The system finds the longest matches of the pattern sentence in its translation memory. This translation memory is over 1.5M sentences taken from English-Polish corpus of European Law (Europarl + JRC-Acquis). Please enter an English sentence in the field below and press Enter (or use the search button). This instance of Concordia works best with law sentences, but is very likely to output some results for any English sentence. You can also use predefined samples, simply use the link "show/hide samples" and apply one of the sample sentences. After the search, click on the highlighted fragments to see their context. enjoy@#@Enjoy your work with the system! prompt@#@Enter search pattern (English sentence): diff --git a/cat/versions_enabled/europarl_sample.cfg b/cat/versions_enabled/europarl_sample.cfg new file mode 120000 index 0000000..c90ed2e --- /dev/null +++ b/cat/versions_enabled/europarl_sample.cfg @@ -0,0 +1 @@ +../versions_available/europarl_sample.cfg \ No newline at end of file diff --git a/cat/versions_enabled/jrc_enes.cfg b/cat/versions_enabled/jrc_enes.cfg deleted file mode 120000 index 8eec574..0000000 --- a/cat/versions_enabled/jrc_enes.cfg +++ /dev/null @@ -1 +0,0 @@ -../versions_available/jrc_enes.cfg \ No newline at end of file diff --git a/cat/versions_enabled/stocznia_plen.cfg b/cat/versions_enabled/stocznia_plen.cfg deleted file mode 120000 index 0ba3868..0000000 --- a/cat/versions_enabled/stocznia_plen.cfg +++ /dev/null @@ -1 +0,0 @@ -../versions_available/stocznia_plen.cfg \ No newline at end of file diff --git a/db/recreateDb.sh b/db/recreateDb.sh index 9bce649..380b2ab 100755 --- a/db/recreateDb.sh +++ b/db/recreateDb.sh @@ -1,13 +1,13 @@ #!/bin/sh echo "Recreating database schema..." -psql -U concordia -p 6543 -h localhost concordia_server -f concordia_server.sql +PGPASSWORD=concordia psql -U concordia -p 6543 -h localhost concordia_server -f concordia_server.sql echo "Inserting initial data..." for initFile in `ls init/*` do echo "Init file:" $initFile - psql -U concordia -p 6543 -h localhost concordia_server -f $initFile + PGPASSWORD=concordia psql -U concordia -p 6543 -h localhost concordia_server -f $initFile done echo "Concordia server database recreation complete!" diff --git a/startAll.sh b/startAll.sh new file mode 100755 index 0000000..facad13 --- /dev/null +++ b/startAll.sh @@ -0,0 +1,9 @@ +#!/bin/sh + +cd db +./startPGbouncer.sh +cd .. + +mono LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaGenSockets.exe & + +scripts/start.sh \ No newline at end of file diff --git a/tests/addAlignedLemmatizedTM.py b/tests/addAlignedLemmatizedTM.py index aecc7dd..26f2960 100755 --- a/tests/addAlignedLemmatizedTM.py +++ b/tests/addAlignedLemmatizedTM.py @@ -58,6 +58,7 @@ data = { req = urllib2.Request(address) req.add_header('Content-Type', 'application/json') response = json.loads(urllib2.urlopen(req, json.dumps(data)).read()) +print(response) tmId = int(response['newTmId']) print "Added new tm: %d" % tmId diff --git a/tests/addLemmatizedTM.sh b/tests/addLemmatizedTM.sh index b581e3c..8cb99b0 100755 --- a/tests/addLemmatizedTM.sh +++ b/tests/addLemmatizedTM.sh @@ -1,7 +1,7 @@ #!/bin/sh -CORPUS_NAME="jrc_enes" +CORPUS_NAME="europarl_sample" SRC_LANG_ID=2 -TRG_LANG_ID=4 +TRG_LANG_ID=1 ./addAlignedLemmatizedTM.py $CORPUS_NAME ../mgiza-aligner/corpora/$CORPUS_NAME/src_final.txt $SRC_LANG_ID ../mgiza-aligner/corpora/$CORPUS_NAME/trg_final.txt $TRG_LANG_ID ../mgiza-aligner/corpora/$CORPUS_NAME/aligned_final.txt