From 20b4e44e7537762f6aceb72b039f6e8f8a50e201 Mon Sep 17 00:00:00 2001 From: pawelk Date: Wed, 19 Mar 2008 11:29:21 +0000 Subject: [PATCH] Wersja niestabilna, zawiera wstepne poprawki. git-svn-id: svn://atos.wmid.amu.edu.pl/utt@13 e293616e-ec6a-49c2-aa92-f4a8b91c5d16 --- app/Makefile | 2 ++ app/TODO | 28 +++++++++++++-- app/dist/tarball/INSTALL | 4 ++- app/dist/tarball/README | 2 +- app/src/compiledic/aut2fsa | Bin 11818 -> 0 bytes app/src/compiledic/compiledic | 22 +++++++++--- app/src/gph/gph | 6 ++++ app/src/grp/grp | 12 ++++--- app/src/kon/kon | 64 +++++++++++++++++++--------------- app/src/kot/kot | 9 ++++- app/src/mar/mar | 7 ++-- app/src/rm12/rm12 | 6 ++++ app/src/ser/ser | 29 ++++++++++----- app/src/tags/uam.tag2re | 6 ++++ app/src/tok/tok | 10 +++++- app/src/unfla/unfla | 7 ++++ 16 files changed, 159 insertions(+), 55 deletions(-) delete mode 100755 app/src/compiledic/aut2fsa diff --git a/app/Makefile b/app/Makefile index 58675dd..bc83743 100644 --- a/app/Makefile +++ b/app/Makefile @@ -54,6 +54,8 @@ dirs: .PHONY: components components: + cd $(SRC_DIR)/lib && make; cd $(CUR_DIR) + @for cmp in $(COMPONENTS); do\ cd $(SRC_DIR)/$$cmp && make && make copy; cd $(CUR_DIR); \ done diff --git a/app/TODO b/app/TODO index 7271464..3e41a41 100644 --- a/app/TODO +++ b/app/TODO @@ -1,6 +1,28 @@ -* wyprowadzic grp-pre i grp-post z grp do aux? -* zamienic kota na lepszego (Kubis) -* + +* gue SIE NIE KOMPILUJE !!! + +* zamienic kota na lepszego (Kubis) [TO] + +* uniezaleznic mar od attr.pm (ew. wcielic attr.pm) [TO] + +* mar nie obsluguje plikow konfiguracyjnych [JW] + +* compiledic - plik tymczasowe [PK] + +* ser i grp - parametr tags (zeby bral wskazany program tag2re) [TO] + +* lista zaleznosci dla komend system w perlu [PK] + +* polaczyc sen'y [TO] + +* poprawic utt_make_conf (wykrywanie slownikow) [PK] + +* generowanie i sprawdzanie zaleznosci dla tarballa [PK] + +* przygotowanie dystrybujcji slownikow [PK] + + +STARE 1. DONE. Makefile do gph (install). 2. (zrobione dla ser?) Nazwy pmdb2re -> pmdb.tag2re (grp, ser). diff --git a/app/dist/tarball/INSTALL b/app/dist/tarball/INSTALL index 2d46c52..a873a5a 100644 --- a/app/dist/tarball/INSTALL +++ b/app/dist/tarball/INSTALL @@ -1,4 +1,6 @@ Here you can find some information about how to install utt. You should just unpack archive and then -execute create_utt_conf.pl and remove it. +execute +bin/utt_make_config.pl + diff --git a/app/dist/tarball/README b/app/dist/tarball/README index 2f7f453..16e05e2 100644 --- a/app/dist/tarball/README +++ b/app/dist/tarball/README @@ -3,4 +3,4 @@ This directory contains Makefile, which allows to create tar.gz archive. To create archive, just write: make -Warning: you need define variable _UTT_DIST_DIR. +Warning: you need define variable UTT_DIST_DIR. diff --git a/app/src/compiledic/aut2fsa b/app/src/compiledic/aut2fsa deleted file mode 100755 index bd7a6d563d4c4e46e9c984d9619900243f8e81b2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 11818 zcmcIq4Rlo1oqzKY9q8ypjEY~|QKRA_F#`cX+0!uLLB>rWkVHo@z6_Ir8BH?j%o_{} zH8_c897nTOtDdq)+}aj**)H}_t6f%tu!(qFMXGIAImhT~J87^6K~1fi{r&EHH+dOx zyY-yjoPX~7-TS})ulw;n=6{D;v&?3*DYDoVheFhxak?r{?>N`!sZhL%OPQsdubiWt zgeLY?bb$vX24GMT+MFN<`+zI9pdWNPFo}JXO%jD;K@Bq%g~ahnvC%KuwxbW!f&r~| zMd)QeE|pgBx1x_U1Wck`)IqAom}=-Id4Wl?ukJm)rRyw3>3Z1dB3+I?lI&lFe!wpk zDgTPr*xD;vTjsRJ+Ebln$#~gZV=PP)zAazTD8|xGj+q8J4Rk&TW-A`hH$Z2D&H#3^8G&IA3qLY*v}0~QxZ#+&{XmKkW^+?)a`F*|81eVGnA(`+lO zJ~0~xU@&DgnM!I=6Bn6=T#RdC%47Kz+KIW#5I>zm` zU=RBHfjMs%pnoy&C~%+2{{m{3mq3*NAOVWBZ?yrHg{awIZrb}8iXwN`EIlS zWwSqunxzUv{uMq`#*s=a~A|nEHNC zW|T1y`JXW5v&{>79dvod>G}*4&wkbH-->JEXA1Zgtl^~I5}rMq3wVK|-B7P5l7Vo& z?(>DCp@gOWN6GEzqDflxu4J0BcX(fFwfNjFb$-K^oXKe zvq4)OS&yJc5>>P%lBA1m@pcUi30>1Ti*3dP;%a?3tR;zjY!4glnB;g$SK1fHZD z*lahV{bVR=W(`>_tg$bntZ|^VvWDgk);K?P*0>>bvc`eGi8VayVvU^&{8^>1GGZkeUYusR-V2zu|cGmExG4o#fh%4U%6Z41p?@}l^ zuumDE-vEyBi-Y*jc#U67PolgW9vVlvm>xyhH+)cFdKP6khx-Mlhf!!&hIa}~PorMq zZ35Hd=q%zsf$4d)oVZJ1Mj*P7xIHXw4hh(>#f4+_l4M*E2S1!jb! zj}Y$!My>eXzbW%}`rXmGhSkxR9zySH__ylXXxBYXSAI5h9PjJHeAK6pIP>IRvt?iZ zHx(uO-Ote1b4V}B{|FbsH3#H0 z6tUliK?eWhQ9LH~^y@R8p$=S38+4YkZTz4VY=}4bXQuNuz39Nc+y-UKr}QIr4E(zx zGN1o^e0*?escR>6p%#x6=0B~ zd8Kq^6fWi6@aXYpaD~PU8uM2}QF?!eumAcrO}Wr#<}{em5_6YTT}|o!nMM!1Fz~_& zd@UxoINRjPrreAiPavve=yFx=j9%)_I7{<)Ag~C#r{@eXm=X%VtmP0 z@m(SL{!{SHlzd)5s;3#0cQX0Lyq=za6nqQ6itjBfNbsdL=Bk{MN4g?q{X2Q`wp3%T z%2miicd74TlgCGfY!!VJrwxMb%2ox*qJ!n?DtGGCOv;tsU%cj~rs31j^!RRL&AD*o zO}K!y*TpGZTI$X(1`{x?$rt0AlZ5%WjuUV>W@S)4j4macI(&79+mpTpjD6W)F!N!0 zz_#}tM?GV--#w~h9KMTuqd$In(c2_E&d0956hO>=B zJ%>8ox#iIi zy}{Y5evrl3r>FPy_tyTw)B6QDW^W$KG?Y4X%fDc3bAA_N+ll>*P-Ko~8%N=@qb`{9 zkM>sEJx|wukO?|_t07VSL$7}{+ccVOgvNK%Beq3vY(C%gC!2C*UJ#qfF3$nQ7U;Ean95p_T2eB?DX{h%h%kbHSL2`N8?bgeO zuMwu#zE4Mb)gzfPI51lkRwH7Wl2R>mM6_ z4(d+m{cRLd>oR>`70evX`Ard1ydN9>93m#y&z2+iBA{?F+vLvrgE&A%Fu!kz%cXW@ zUKm`0J2#Gs{$iNx#OaZr!`?<$`uP%01A~7qTCnV$xzsVjm&S1ZH9jbN{{wJ@=Ybe~ z2YDgo*J#DQ(|VIt3Ck~oQVvt)UBbwc()>bj^z^6ZAu0n)oVGrWY7v6H`Y8n02{0{{%WXcE*<^_Q zAGD3UZV2A<5VStaS5-=%hV?P9%&Cn0-0a=TUOfA-*V8ixdxaaZ(uEdfHi91wPimIHa3fxygmOiC5n%7M5AlaFYYD70W8&QV=p4cL+qeq3Ad#U!E#(1>{@I8 zeA3w2rm^XdElzI`08VEe*-9C}f8a!=dIwcL+$ej0>ljI8M#bi~zhye&V4$n8v~p@* zF8Qk<3-g<;3uf7PrW0n$ai7bJKV{Xvp8oGlGjR2%_N*g4;7Y4~wzS%7o3!C1*uc=f z{*k%PKY}T2*e@C?^``XLcl6uE8VaJ_HcB({m(YfZSbKW5Av_#)pBQz8-Z5cQwY{?I zHmBn0xd2{p!HpWR^A4YFZD_WCB72JEguV@7Bad1nnQm6wCs>{X%Wx8c zhE#gGAA~<}!P9*N7g9es()~8!$&Hk7A7{6=ujfGO!nAs?Exq|(yXXG5#Xb^Kjx$$G zxRb$?s{9z)wfWd|PmhC~o4YV6xP4vSS?o#kekGPmcc$c7OQ5>n#4U3;fyaRfx0>y5 znUiF%l%juk99(}CT)kLh#*)LGpxOS#a4(yU75f-htUPn(!@0pFF1}U`fB#QYV=Y5F zq2Mkmcw|!26IMwcs2p78;(;=}ijoX%Pg;09Jqy_(P8mLQ3||H)pLnpwJC5g^^LV8E z*D-kv!Mz#^bEX`*r5$niZn(r1ne)u8?pLlL@!y0J_o>N|g(U@mi@M?AC3XC8MR1NlbLj+e#!NgraXrk&id-{t?y4(=VZ z;|WGT&o`d=Ro!^1cs%7EPr1g=T%I0q;<;7deiDX!0civiYv^cn~iS>9KKr zhVRYsgV{7c)22t9Xi1Hx_dC}#@#)v(l~&~OYMfsTJFmyv3}=w<e;jA(C8lgErc8-#=Q^&Q1s<-{KFm-x z2OvgIpc+Emf_fX)Qa9?EAQszgb&741t4MtCb32{A*wO6sf{O2Py8Nh>C8xT2?EX`@ z1@X$SECw~hrcL0T=u^|qyD$;#zY*lU-|5=H572UKAYF8q^KJ+2+XUh}>r<$erEZ%t z&1qW(-I8@l+ELX_Cs44s#+$(_uSFyNQMIH&T=4o;v*vdl?vLYlJ6-%1eIw?I;-dI| zPanq8_!fhD)}2llze{h#VZv|d#UOq^XW^Qv!+h|Z*#_c!0>5SRTY3ZJ^7=f~{2sax z#P8lT5+`hY$6a_CM$NIKs1v}r{EG{JkC1~d?8UkO`9Z5eYe5OnouCIm{{;Fe=y}i~ z&^w?HL3jvq%>Z2pnhWxSR)eJ6mhHG+UR8CKcNTtXxXfEo<|`}r&MhxrP(H7`!aHkq zq{Uku(v9{xb1zf=O3d^A63Kr(W;$4w+}Ngv)}q!EMje$`v34979ZFeyT#uCDi{uPi_7A^gXuRQz8D zfUbB;NDnDxk*KyV5o(KQ(H4x9NGS{J@kA2pjOykvIw3OD77IgN9AfaVOvpCmtWAPD z9B*rjwCi&A|9s8+1@{)$viJ=SfNNbA_OV<9;)6T+#BXKj@Zwq)b2}?OG;j}+kNc6o zee~m6_Om8k3F4k4ANT1jTpR>1$D$HE+^^pPaj%k(`+5w3`&kz9vMdM5J<0u;zyWX- z+N53%A@Stp9_BUoG0(6LTuYlJe;94kK;+{&a2}lJIUxI4bCb1#Bp=U>2Jme|8~f>> z7C z&&eC{BOT9hS)|^((Lg@>+X=o-@QGIjlTjWh_gnb*dnkV!z1-wsCFSu8k(9Rsd^^Cm z{RDhJHu*Tmd@$f|xfVAjXM~U>14F`x#})GJtRW?@54K^0DR`kfp+l-#f8Zxn>rD z=mXal*Su9;%cY9K-*;t^@+7v_JZFfF>;dCEF|eVT=Z1j|4LlzVOzp-wU|?gmK8f>`Jb419Mg&e**Jq1Lx)hHcn#soZv8jb>})HpTm5b;d(Q$@e4fH zm4S^DhU>?`Smc)C5LpG24GxiEV8y&UOcn=L0q33YXGk(;6 z53qO!LQVY-qpo{I$}d)a2yC_IDd2K*e(B#Juq^cN5CET7Ig{*v2eo{TCZ2%+@fQom ziT-$T!5=&%|14lRpEi4m;sciPQtU^^{ROzO0Iw^+w*kxfx7p8B`U?7gP=KEW{tf)2 zJr^L0|%HgGfKN&l||=5GbEKLFehe7Vp!Q@I_O?{+vYh5ko@J1pZL2j=}3 z-NOFuz;tpUIncI>B9=g}*T8y|d zzE1(m^Tmt(xf@uXhcl2F^gOV9HzfZXz}DOt4<{N$=EhuL;wrS!{+ofVxiOo}{)^21 zJ`*1`@it(qzdr`<$Nob%?o&s{cbNRInfxCCZ?ddkC+=Bwmia6(?_n~R<`Q7(Z!!Fx zXZDYo@@h>?eO^!q*qU2&CosPm$oYG~O@k%(<+G@$$P{Ij4si)ShD?>tx zR@Ja_wN?|TZ$LVSrY)Un!dpN`E53cSl+C|#!2+d&RRspL#I^OU@wK5=O}tfT zp;V`0ynD2y+S)cs1?q~Wlkz24wt7jes!2p;@_JILcx4e%@vUXywYO+35xkA1!UC7aVj_cPp{B*+;uT8M z^zb?jnL10aU$LY%Pz9U$I;DjYiO@zZ($4G?OM}RLK@-PL;ps?33foLOS1wytuOew_ ziN8i=lr3yh+SI{jE3$$tAB(;sd8o2dB*2*YEg~U_NldL%+l^csvkhr8aCcpY7QK}j zLil)eCBA?$tx4n}v0(w;TP3dlx~W-HO~7JF#dY)fNBGPEJ0)-7b#(6T^{D)WsdC6zP32u@mCn058lAJ>e0q(c5FSz#y2 z(yFYqWFA37{a0(Cetx(!(wgXKy?}7|{~Bauf0>3^r`^m$J3)`gYAbAS2v`@_fGa80lt{Hu<=g>h-UeBv%k2W+F~errp4ly{TNxeVbH|jTjMMTa1(D z>*YZ*e{rgk!u&Zz%CcX~EW0UIXx6YZt) { - chomp; +#!/usr/bin/perl -w + +#package: UAM Text Tools +#component: kon (search context) +#version: 1.0 +#author: Tomasz Obrebski + +use strict; +use Getopt::Long; +use locale; +use File::HomeDir; + +Getopt::Long::Configure('no_ignore_case_always'); + +my $l='30c'; +my $r='30c'; +my $trim=0; +my $white=0; +my $bon='[0-9]+ [0-9]+ BOM .*'; +my $eon='[0-9]+ [0-9]+ EOM .*'; +my $bod='['; +my $eod=']'; +my $column=0; +my $ignore=0; +my $help=0; + +my $systemconfigfile='/usr/local/etc/utt/kon.conf'; +#my $userconfigfile="$ENV{'HOME'}/.utt/kon.conf"; +my $userconfigfile=home()."/.utt/kon.conf"; + +#read configuration files########################### +my $file; +foreach $file ($systemconfigfile, $userconfigfile){ + if(open(CONFIG, $file)){ + while () { + chomp; s/#.*//; s/^\s+//; s/\s+$//; diff --git a/app/src/kot/kot b/app/src/kot/kot index cd63cd6..f4b38f3 100755 --- a/app/src/kot/kot +++ b/app/src/kot/kot @@ -1,14 +1,21 @@ #!/usr/bin/perl +#package: UAM Text Tools +#component: kot +#version: 1.0 +#author: Tomasz Obrebski + use strict; use Getopt::Long; +use File::HomeDir; my $help=0; my $gap_fill="\n-----\n"; my $spaces=0; my $systemconfigfile='/usr/local/etc/utt/kot.conf'; -my $userconfigfile="$ENV{'HOME'}/.utt/kot.conf"; +#my $userconfigfile="$ENV{'HOME'}/.utt/kot.conf"; +my $userconfigfile=home()."/.utt/kot.conf"; #read configuration files########################### my $file; diff --git a/app/src/mar/mar b/app/src/mar/mar index 35318ad..3e7b3c8 100755 --- a/app/src/mar/mar +++ b/app/src/mar/mar @@ -1,8 +1,9 @@ #!/usr/bin/perl -#package: UAM Text Tools -#component name: mrk -#author: Marcin Walas +#package: UAM Text Tools +#component: mar +#version: 1.0 +#author: Marcin Walas #this program tags the tokenized file with given tags #tags can be given in any order and configuration through the expression diff --git a/app/src/rm12/rm12 b/app/src/rm12/rm12 index 09c7d2b..d039ee2 100755 --- a/app/src/rm12/rm12 +++ b/app/src/rm12/rm12 @@ -1,3 +1,9 @@ #!/bin/bash +#package: UAM Text Tools +#component: rm12 +#version: 1.0 +#author: Tomasz Obrebski + + sed -r '/[0-9]+[ \t]+[0-9]+[ \t]+BOS/! s/[0-9]+[ \t]+[0-9]+[ \t]//' diff --git a/app/src/ser/ser b/app/src/ser/ser index c9ba0fc..9a3cca8 100755 --- a/app/src/ser/ser +++ b/app/src/ser/ser @@ -2,19 +2,18 @@ #package: UAM Text Tools #component: ser (pattern search tool) -#author: Tomasz Obrębski +#version: 1.0 +#author: Tomasz Obrebski use strict; use Getopt::Long; use File::Temp; +use File::HomeDir; my $LIB_DIR="/usr/local/lib/utt"; - my $systemconfigfile='/usr/local/etc/utt/ser.conf'; -my $userconfigfile="$ENV{'HOME'}/.utt/ser.conf"; - -#use lib "$ENV{HOME}/.utt/lib/perl"; -#use attr; +#my $userconfigfile="$ENV{'HOME'}/.utt/ser.conf"; +my $userconfigfile=home()."/.utt/ser.conf"; Getopt::Long::Configure('no_ignore_case_always'); @@ -160,13 +159,25 @@ system "m4 \"--define=PATTERN=$flexpattern\" \"--define=DEFAULTACTION=$defaultac if($flex) { - system "cat $tmpfile_l"; - exit 0; + #system "cat $tmpfile_l"; + if(open(FLEX, $tmpfile_l)) { + while() { + print @_; + } + close FLEX; + } + else { + print "Unable to open file $tmpfile_l\n"; + } + exit 0; } system "flex -o$tmpfile_c $tmpfile_l"; system "cc -O3 -o $tmpfile_x $tmpfile_c -lfl"; system "$tmpfile_x"; -system "rm -f $tmpfile_l $tmpfile_c $tmpfile_x"; +#system "rm -f $tmpfile_l $tmpfile_c $tmpfile_x"; +unlink $tmpfile_l; +unlink $tmpfile_c; +unlink $tmpfile_x; diff --git a/app/src/tags/uam.tag2re b/app/src/tags/uam.tag2re index 1e3e591..033a168 100755 --- a/app/src/tags/uam.tag2re +++ b/app/src/tags/uam.tag2re @@ -1,5 +1,11 @@ #!/usr/bin/perl +#package: UAM Text Tools +#component: tags for utt +#version: 1.0 +#author: Tomasz Obrebski + +use strict; use locale; my $input = <>; diff --git a/app/src/tok/tok b/app/src/tok/tok index c83da04..87a8496 100755 --- a/app/src/tok/tok +++ b/app/src/tok/tok @@ -1,13 +1,21 @@ #!/usr/bin/perl +#package: UAM Text Tools +#component: tok (tokenizer) +#version: 1.0 +#author: Tomasz Obrebski + +use strict; use locale; use Getopt::Long; +use File::HomeDir; my $interactive=0; my $help; my $systemconfigfile='/usr/local/etc/utt/tok.conf'; -my $userconfigfile="$ENV{'HOME'}/.utt/tok.conf"; +#my $userconfigfile="$ENV{'HOME'}/.utt/tok.conf"; +my $userconfigfile=home()."/.utt/tok.conf"; #read configuration files########################### my $file; diff --git a/app/src/unfla/unfla b/app/src/unfla/unfla index dd73ca4..d522087 100755 --- a/app/src/unfla/unfla +++ b/app/src/unfla/unfla @@ -1 +1,8 @@ +#!/bin/bash + +#package: UAM Text Tools +#component: unfla +#version: 1.0 +#author: Tomasz Obrebski + tr '\014' '\012'