Pierwsza przymiarka do umieszczenia plikow w ~/.local/utt. Obsługa nowych opcji domyslnych. Nieskonczona dystrybucja tarball.
git-svn-id: svn://atos.wmid.amu.edu.pl/utt@41 e293616e-ec6a-49c2-aa92-f4a8b91c5d16
This commit is contained in:
parent
a7b254ce77
commit
6b3be72395
8
app/TODO
8
app/TODO
@ -1,18 +1,18 @@
|
||||
BARDZO WAZNE:
|
||||
|
||||
* w dystrybucji tarball umieszczac pliki w określonych miejscach
|
||||
* gue SIE NIE KOMPILUJE !!!
|
||||
|
||||
WAZNE:
|
||||
* zamienic kota na lepszego (Kubis) [TO]
|
||||
* sen - unikać dwukrotnego wstawiania BOSów i EOSów
|
||||
* uniezaleznic mar od attr.pm (ew. wcielic attr.pm) [TO]
|
||||
* mar nie obsluguje plikow konfiguracyjnych [JW]
|
||||
* ser i grp - parametr tags (zeby bral wskazany program tag2re) [TO]
|
||||
* polaczyc sen'y [TO]
|
||||
* programy z atrybutem input-field: funkcja process_seg zwraca false, gdy nie ma pola podanego po -I
|
||||
* generowanie i sprawdzanie zaleznosci dla tarballa [PK]
|
||||
* przygotowanie dystrybucji slownikow [PK]
|
||||
* (zrobione dla ser?) Nazwy pmdb2re -> pmdb.tag2re (grp, ser).
|
||||
* Zadania zwiazane z rozbudowa ser (src/ser/TODO).
|
||||
* Nowa funkcjonalność dla kot? con? - każde zdanie w nowym wierszu
|
||||
* Nowa funkcjonalność dla kot? con? - każde zdanie w nowym wierszu
|
||||
* opcja info w dgp powinna miec domyslna wartosc d lub h
|
||||
|
||||
|
||||
|
14
app/dist/common/utt_make_config.pl
vendored
14
app/dist/common/utt_make_config.pl
vendored
@ -19,6 +19,7 @@ conf_compiledic(catfile($usr_home, 'compiledic.conf'), $sys_home);
|
||||
conf_grp(catfile($usr_home, 'grp.conf'), $sys_home);
|
||||
conf_gue(catfile($usr_home, 'gue.conf'), $sys_home);
|
||||
conf_lem(catfile($usr_home, 'lem.conf'), $sys_home);
|
||||
conf_mar(catfile($usr_home, 'mar.conf'), $sys_home);
|
||||
conf_ser(catfile($usr_home, 'ser.conf'), $sys_home);
|
||||
conf_dgc(catfile($usr_home, 'dgc.conf'), $sys_home);
|
||||
|
||||
@ -85,6 +86,7 @@ sub conf_grp() {
|
||||
|
||||
print FILE makeConfigHeader();
|
||||
print FILE "macros=", abs_path("$utthome/lib/utt/terms.m4"), "\n";
|
||||
print FILE "tags=uam\n";
|
||||
|
||||
close FILE;
|
||||
}
|
||||
@ -145,6 +147,17 @@ sub conf_lem() {
|
||||
close FILE;
|
||||
}
|
||||
|
||||
# Write the default configuration file for the `mar` program.
#
# Arguments:
#   $mar_file - path of the configuration file to create (truncated if present)
#   $utthome  - UTT installation root; accepted so the call shape matches the
#               other conf_* subs, but not used by this one
#
# Returns a true value when the file was written and closed successfully,
# a false value otherwise (a warning is emitted and the caller may continue
# generating the remaining configuration files).
#
# The empty "()" prototype was dropped: the sub takes two arguments, and the
# prototype would reject any call compiled after this definition.
sub conf_mar {
    my ($mar_file, $utthome) = @_;

    # Three-argument open with a lexical handle: the file name can never be
    # interpreted as an open mode, and the handle cannot clash with the
    # global FILE handle used elsewhere.
    my $fh;
    unless (open($fh, '>', $mar_file)) {
        warn "Cannot open $mar_file for writing: $!\n";
        return 0;
    }

    print {$fh} makeConfigHeader();
    print {$fh} "tags=uam\n";

    # Buffered write errors only surface at close time.
    unless (close($fh)) {
        warn "Cannot close $mar_file: $!\n";
        return 0;
    }

    return 1;
}
|
||||
|
||||
sub conf_ser() {
|
||||
my $ser_file = shift;
|
||||
my $utthome = shift;
|
||||
@ -153,6 +166,7 @@ sub conf_ser() {
|
||||
print FILE makeConfigHeader();
|
||||
print FILE "macros=", abs_path("$utthome/lib/utt/terms.m4"), "\n";
|
||||
print FILE "flex-template=", abs_path("$utthome/lib/utt/ser.l.template"), "\n";
|
||||
print FILE "tags=uam\n";
|
||||
|
||||
close FILE;
|
||||
}
|
||||
|
148
app/dist/struktura.txt
vendored
148
app/dist/struktura.txt
vendored
@ -1,71 +1,77 @@
|
||||
/usr/local/bin/aut2fsa
|
||||
/usr/local/bin/canonize
|
||||
/usr/local/bin/compiledic
|
||||
/usr/local/bin/con
|
||||
/usr/local/bin/cor
|
||||
/usr/local/bin/dgc
|
||||
/usr/local/bin/dgp
|
||||
/usr/local/bin/fla
|
||||
/usr/local/bin/fsm2aut
|
||||
/usr/local/bin/go ?
|
||||
/usr/local/bin/Makefile.go ?
|
||||
/usr/local/bin/attr.pm
|
||||
/usr/local/bin/gph
|
||||
/usr/local/bin/grp
|
||||
/usr/local/bin/gue
|
||||
/usr/local/bin/kot
|
||||
/usr/local/bin/lem
|
||||
/usr/local/bin/mar
|
||||
/usr/local/bin/sen
|
||||
/usr/local/bin/sen-nl
|
||||
/usr/local/bin/ser
|
||||
/usr/local/bin/tok
|
||||
/usr/local/bin/tre.rb
|
||||
/usr/local/bin/unfla
|
||||
/usr/local/bin/utt-make-config.pl
|
||||
/usr/local/bin/ipi.tag2re
|
||||
/usr/local/bin/uam.tag2re
|
||||
|
||||
|
||||
# R.D. sugeruje /etc/utt
|
||||
# lokalnie: ~/.utt lub (trendy) ~/.config/utt
|
||||
/usr/local/etc/utt/con.conf
|
||||
/usr/local/etc/utt/cor.conf
|
||||
/usr/local/etc/utt/dgc.conf
|
||||
/usr/local/etc/utt/fla.conf
|
||||
/usr/local/etc/utt/grp.conf
|
||||
/usr/local/etc/utt/gue.conf
|
||||
/usr/local/etc/utt/kor.conf
|
||||
/usr/local/etc/utt/kot.conf
|
||||
/usr/local/etc/utt/lem.conf
|
||||
/usr/local/etc/utt/mar.conf
|
||||
/usr/local/etc/utt/sen.conf
|
||||
/usr/local/etc/utt/ser.conf
|
||||
/usr/local/etc/utt/tok.conf
|
||||
/usr/local/etc/utt/unfla.conf
|
||||
/usr/local/etc/utt/utt.conf
|
||||
|
||||
/usr/local/share/utt/pl_PL.ISO-8859-2/pl_PL.ISO-8859-2.sym
|
||||
/usr/local/share/utt/pl_PL.ISO-8859-2/cor.bin
|
||||
/usr/local/share/utt/pl_PL.ISO-8859-2/gue.bin
|
||||
/usr/local/share/utt/pl_PL.ISO-8859-2/lem.bin
|
||||
/usr/local/share/utt/pl_PL.UTF-8/pl_PL.UTF-8.sym
|
||||
/usr/local/share/utt/pl_PL.UTF-8/cor.bin
|
||||
/usr/local/share/utt/pl_PL.UTF-8/gue.bin
|
||||
/usr/local/share/utt/pl_PL.UTF-8/lem.bin
|
||||
|
||||
/usr/local/lib/utt/ser.l.template
|
||||
/usr/local/lib/utt/terms.m4
|
||||
/usr/local/lib/utt/weights.kor # -> share
|
||||
/usr/local/lib/utt/seg.rb # lok. ~/.local/lib/utt/seg.rb
|
||||
/usr/local/lib/utt/cats.dgc # -> share
|
||||
/usr/local/lib/utt/gram.dgc # -> share
|
||||
|
||||
/usr/local/share/doc/utt/FAQ
|
||||
/usr/local/share/doc/utt/COPYRIGHT
|
||||
/usr/local/share/doc/utt/NEWS
|
||||
/usr/local/share/doc/utt/README
|
||||
/usr/local/share/info/utt.info.gz
|
||||
/usr/local/share/man/man3/utt.gz
|
||||
|
||||
~/.utt/*.conf (wszystko z /usr/local/etc/utt)
|
||||
/usr/local/bin/aut2fsa
|
||||
/usr/local/bin/canonize
|
||||
/usr/local/bin/compiledic
|
||||
/usr/local/bin/con
|
||||
/usr/local/bin/cor
|
||||
/usr/local/bin/dgc
|
||||
/usr/local/bin/dgp
|
||||
/usr/local/bin/fla
|
||||
/usr/local/bin/fsm2aut
|
||||
/usr/local/bin/go ?
|
||||
/usr/local/bin/Makefile.go ?
|
||||
/usr/local/bin/gph
|
||||
/usr/local/bin/grp
|
||||
/usr/local/bin/gue
|
||||
/usr/local/bin/kot
|
||||
/usr/local/bin/lem
|
||||
/usr/local/bin/mar
|
||||
/usr/local/bin/sen
|
||||
/usr/local/bin/sen-nl
|
||||
/usr/local/bin/ser
|
||||
/usr/local/bin/tok
|
||||
/usr/local/bin/tre.rb
|
||||
/usr/local/bin/unfla
|
||||
/usr/local/bin/utt-make-config.pl
|
||||
/usr/local/bin/ipi.tag2re
|
||||
/usr/local/bin/uam.tag2re
|
||||
|
||||
|
||||
# R.D. sugeruje /etc/utt
|
||||
# lokalnie: ~/.utt lub (trendy) ~/.config/utt
|
||||
/usr/local/etc/utt/con.conf
|
||||
/usr/local/etc/utt/cor.conf
|
||||
/usr/local/etc/utt/dgc.conf
|
||||
/usr/local/etc/utt/fla.conf
|
||||
/usr/local/etc/utt/grp.conf
|
||||
/usr/local/etc/utt/gue.conf
|
||||
/usr/local/etc/utt/kor.conf
|
||||
/usr/local/etc/utt/kot.conf
|
||||
/usr/local/etc/utt/lem.conf
|
||||
/usr/local/etc/utt/mar.conf
|
||||
/usr/local/etc/utt/sen.conf
|
||||
/usr/local/etc/utt/ser.conf
|
||||
/usr/local/etc/utt/tok.conf
|
||||
/usr/local/etc/utt/unfla.conf
|
||||
/usr/local/etc/utt/utt.conf
|
||||
|
||||
/usr/local/share/utt/weights.kor
|
||||
# lokalnie: ~/.local/share/utt/weights.kor
|
||||
/usr/local/share/utt/cats.dgc
|
||||
# lokalnie: ~/.local/share/utt/cats.dgc
|
||||
/usr/local/share/utt/gram.dgc
|
||||
# lokalnie: ~/.local/share/utt/gram.dgc
|
||||
|
||||
/usr/local/share/utt/pl_PL.ISO-8859-2/pl_PL.ISO-8859-2.sym
|
||||
/usr/local/share/utt/pl_PL.ISO-8859-2/cor.bin
|
||||
/usr/local/share/utt/pl_PL.ISO-8859-2/gue.bin
|
||||
/usr/local/share/utt/pl_PL.ISO-8859-2/lem.bin
|
||||
/usr/local/share/utt/pl_PL.UTF-8/pl_PL.UTF-8.sym
|
||||
/usr/local/share/utt/pl_PL.UTF-8/cor.bin
|
||||
/usr/local/share/utt/pl_PL.UTF-8/gue.bin
|
||||
/usr/local/share/utt/pl_PL.UTF-8/lem.bin
|
||||
|
||||
/usr/local/lib/utt/ser.l.template
|
||||
/usr/local/lib/utt/terms.m4
|
||||
/usr/local/lib/utt/seg.rb
|
||||
# lokalnie: ~/.local/lib/utt/seg.rb <-- tego nie trzeba tak
|
||||
/usr/local/lib/utt/attr.pm
|
||||
# lokalnie: ~/.local/lib/utt/attr.pm
|
||||
|
||||
/usr/local/share/doc/utt/FAQ
|
||||
/usr/local/share/doc/utt/COPYRIGHT
|
||||
/usr/local/share/doc/utt/NEWS
|
||||
/usr/local/share/doc/utt/README
|
||||
/usr/local/share/info/utt.info.gz
|
||||
/usr/local/share/man/man3/utt.gz
|
||||
|
||||
~/.utt/*.conf (wszystko z /usr/local/etc/utt)
|
||||
|
@ -5,11 +5,13 @@
|
||||
#version: 1.0
|
||||
#author: Tomasz Obrebski
|
||||
|
||||
# Module search path: system-wide UTT library first, then the per-user one.
# "use lib" prepends, so the per-user directory (added last) is searched first.
use lib "/usr/local/lib/utt";
# The "$" sigil is required; without it the literal relative path
# "ENV{HOME}/.local/lib/utt" would be added to @INC.
use lib "$ENV{HOME}/.local/lib/utt";
|
||||
|
||||
use strict;
|
||||
use Getopt::Long;
|
||||
use attr;
|
||||
#use File::HomeDir;
|
||||
#use lib "$ENV{HOME}/.utt/lib/perl";
|
||||
|
||||
|
||||
my $help;
|
||||
|
||||
|
@ -6,12 +6,12 @@
|
||||
#author: Tomasz Obrebski
|
||||
|
||||
# wymaga niejawnie programu canonize!!!!
|
||||
#use lib "ENV{HOME}/.utt/lib/perl";
|
||||
# Module search path: system-wide UTT library first, then the per-user one.
# "use lib" prepends, so the per-user directory (added last) is searched first.
use lib "/usr/local/lib/utt";
# The "$" sigil is required; without it the literal relative path
# "ENV{HOME}/.local/lib/utt" would be added to @INC.
use lib "$ENV{HOME}/.local/lib/utt";
|
||||
|
||||
use strict;
|
||||
use Getopt::Long;
|
||||
use Data::Dumper;
|
||||
|
||||
use attr;
|
||||
use File::HomeDir;
|
||||
|
||||
|
@ -1,297 +1,298 @@
|
||||
#!/usr/bin/ruby
|
||||
|
||||
require 'getoptlong'
|
||||
|
||||
opts = GetoptLong.new(
|
||||
[ '--help', '-h', GetoptLong::NO_ARGUMENT ],
|
||||
[ '--debug', '-d', GetoptLong::NO_ARGUMENT ],
|
||||
[ '--format', '-F', GetoptLong::REQUIRED_ARGUMENT ],
|
||||
[ '--info', '-I', GetoptLong::REQUIRED_ARGUMENT ],
|
||||
[ '--only-trees','-t', GetoptLong::NO_ARGUMENT ])
|
||||
|
||||
$helptext=
|
||||
"The program generates trees from the graph output by dgp. dgp must\n"+
|
||||
"must be run with '-i ds' option.\n\n"+
|
||||
"Command: tre [options]\n\n"+
|
||||
"Options:\n"+
|
||||
"--help -h Print help (this text) and exit.\n"+
|
||||
"--debug -d Verbose output. For developers only.\n"+
|
||||
"--format=s -F s Output format. Recognized values:\n"+
|
||||
" a root + list of arcs\n"+
|
||||
" p parenthesized notation\n"+
|
||||
" h human readable indented tree format\n"+
|
||||
" Multiple values are allowed. (default p)\n"+
|
||||
"--info=s -I s Information printed. Recognized values:\n"+
|
||||
" n node identifier\n"+
|
||||
" f surface form\n"+
|
||||
" m morphological information\n"+
|
||||
" l arc labels\n"+
|
||||
"--only-trees -t Do not copy input. Print trees only.\n"
|
||||
|
||||
$DEBUG=false
|
||||
$FORMAT='p'
|
||||
$INFO='DEFAULT'
|
||||
$ONLYTREES=false
|
||||
|
||||
opts.each do |opt, arg|
|
||||
case opt
|
||||
when '--help'
|
||||
print $helptext
|
||||
exit 0
|
||||
when '--debug'
|
||||
$DEBUG=true
|
||||
when '--format'
|
||||
$FORMAT=arg
|
||||
when '--info'
|
||||
$INFO=arg
|
||||
when '--only-trees'
|
||||
$ONLYTREES=true
|
||||
else
|
||||
print "Unknown option #{opt}. Ignored.\n"
|
||||
end
|
||||
end
|
||||
|
||||
if $INFO=='DEFAULT'
|
||||
case $FORMAT
|
||||
when 'p','a'
|
||||
$INFO='nl'
|
||||
when 'h'
|
||||
$INFO='fmnl'
|
||||
end
|
||||
end
|
||||
|
||||
load 'seg.rb'
|
||||
|
||||
$dgpsep=';'
|
||||
|
||||
def tre(input)
|
||||
$gphid=[]
|
||||
$form=[]
|
||||
$lem=[]
|
||||
nodes=[]
|
||||
count=0
|
||||
seg=Seg.new
|
||||
for line in input
|
||||
print line unless $ONLYTREES
|
||||
seg.set(line)
|
||||
if dgp=seg['dgp']
|
||||
if nodes==[] && seg[3]!='BOS'
|
||||
print "A sentence must start with BOS segment. Aborting.\n"
|
||||
return
|
||||
end
|
||||
|
||||
id=dgp[/^\d+/].to_i
|
||||
|
||||
if gph=seg['gph']
|
||||
$gphid[id]=gph[/^\d+/].to_i
|
||||
else
|
||||
print "No gph field. Aborting.\n"
|
||||
return
|
||||
end
|
||||
|
||||
$form[$gphid[id]]=seg[4]
|
||||
$lem[$gphid[id]]=seg['lem']
|
||||
|
||||
nodes[id] = [seg[1].to_i,dgp]
|
||||
|
||||
if seg[3]=='EOS'
|
||||
$pref = "#{seg[1]} #{seg[2]} SYN *"
|
||||
parsegraph(nodes)
|
||||
printgraph if $DEBUG
|
||||
$thetrees=[]
|
||||
gentrees2
|
||||
for t in $thetrees
|
||||
count += 1
|
||||
t1=ground(t)
|
||||
case $FORMAT
|
||||
when /a/
|
||||
print "#{$pref} tre:#{count} arc:"
|
||||
printarcs(t1[0],t1[1])
|
||||
print "\n"
|
||||
when /p/
|
||||
print "#{$pref} tre:#{count} par:"
|
||||
printpar(t1[0],t1[1])
|
||||
print "\n"
|
||||
when /h/
|
||||
print "#\n# tree #{count}\n# ------\n"
|
||||
printtree(t1[0],t1[1],0)
|
||||
end
|
||||
end
|
||||
nodes=[]
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
def nodeinfo(id)
|
||||
info=""
|
||||
if $INFO =~ /n/
|
||||
info += id.to_s
|
||||
info += '.' if $INFO =~ /[fm]/
|
||||
end
|
||||
if $INFO =~ /f/
|
||||
info += $form[id]
|
||||
info += ';' if $INFO =~ /m/
|
||||
end
|
||||
if $INFO =~ /m/
|
||||
info += $lem[id]
|
||||
end
|
||||
info
|
||||
end
|
||||
|
||||
|
||||
def printarcs(root,arcs)
|
||||
print nodeinfo(root)
|
||||
for a in arcs
|
||||
print ';'
|
||||
print "#{a[2]}:" if $INFO =~ /l/
|
||||
print nodeinfo(a[0])+'-'+nodeinfo(a[1])
|
||||
end
|
||||
end
|
||||
|
||||
def printtree(root,arcs,o)
|
||||
if o==0
|
||||
print "# %-16s" % "root: "
|
||||
end
|
||||
print nodeinfo(root),"\n"
|
||||
for arc in arcs.select{ |a| a[0]==root }.sort{|a,b| a[1]<=>b[1] }
|
||||
print '# '," "*(o+1)
|
||||
print "%-16s" % (arc[2]+": ")
|
||||
printtree(arc[1],arcs,o+1)
|
||||
end
|
||||
end
|
||||
|
||||
def printpar(root,arcs)
|
||||
print nodeinfo(root)
|
||||
deps = arcs.select{ |a| a[0]==root }.sort{|a,b| a[1]<=>b[1] }
|
||||
unless deps == []
|
||||
print '('
|
||||
cont=false
|
||||
for arc in deps
|
||||
if cont then print ',' else cont=true end
|
||||
print arc[2],':' if $INFO =~ /l/
|
||||
printpar(arc[1],arcs)
|
||||
end
|
||||
print ')'
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
def parsegraph(nodes)
|
||||
|
||||
$n =nodes.length
|
||||
$sat =[];
|
||||
|
||||
$vis =[];
|
||||
$succ=[];
|
||||
$lhs =[];
|
||||
$arcs=[];
|
||||
$pos=[]
|
||||
|
||||
for dgp in nodes
|
||||
|
||||
parts = dgp[1].split($dgpsep,6)
|
||||
|
||||
i = parts[0].to_i
|
||||
$pos[i] = dgp[0].to_i
|
||||
$sat << i if parts[1]=="s"
|
||||
$arcs |= parts[2].split(',').map{ |a| case a
|
||||
when /\-\-(\w+)-(\d+)\/(\d+)/
|
||||
[i, $2.to_i, $1, $3.to_i]
|
||||
when /\+\+(\d+)-(\w+)\/(\d+)/
|
||||
[$1.to_i, i, $2, $3.to_i]
|
||||
end }
|
||||
$succ |= parts[3][1..-2].split(',').map{|x| [x.to_i,i]}
|
||||
$vis |= parts[4][1..-2].split(',').map{|x| [x.to_i,i]}
|
||||
$lhs |= parts[5][1..-2].split(',').map{|x| [x.to_i,i]} + [[i,i]]
|
||||
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
def ground(t)
|
||||
[ $gphid[t[0]] , t[1].map{|a| [$gphid[a[0]],$gphid[a[1]],a[2]]} ]
|
||||
end
|
||||
|
||||
|
||||
def gentrees2()
|
||||
$thetrees=[];
|
||||
bos=0; eos=$n-1;
|
||||
roots = (1...eos).select{|i| $vis.include? [i,eos]}.select{|i| $vis.include? [bos,i]}
|
||||
if $DEBUG then print "ROOTS: #{roots.inspect}\n" end
|
||||
for i in roots
|
||||
$theroot=i
|
||||
for r in buildR(i , eos, [])
|
||||
(rmin,rmax,rtree) = r
|
||||
buildR(bos, rmin, rtree)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
def buildR(min, max, tree)
|
||||
if $DEBUG then print "buildR--#{min}--#{max}--#{tree.inspect}\n" end
|
||||
trees=[]
|
||||
for a in $arcs.select{|a| a[0]==max && $vis.include?([min,a[1]]) }
|
||||
if $DEBUG then print "ARC: #{a.inspect}\n" end
|
||||
for r in buildR(a[1],a[3],tree+[a])
|
||||
(rmin,rmax,rarcs) = r
|
||||
for l in buildR(min,rmin,rarcs)
|
||||
(lmin,lmax,larcs) = l
|
||||
trees << [lmin,rmax,larcs]
|
||||
end
|
||||
end
|
||||
end
|
||||
for i in (0...$n).select{|i| $succ.include?([i,max])}.select{|i| $lhs.include?([min,i])}
|
||||
for l in buildL(min,i,tree)
|
||||
(lmin,lmax,larcs) = l
|
||||
trees << [lmin,lmax,larcs]
|
||||
end
|
||||
end
|
||||
trees
|
||||
end
|
||||
|
||||
|
||||
def buildL(min,max,tree)
|
||||
if $DEBUG then print "buildL--#{min}--#{max}--#{tree.inspect}\n" end
|
||||
if $pos[min]==$pos[max]
|
||||
if min==0 && max==0
|
||||
$thetrees.push [$theroot,tree]
|
||||
if $DEBUG then print "adding tree: #{tree.inspect}\n" end
|
||||
end
|
||||
return [[max,max,tree]]
|
||||
end
|
||||
trees=[]
|
||||
for arc in $arcs.select{|a| a[1]==max && $lhs.include?([min,a[0]]) }
|
||||
if $DEBUG then print "ARC: #{arc.inspect}\n" end
|
||||
for r in buildR(arc[3],max,tree+[arc])
|
||||
(rmin,rmax,rarcs) = r
|
||||
for l in buildL(min,rmin,rarcs)
|
||||
(lmin,lmax,larcs) = l
|
||||
trees << [lmin,lmax,larcs]
|
||||
end
|
||||
end
|
||||
end
|
||||
trees
|
||||
end
|
||||
|
||||
|
||||
def printgraph()
|
||||
|
||||
print "N: #{$n}\n"
|
||||
print "SAT: #{set_to_s($sat)}\n"
|
||||
print "SUCC: #{rel_to_s($succ)}\n"
|
||||
print "VIS: #{rel_to_s($vis)}\n"
|
||||
print "LHS: #{rel_to_s($lhs)}\n"
|
||||
print "ARCS: #{arcs_to_s($arcs)}\n"
|
||||
end
|
||||
|
||||
def set_to_s(s) "{#{s.join(',')}}" end
|
||||
def rel_to_s(r) "{#{r.map{|p| "(#{p[0]},#{p[1]})"}.join(',')}}" end
|
||||
def arc_to_s(q) "-#{q[0]}-#{q[2]}-#{q[1]}/#{q[3]}" end
|
||||
def arcs_to_s(a) "{#{a.map{|q| arc_to_s(q)}.join(',')}}" end
|
||||
|
||||
######################################################################
|
||||
|
||||
tre($stdin)
|
||||
#!/usr/bin/ruby -I /usr/local/lib/utt -I ~/.local/lib/utt
|
||||
|
||||
require 'getoptlong'
|
||||
|
||||
opts = GetoptLong.new(
|
||||
[ '--help', '-h', GetoptLong::NO_ARGUMENT ],
|
||||
[ '--debug', '-d', GetoptLong::NO_ARGUMENT ],
|
||||
[ '--format', '-F', GetoptLong::REQUIRED_ARGUMENT ],
|
||||
[ '--info', '-I', GetoptLong::REQUIRED_ARGUMENT ],
|
||||
[ '--only-trees','-t', GetoptLong::NO_ARGUMENT ])
|
||||
|
||||
# Usage text printed by --help.
# The first sentence used to read "dgp must\nmust be run ..." (duplicated word).
$helptext=
"The program generates trees from the graph output by dgp. dgp must\n"+
"be run with '-i ds' option.\n\n"+
"Command: tre [options]\n\n"+
"Options:\n"+
"--help -h Print help (this text) and exit.\n"+
"--debug -d Verbose output. For developers only.\n"+
"--format=s -F s Output format. Recognized values:\n"+
" a root + list of arcs\n"+
" p parenthesized notation\n"+
" h human readable indented tree format\n"+
" Multiple values are allowed. (default p)\n"+
"--info=s -I s Information printed. Recognized values:\n"+
" n node identifier\n"+
" f surface form\n"+
" m morphological information\n"+
" l arc labels\n"+
"--only-trees -t Do not copy input. Print trees only.\n"
|
||||
|
||||
# Option defaults. 'DEFAULT' is a sentinel meaning "no --info given";
# it is resolved after option parsing, once the output format is known.
$DEBUG=false
$FORMAT='p'
$INFO='DEFAULT'
$ONLYTREES=false

# Apply the command-line options on top of the defaults.
opts.each do |opt, arg|
  case opt
  when '--help'
    print $helptext
    exit 0
  when '--debug'
    $DEBUG=true
  when '--format'
    $FORMAT=arg
  when '--info'
    $INFO=arg
  when '--only-trees'
    $ONLYTREES=true
  else
    print "Unknown option #{opt}. Ignored.\n"
  end
end

# Resolve the --info default from the chosen format(s).
# --format may hold several letters (e.g. "ph"), so the letters are tested
# with patterns rather than compared against the whole string; an exact
# comparison left $INFO at 'DEFAULT' for any multi-letter format, which
# made every node print with no information at all.
# The human-readable format gets the richest default; it wins when combined.
if $INFO=='DEFAULT'
  if $FORMAT =~ /h/
    $INFO='fmnl'
  elsif $FORMAT =~ /[pa]/
    $INFO='nl'
  end
end
|
||||
|
||||
# Load seg.rb from ../lib/utt relative to the directory holding this script.
# The relative path is resolved against the script directory by expand_path;
# plain string concatenation dropped the separator and produced
# ".../bin../lib/utt/seg.rb".
require File.expand_path("../lib/utt/seg.rb", File.dirname(__FILE__))
|
||||
#require 'seg.rb'
|
||||
|
||||
$dgpsep=';'
|
||||
|
||||
# Read segments line by line from +input+, collect the dgp graph of each
# sentence and print the trees generated from it.
#
# Every input line is echoed unless $ONLYTREES is set. Lines whose segment has
# no 'dgp' field are otherwise ignored. A sentence is collected from the first
# dgp-bearing segment (field 3 must be 'BOS') up to the segment whose field 3
# is 'EOS'; at that point the graph is parsed, trees are generated and each
# tree is printed in every format named in $FORMAT.
#
# Previously the formats were dispatched with a single case/when, so only the
# first matching letter of a multi-letter --format value was honoured even
# though the help text allows multiple values.
#
# Processing stops (returns nil) with a message when a sentence does not start
# with BOS or when a dgp segment has no 'gph' field.
def tre(input)
  $gphid=[]
  $form=[]
  $lem=[]
  nodes=[]
  count=0   # running tree number; not reset between sentences
  seg=Seg.new
  for line in input
    print line unless $ONLYTREES
    seg.set(line)

    dgp=seg['dgp']
    next unless dgp

    if nodes.empty? && seg[3]!='BOS'
      print "A sentence must start with BOS segment. Aborting.\n"
      return
    end

    # The dgp field starts with the node number inside the dgp graph.
    id=dgp[/^\d+/].to_i

    gph=seg['gph']
    unless gph
      print "No gph field. Aborting.\n"
      return
    end
    $gphid[id]=gph[/^\d+/].to_i

    # Form and lem are indexed by gph identifier, not by dgp node number.
    $form[$gphid[id]]=seg[4]
    $lem[$gphid[id]]=seg['lem']

    nodes[id] = [seg[1].to_i,dgp]

    next unless seg[3]=='EOS'

    $pref = "#{seg[1]} #{seg[2]} SYN *"
    parsegraph(nodes)
    printgraph if $DEBUG
    $thetrees=[]
    gentrees2
    for t in $thetrees
      count += 1
      t1=ground(t)
      # Independent tests so that every requested format is printed.
      if $FORMAT =~ /a/
        print "#{$pref} tre:#{count} arc:"
        printarcs(t1[0],t1[1])
        print "\n"
      end
      if $FORMAT =~ /p/
        print "#{$pref} tre:#{count} par:"
        printpar(t1[0],t1[1])
        print "\n"
      end
      if $FORMAT =~ /h/
        print "#\n# tree #{count}\n# ------\n"
        printtree(t1[0],t1[1],0)
      end
    end
    nodes=[]
  end
end
|
||||
|
||||
|
||||
# Build the textual description of node +id+ according to the letters in $INFO:
#   n - the identifier itself, followed by '.' when f or m is also requested
#   f - the $form entry for the node, followed by ';' when m is also requested
#   m - the $lem entry for the node
# Missing $form/$lem entries (nil) are rendered as empty strings; appending
# nil to a String would otherwise raise TypeError.
# Returns the assembled String (possibly empty).
def nodeinfo(id)
  info=""
  if $INFO =~ /n/
    info += id.to_s
    info += '.' if $INFO =~ /[fm]/
  end
  if $INFO =~ /f/
    info += $form[id].to_s
    info += ';' if $INFO =~ /m/
  end
  info += $lem[id].to_s if $INFO =~ /m/
  info
end
|
||||
|
||||
|
||||
# Print a tree as "root;arc;arc;...": the root's node info followed by one
# ';'-prefixed entry per arc. Each entry is "from-to" (node infos of arc[0]
# and arc[1]), preceded by "label:" (arc[2]) when $INFO contains 'l'.
def printarcs(root,arcs)
  print nodeinfo(root)
  arcs.each do |arc|
    label = ($INFO =~ /l/) ? "#{arc[2]}:" : ""
    print ";#{label}#{nodeinfo(arc[0])}-#{nodeinfo(arc[1])}"
  end
end
|
||||
|
||||
# Print the subtree rooted at +root+ in the human-readable format, one node
# per line. +o+ is the nesting depth: at depth 0 a "root: " column is printed
# first. Each outgoing arc of +root+ (arcs whose arc[0] equals root), ordered
# by arc[1], gets a "# " prefix, a depth-dependent pad, a 16-column label
# cell and then the recursively printed subtree of arc[1].
def printtree(root,arcs,o)
  print "# %-16s" % "root: " if o==0
  print nodeinfo(root),"\n"
  outgoing = arcs.select{ |a| a[0]==root }.sort{|a,b| a[1]<=>b[1] }
  outgoing.each do |arc|
    print '# '," "*(o+1)
    print "%-16s" % (arc[2]+": ")
    printtree(arc[1],arcs,o+1)
  end
end
|
||||
|
||||
# Print the subtree rooted at +root+ in parenthesized notation:
# the root's node info, then, if it has outgoing arcs, a comma-separated,
# parenthesized list of its dependents (ordered by arc[1]), each printed
# recursively and prefixed with "label:" when $INFO contains 'l'.
def printpar(root,arcs)
  print nodeinfo(root)
  deps = arcs.select{ |a| a[0]==root }.sort{|a,b| a[1]<=>b[1] }
  return if deps.empty?

  print '('
  deps.each_with_index do |arc, idx|
    print ',' if idx > 0
    print arc[2],':' if $INFO =~ /l/
    printpar(arc[1],arcs)
  end
  print ')'
end
|
||||
|
||||
|
||||
# Decode the dgp descriptions collected for one sentence into the global
# graph tables used by the tree generator.
#
# +nodes+ is an array of [position, dgp_string] pairs. Each dgp string is
# split on $dgpsep into at most six parts:
#   parts[0]  node number i
#   parts[1]  "s" marks the node as a member of $sat
#   parts[2]  comma-separated arc descriptors, either
#               --LABEL-N/M  -> [i, N, LABEL, M]
#               ++N-LABEL/M  -> [N, i, LABEL, M]
#   parts[3..5]  bracketed, comma-separated node lists; each listed x adds
#               the pair [x, i] to $succ, $vis and $lhs respectively
#               ($lhs additionally always receives [i, i])
#
# Sets $n, $sat, $vis, $succ, $lhs, $arcs and $pos.
#
# Arc descriptors that match neither pattern are dropped. They used to be
# stored as nil in $arcs, which made the later arc selections
# (a[0]==... / a[1]==...) fail with NoMethodError.
def parsegraph(nodes)
  $n   = nodes.length
  $sat = []
  $vis = []
  $succ= []
  $lhs = []
  $arcs= []
  $pos = []

  for dgp in nodes
    parts = dgp[1].split($dgpsep,6)

    i = parts[0].to_i
    $pos[i] = dgp[0].to_i
    $sat << i if parts[1]=="s"

    decoded = parts[2].split(',').map{ |a|
      case a
      when /\-\-(\w+)-(\d+)\/(\d+)/
        [i, $2.to_i, $1, $3.to_i]
      when /\+\+(\d+)-(\w+)\/(\d+)/
        [$1.to_i, i, $2, $3.to_i]
      end
    }
    $arcs |= decoded.compact   # unrecognised descriptors yield nil; discard them

    # [1..-2] strips the surrounding bracket characters of each list.
    $succ |= parts[3][1..-2].split(',').map{|x| [x.to_i,i]}
    $vis  |= parts[4][1..-2].split(',').map{|x| [x.to_i,i]}
    $lhs  |= parts[5][1..-2].split(',').map{|x| [x.to_i,i]} + [[i,i]]
  end
end
|
||||
|
||||
|
||||
# Translate a tree from dgp node numbers to gph identifiers.
# +t+ is [root, arcs]; the result is [$gphid[root], arcs'] where every arc
# keeps its label (arc[2]) and has both endpoints mapped through $gphid.
# Any further arc elements are not carried over.
def ground(t)
  root  = t[0]
  edges = t[1]
  mapped = edges.map do |arc|
    [$gphid[arc[0]], $gphid[arc[1]], arc[2]]
  end
  [$gphid[root], mapped]
end
|
||||
|
||||
|
||||
# Generate all trees for the graph currently held in the global tables.
# Node 0 is taken as BOS and node $n-1 as EOS. A node i strictly between
# them is a root candidate when both [i,eos] and [bos,i] are in $vis.
# For every candidate, $theroot is set and buildR is run from the root to
# EOS; each result is then continued with buildR from BOS up to the reached
# minimum. Finished trees are collected in $thetrees as a side effect of
# the build routines.
def gentrees2()
  $thetrees=[]
  bos=0
  eos=$n-1
  roots = (1...eos).select{|i| $vis.include?([i,eos]) && $vis.include?([bos,i]) }
  print "ROOTS: #{roots.inspect}\n" if $DEBUG
  roots.each do |root|
    $theroot=root
    buildR(root, eos, []).each do |right|
      rmin  = right[0]
      rtree = right[2]
      buildR(bos, rmin, rtree)
    end
  end
end
|
||||
|
||||
|
||||
# Collect partial trees for the span +min+..+max+, extending +tree+
# (an array of arcs). Returns an array of [min, max, arcs] triples.
#
# Two sources contribute, in this order:
#  1. every arc leaving +max+ (arc[0]==max) whose target arc[1] satisfies
#     [min, arc[1]] in $vis: the arc is appended, buildR is applied to
#     (arc[1], arc[3]) and each result is continued with buildR over
#     (min, reached minimum);
#  2. every node i with [i,max] in $succ and [min,i] in $lhs: the results
#     of buildL(min, i, tree) are taken over unchanged.
def buildR(min, max, tree)
  print "buildR--#{min}--#{max}--#{tree.inspect}\n" if $DEBUG
  trees=[]

  outgoing = $arcs.select{|a| a[0]==max && $vis.include?([min,a[1]]) }
  outgoing.each do |arc|
    print "ARC: #{arc.inspect}\n" if $DEBUG
    buildR(arc[1],arc[3],tree+[arc]).each do |right|
      rmin, rmax, rarcs = right
      buildR(min,rmin,rarcs).each do |left|
        lmin, _lmax, larcs = left
        trees << [lmin,rmax,larcs]
      end
    end
  end

  predecessors = (0...$n).select{|i| $succ.include?([i,max]) && $lhs.include?([min,i]) }
  predecessors.each do |i|
    buildL(min,i,tree).each do |left|
      lmin, lmax, larcs = left
      trees << [lmin,lmax,larcs]
    end
  end

  trees
end
|
||||
|
||||
|
||||
# Collect partial trees for the span +min+..+max+, extending +tree+
# (an array of arcs). Returns an array of [min, max, arcs] triples.
#
# When +min+ and +max+ share the same $pos value the span is exhausted:
# [[max, max, tree]] is returned, and if both are node 0 the tree is also
# recorded in $thetrees together with $theroot.
# Otherwise every arc entering +max+ (arc[1]==max) whose source arc[0]
# satisfies [min, arc[0]] in $lhs is appended; buildR is applied to
# (arc[3], max) and each result is continued with buildL over
# (min, reached minimum).
def buildL(min,max,tree)
  print "buildL--#{min}--#{max}--#{tree.inspect}\n" if $DEBUG

  if $pos[min]==$pos[max]
    if min==0 && max==0
      $thetrees.push [$theroot,tree]
      print "adding tree: #{tree.inspect}\n" if $DEBUG
    end
    return [[max,max,tree]]
  end

  trees=[]
  incoming = $arcs.select{|a| a[1]==max && $lhs.include?([min,a[0]]) }
  incoming.each do |arc|
    print "ARC: #{arc.inspect}\n" if $DEBUG
    buildR(arc[3],max,tree+[arc]).each do |right|
      rmin, _rmax, rarcs = right
      buildL(min,rmin,rarcs).each do |left|
        lmin, lmax, larcs = left
        trees << [lmin,lmax,larcs]
      end
    end
  end
  trees
end
|
||||
|
||||
|
||||
# Debug dump of the parsed graph: node count, the $sat set, the $succ, $vis
# and $lhs relations and the arc list, one labelled line each.
def printgraph()
  report = [
    "N: #{$n}",
    "SAT: #{set_to_s($sat)}",
    "SUCC: #{rel_to_s($succ)}",
    "VIS: #{rel_to_s($vis)}",
    "LHS: #{rel_to_s($lhs)}",
    "ARCS: #{arcs_to_s($arcs)}"
  ]
  print report.map{|row| row+"\n"}.join
end
|
||||
|
||||
# Formatting helpers for the debug dump.

# "{a,b,c}" for a flat list.
def set_to_s(s)
  '{' + s.join(',') + '}'
end

# "{(x,y),(x,y)}" for a list of pairs.
def rel_to_s(r)
  pairs = r.map{|p| "(#{p[0]},#{p[1]})"}
  '{' + pairs.join(',') + '}'
end

# "-from-label-to/extra" for one arc [from, to, label, extra].
def arc_to_s(q)
  from, to, label, extra = q[0], q[1], q[2], q[3]
  "-#{from}-#{label}-#{to}/#{extra}"
end

# "{arc,arc}" for a list of arcs.
def arcs_to_s(a)
  '{' + a.map{|q| arc_to_s(q)}.join(',') + '}'
end
|
||||
|
||||
######################################################################
|
||||
|
||||
tre($stdin)
|
||||
|
@ -13,7 +13,6 @@ use File::HomeDir;
|
||||
my $LIB_DIR="/usr/local/lib/utt";
|
||||
|
||||
my $systemconfigfile="/usr/local/etc/utt/grp.conf";
|
||||
#my $userconfigfile="$ENV{'HOME'}/.utt/grp.conf";
|
||||
my $userconfigfile=home()."/.utt/grp.conf";
|
||||
|
||||
Getopt::Long::Configure('no_ignore_case_always');
|
||||
|
@ -5,11 +5,14 @@
|
||||
#version: 1.0
|
||||
#author: Marcin Walas
|
||||
|
||||
#this program tags the tokenized file with given tags
|
||||
#this program tags the tokenized file with given tags
|
||||
#tags can be given in any order and configuration through the expression
|
||||
#which is one of the parametres of the script
|
||||
#contact: d287572@atos.wmid.amu.edu.pl, walasiek@gmail.com
|
||||
|
||||
# Module search path: system-wide UTT library first, then the per-user one.
# "use lib" prepends, so the per-user directory (added last) is searched first.
use lib "/usr/local/lib/utt";
# The "$" sigil is required; without it the literal relative path
# "ENV{HOME}/.local/lib/utt" would be added to @INC.
use lib "$ENV{HOME}/.local/lib/utt";
|
||||
|
||||
use strict;
|
||||
use Getopt::Long;
|
||||
use File::HomeDir;
|
||||
|
@ -12,7 +12,6 @@ use File::HomeDir;
|
||||
|
||||
my $LIB_DIR="/usr/local/lib/utt";
|
||||
my $systemconfigfile='/usr/local/etc/utt/ser.conf';
|
||||
#my $userconfigfile="$ENV{'HOME'}/.utt/ser.conf";
|
||||
my $userconfigfile=home()."/.utt/ser.conf";
|
||||
|
||||
Getopt::Long::Configure('no_ignore_case_always');
|
||||
|
Loading…
Reference in New Issue
Block a user