many changes, mainly dgp1 algorithm

This commit is contained in:
Tomasz Obrebski 2014-12-17 12:10:45 +01:00
parent d484a3254b
commit acbabee742
10 changed files with 2521985 additions and 482 deletions

7
AUTHORS Normal file
View File

@ -0,0 +1,7 @@
Tomasz Obrębski <obrebski@amu.edu.pl>
Michał Stolarski
Justyna Walkowska <ynka@amu.edu.pl>
Pawel Konieczka
Marcin Walas
Paweł Wereński
Mateusz Hromada <ruanda@amu.edu.pl>

View File

@ -0,0 +1,24 @@
# Build rules for the dictionary files in this directory.
# Includes ../../config.mak (its contents are not visible from this file).
include ../../config.mak
# Files built by the default `all` target.
# NOTE(review): only lem.bin and lem.cats have rules below; no rule for cor.bin or gue.bin
# is visible here -- confirm they are provided elsewhere, otherwise `make all` will fail.
TARGETS = lem.bin lem.cats cor.bin gue.bin
# NOTE(review): COMPDICDIR is not referenced by any rule below; the recipes spell out
# ../../src/compdic/ directly.
COMPDICDIR = ../
.PHONY: all
all: $(TARGETS)
# ------------------------------------------------------------------
# main section
# ------------------------------------------------------------------
# lem.bin is produced from lem.fst by compdic-fst-to-bin.
lem.bin: lem.fst
../../src/compdic/compdic-fst-to-bin lem.fst lem.bin
# lem.fst is produced from the source dictionary lem.dic by compdic-dic-to-fst.
lem.fst: lem.dic
../../src/compdic/compdic-dic-to-fst lem.dic lem.fst
# lem.cats is produced from lem.dic by compdic-dic-to-cats.
lem.cats: lem.dic
../../src/compdic/compdic-dic-to-cats lem.dic lem.cats
# Removes the generated lem.* files; lem.dic is not touched.
.PHONY: clean
clean:
rm -f lem.bin lem.fst lem.cats

2521575
share/pl_PL.ISO-8859-2/lem.dic Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,7 @@
#! /bin/bash #! /bin/bash
no_of_parts=0 no_of_parts=0
part_size=100000
while [ $# -gt 2 ] while [ $# -gt 2 ]
do do
@ -36,7 +37,7 @@ fst=$2
if [ $no_of_parts -eq 0 ] if [ $no_of_parts -eq 0 ]
then then
no_of_parts=$(( `cat $1 | wc -l` / 75000 + 1 )) no_of_parts=$(( `cat $1 | wc -l` / $part_size + 1 ))
fi fi

View File

@ -1,7 +1,7 @@
#include <iostream> #include <iostream>
using namespace std; using namespace std;
#include "dgp0.hh" #include "dgp1.hh"
#include "global.hh" #include "global.hh"
extern Grammar grammar; extern Grammar grammar;
@ -70,27 +70,15 @@ NodeProp compute_dep_prop(NodeProp depprop, const Link& link, list<Boubble*> bs)
//==================================================================================================== //====================================================================================================
int find_existing_node(int mnodeind, NodeProp p, bitset<MAXNODES>& newheadLH, bitset<MAXNODES>& newheadLV) int find_existing_node(int mnode, NodeProp p, Edge e)
{ {
MNode& mnode = mgraph[mnodeind]; for(vector<int>::iterator i = mgraph[mnode].snodes.begin(); i!=mgraph[mnode].snodes.end(); ++i)
int ret=-1; if(sgraph[*i].prop==p && sgraph[*i].edge==e)
for(vector<int>::iterator ps=mnode.snodes.begin(); ps!=mnode.snodes.end(); ++ps) {
{ if(debug) fprintf(stderr,"\t\treusing %d\n",*i);
if(debug) fprintf(stderr,"#find existing node: checking %d ... \n", *ps); return *i;
if(sgraph[*ps].prop==p) }
if(sgraph[*ps].LH==newheadLH && sgraph[*ps].LV==newheadLV) return -1;
{
ret = *ps;
if(debug) fprintf(stderr,"#\tsucceeded because of LH/LV equality ()\n");
}
else
{
if(debug) fprintf(stderr,"#\tfailed beacause of LH/LV inequality\n");
}
}
if(debug) fprintf(stderr,"\n");
return ret;
} }
//==================================================================================================== //====================================================================================================
@ -160,280 +148,102 @@ void create_reverse_links(int n)
//==================================================================================================== //====================================================================================================
int create_new_head_node_left(int anc, NodeProp& prop, bitset<MAXNODES>& LH, bitset<MAXNODES>& LD, bitset<MAXNODES>& LV) int create_new_node(int anc, NodeProp& prop, Edge edge)
{ {
int newheadind = sgraph.clone(anc,prop); int newheadind = sgraph.clone(anc,prop,edge);
nodelist.push_back(newheadind); nodelist.push_back(newheadind);
sgraph[newheadind].LH = LH;
sgraph[newheadind].LD = LD;
sgraph[newheadind].in_LH = true;
sgraph[newheadind].LV.reset();
copy_links(anc,newheadind); copy_links(anc,newheadind);
create_reverse_links(newheadind); create_reverse_links(newheadind);
if(debug) sgraph.print_node_debug(stderr,"clone",newheadind,anc);
if(debug) sgraph.print_node_debug(stderr,"add new",newheadind,anc);
// if(debug) print_sets(newheadind); // if(debug) print_sets(newheadind);
return newheadind; return newheadind;
} }
// Creates a new dependent node by cloning node `anc` with properties `prop` (sgraph.clone),
// appends the clone to nodelist and returns its index.
// On the clone: LH and LV are reset, LD is set from the LD argument, in_LH is set to false.
// Afterwards copy_links(anc,newind) and create_reverse_links(newind) are called.
// Note: the LH and LV parameters are accepted but never read in this function.
int create_new_dep_node_left(int anc, NodeProp& prop, bitset<MAXNODES>& LH, bitset<MAXNODES>& LD, bitset<MAXNODES>& LV)
{
int newind = sgraph.clone(anc,prop);
nodelist.push_back(newind);
sgraph[newind].LH.reset();
sgraph[newind].LD=LD;
sgraph[newind].in_LH=false; //???????
sgraph[newind].LV.reset();
copy_links(anc,newind);
create_reverse_links(newind);
if(debug) sgraph.print_node_debug(stderr,"add new",newind,anc);
// if(debug) print_sets(newind);
return newind;
}
// Creates a new head node by cloning node `anc` with properties `prop` (sgraph.clone),
// appends the clone to nodelist and returns its index.
// On the clone: LH, LD and LV are set from the three bitset arguments and in_LH is set to false.
// Afterwards copy_links(anc,newheadind) and create_reverse_links(newheadind) are called.
int create_new_head_node_right(int anc, NodeProp& prop, bitset<MAXNODES>& newheadLH, bitset<MAXNODES>& newheadLD, bitset<MAXNODES>& newheadLV)
{
int newheadind = sgraph.clone(anc,prop);
nodelist.push_back(newheadind);
sgraph[newheadind].LH=newheadLH;
sgraph[newheadind].LD=newheadLD;
sgraph[newheadind].in_LH=false;
sgraph[newheadind].LV=newheadLV;
copy_links(anc,newheadind);
create_reverse_links(newheadind);
if(debug) sgraph.print_node_debug(stderr,"add new",newheadind,anc);
// if(debug) print_sets(newheadind);
return newheadind;
}
// Creates a new dependent node by cloning node `anc` with properties `prop` (sgraph.clone),
// appends the clone to nodelist and returns its index.
// On the clone: LH and LD are set from the arguments, in_LH is set to true, LV is reset.
// Afterwards copy_links(anc,newind) and create_reverse_links(newind) are called.
// Note: the LV parameter is accepted but never read in this function.
int create_new_dep_node_right(int anc, NodeProp& prop, bitset<MAXNODES>& LH, bitset<MAXNODES>& LD, bitset<MAXNODES>& LV)
{
int newind = sgraph.clone(anc,prop);
nodelist.push_back(newind);
sgraph[newind].LH=LH;
sgraph[newind].LD=LD;
sgraph[newind].in_LH=true; //???????
sgraph[newind].LV.reset();
copy_links(anc,newind);
create_reverse_links(newind);
if(debug) sgraph.print_node_debug(stderr,"ADD NEW",newind,anc);
// if(debug) print_sets(newind);
return newind;
}
//==================================================================================================== //====================================================================================================
void connect_left(int h, int d, const Link& l, list<Boubble*>& new_head_boubbles, list<Boubble*>& new_dep_boubbles) void connect_left(int h, int d, const Link& l, list<Boubble*>& new_head_boubbles, list<Boubble*>& new_dep_boubbles)
{ {
NodeProp &oldheadprop = sgraph[h].prop; NodeProp &old_head_prop = sgraph[h].prop;
NodeProp &olddepprop = sgraph[d].prop; NodeProp &old_dep_prop = sgraph[d].prop;
NodeProp new_head_prop = compute_head_prop(old_head_prop,l,new_head_boubbles,old_dep_prop.flags);
NodeProp new_dep_prop = compute_dep_prop(old_dep_prop,l,new_dep_boubbles);
NodeProp newheadprop = compute_head_prop(oldheadprop,l,new_head_boubbles,olddepprop.flags); Edge new_dep_edge(sgraph[d].edge);
int newd = find_existing_node(sgraph[d].mnode, new_dep_prop, new_dep_edge);
if( newd < 0 )
newd = create_new_node(d,new_dep_prop,new_dep_edge);
int newheadind; Edge new_head_edge(sgraph[newd].edge,newd);
if(oldheadprop==newheadprop) int newh = find_existing_node(sgraph[h].mnode, new_head_prop, new_head_edge);
newheadind = h; if( newh < 0 )
else newh = create_new_node(h,new_head_prop,new_head_edge);
{
bitset<MAXNODES> newheadLH = sgraph[h].LH;
bitset<MAXNODES> newheadLV = sgraph[d].LV;
bitset<MAXNODES> newheadLD = sgraph[h].LD;
newheadind = find_existing_node(sgraph[h].mnode, newheadprop, newheadLH, newheadLV); sgraph[newh].deps.push_back(Arc(newd,l.role,h,d));
if( newheadind >= 0) // W£¡CZONE sgraph[newd].heads.push_back(Arc(newh,l.role,h,d));
sgraph[newheadind].LD |= newheadLD;
else
{
newheadind = create_new_head_node_left(h,newheadprop,newheadLH,newheadLD,newheadLV);
sgraph[newheadind].edge.clear();
sgraph[newheadind].edge_contains_self = false;
}
} if(debug)
{
NodeProp newdepprop = compute_dep_prop(olddepprop,l,new_dep_boubbles); sgraph.print_arc(stderr,"link",newh,d,l.role,0);
sgraph.print_node_debug(stderr,"",newh,h);
int newdepind; sgraph.print_node_debug(stderr,"",newd,d);
}
if(olddepprop==newdepprop)
newdepind = d;
else
{
bitset<MAXNODES> newdepLH = sgraph[d].LH;
bitset<MAXNODES> newdepLV = sgraph[d].LV;
bitset<MAXNODES> newdepLD = sgraph[d].LD;
newdepind = find_existing_node(sgraph[d].mnode, newdepprop, newdepLH, newdepLV);
if( newdepind >= 0) // W£¡CZONE
sgraph[newdepind].LD |= newdepLD; // TYLKO DLA LD
else
{
newdepind = create_new_dep_node_left(d,newdepprop,newdepLH,newdepLD,newdepLV);
sgraph[newdepind].edge.clear();
//sgraph[newdepind].edge.push_back(newdepind); // TO
sgraph[newdepind].edge_contains_self = true; // LUB TO
}
}
sgraph[newheadind].deps.push_back(Arc(newdepind,l.role,h,d));
sgraph[newdepind].heads.push_back(Arc(newheadind,l.role,h,d));
sgraph[newheadind].edge.push_back(newdepind);
if(sgraph[d].saturated()) sgraph[newheadind].LV |= sgraph[d].LV;
sgraph[newheadind].LD.set(d);
if(sgraph[d].saturated()) sgraph[newheadind].LD |= sgraph[d].LD;
if(debug) sgraph.print_arc(stderr,"new link",newheadind,d,l.role,0);
if(debug) sgraph.print_node_debug(stderr,"update",newheadind,h);
// if(debug) print_sets(newheadind);
if(debug) sgraph.print_node_debug(stderr,"update",newdepind,d);
// if(debug) print_sets(newdepind);
} }
//---------------------------------------------------------------------------------------------------- //----------------------------------------------------------------------------------------------------
void connect_right(int h, int d, const Link& l, list<Boubble*>& new_head_boubbles, list<Boubble*>& new_dep_boubbles) void connect_right(int h, int d, const Link& l, list<Boubble*>& new_head_boubbles, list<Boubble*>& new_dep_boubbles)
{ {
NodeProp &oldheadprop = sgraph[h].prop; NodeProp &old_head_prop = sgraph[h].prop;
NodeProp &old_dep_prop = sgraph[d].prop;
NodeProp new_head_prop = compute_head_prop(old_head_prop,l,new_head_boubbles,old_dep_prop.flags);
NodeProp new_dep_prop = compute_dep_prop(old_dep_prop,l,new_dep_boubbles);
NodeProp newheadprop = compute_head_prop(oldheadprop,l,new_head_boubbles, sgraph[d].prop.flags); Edge new_head_edge(sgraph[h].edge);
int newh = find_existing_node(sgraph[h].mnode, new_head_prop, new_head_edge);
int newheadind; if( newh < 0 )
if(oldheadprop==newheadprop)
newheadind = h;
else
{
bitset<MAXNODES> newheadLH = sgraph[h].LH;
bitset<MAXNODES> newheadLV = sgraph[h].LV;
bitset<MAXNODES> newheadLD = sgraph[h].LD;
newheadind = find_existing_node(sgraph[h].mnode, newheadprop, newheadLH, newheadLV);
if(debug) fprintf(stderr,"#HEAD EXISTS %d\n",newheadind);
if( newheadind >= 0) // W£¡CZONE
sgraph[newheadind].LD |= newheadLD; // TYLKO DLA LD
else
{ {
newheadind = create_new_head_node_right(h,newheadprop,newheadLH,newheadLD,newheadLV); newh = create_new_node(h,new_head_prop,new_head_edge);
//if(!sgraph[h].edge.empty()) sgraph[newheadind].edge.push_back(newheadind); // TO sgraph[newh].visible_as_neighbour = false;
sgraph[newheadind].edge_contains_self = sgraph[h].edge_contains_self; // LUB TO
sgraph[newheadind].visible_as_neighbour = false;
} }
}
NodeProp &olddepprop = sgraph[d].prop; Edge new_dep_edge;
NodeProp newdepprop = compute_dep_prop(olddepprop,l,new_dep_boubbles); int newd = find_existing_node(sgraph[d].mnode, new_dep_prop, new_dep_edge);
if( newd < 0)
int newdepind; newd = create_new_node(d,new_dep_prop,new_dep_edge);
if(olddepprop==newdepprop)
newdepind = d;
else
{
bitset<MAXNODES> newdepLH = sgraph[d].LH;
bitset<MAXNODES> newdepLV = sgraph[d].LV;
bitset<MAXNODES> newdepLD = sgraph[d].LD;
newdepind = find_existing_node(sgraph[d].mnode, newdepprop, newdepLH, newdepLV);
if(debug) fprintf(stderr,"#DEP EXISTS %d\n",newdepind);
if( newdepind >= 0) // W£¡CZONE
sgraph[newdepind].LD |= newdepLD; // TYLKO DLA LD
else
{
newdepind = create_new_dep_node_right(d,newdepprop,newdepLH,newdepLD,newdepLV);
sgraph[newdepind].edge.clear();
sgraph[newdepind].edge_contains_self = false;
}
}
sgraph[newdepind].heads.push_back(Arc(newheadind,l.role,h,d)); sgraph[newd].heads.push_back(Arc(newh,l.role,h,d));
sgraph[newheadind].deps.push_back(Arc(newdepind,l.role,h,d)); sgraph[newh].deps.push_back(Arc(newd,l.role,h,d));
//sgraph[newdepind].edge.push_back(newheadind);
sgraph[newdepind].LH.set(newheadind); if(debug)
{
// sgraph[*d].prop.merge_boubbles(new_dep_boubbles); sgraph.print_arc(stderr,"link",newh,newd,l.role,1);
sgraph.print_node_debug(stderr,"",newh,h);
if(sgraph[newheadind].saturated()) sgraph[newdepind].LH |= sgraph[newheadind].LH; sgraph.print_node_debug(stderr,"",newd,d);
}
if(debug) sgraph.print_arc(stderr,"new link",newheadind,newdepind,l.role,1);
if(debug) sgraph.print_node_debug(stderr,"update",newheadind,h);
if(debug) sgraph.print_node_debug(stderr,"update",newdepind,d);
} }
//==================================================================================================== //====================================================================================================
// bool check_meeting_boubles(list<Boubble*>& hboubbles, list<Boubble*>& dboubbles)
// {
// bool hremove=false; // czy usunąć ostatnio sprawdzany bąbel
// bool dremove=false; // czy usunąć ostatnio sprawdzany bąbel
// for(list<Boubble*>::iterator hb = hboubbles.begin(); hb != hboubbles.end(); hb = hremove ? hboubbles.erase(hb) : ++hb )
// {
// hremove=false;
// for(list<Boubble*>::iterator db = dboubbles.begin(); db != dboubbles.end(); db = dremove ? dboubbles.erase(db) : ++db )
// {
// dremove=false;
// if( (*hb)->rel()==(*db)->rel() && (*hb)->dir()==DOWN && (*db)->dir()==UP && (*hb)->reverse()!=(*db)->reverse() )
// {
// int srcnode,dstnode;
// if( (*hb)->reverse()==false )
// srcnode = (*hb)->src(), dstnode = (*db)->src();
// else
// srcnode = (*db)->src(), dstnode = (*hb)->src();
// if( grammar.check_longrel(sgraph.cat(srcnode), sgraph.cat(dstnode), (*hb)->rel()) )
// {
// hremove=dremove=true;
// if(debug) fprintf(stderr,"BOUBBLES MET!!!\n");
// }
// else
// {
// if(debug) fprintf(stderr,"BOUBBLES' MEETING FAILED!!!\n");
// return false;
// }
// }
// }
// }
// return true;
// }
//====================================================================================================
bool check_meeting_boubles(list<Boubble*>& boubbles) bool check_meeting_boubles(list<Boubble*>& boubbles)
{ {
bool hremove=false; // czy usun±æ ostatnio sprawdzany b±bel bool hremove=false; // czy usun±æ ostatnio sprawdzany b±bel
bool dremove=false; // czy usun±æ ostatnio sprawdzany b±bel bool dremove=false; // czy usun±æ ostatnio sprawdzany b±bel
// cerr << "CHECKING MEETING BUBBLES" << endl;
for(list<Boubble*>::iterator hb = boubbles.begin(); hb != boubbles.end(); hb = hremove ? boubbles.erase(hb) : ++hb ) for(list<Boubble*>::iterator hb = boubbles.begin(); hb != boubbles.end(); hb = hremove ? boubbles.erase(hb) : ++hb )
{ {
cout << endl << "hb:" << **hb ;
hremove=false; hremove=false;
for(list<Boubble*>::iterator db = hb; db != boubbles.end(); db = dremove ? boubbles.erase(db) : ++db ) for(list<Boubble*>::iterator db = hb; db != boubbles.end(); db = dremove ? boubbles.erase(db) : ++db )
{ {
cout << " db:" << **db; // cerr << " db:" << **db;
dremove=false; dremove=false;
if( (*hb)->rel()==(*db)->rel() && (*hb)->reverse()!=(*db)->reverse() ) if( (*hb)->rel()==(*db)->rel() && (*hb)->reverse()!=(*db)->reverse() )
{ {
cout << "Z"; // cerr << "Z";
int srcnode,dstnode; int srcnode,dstnode;
if( (*hb)->reverse()==false ) if( (*hb)->reverse()==false )
srcnode = (*hb)->src(), dstnode = (*db)->src(); srcnode = (*hb)->src(), dstnode = (*db)->src();
@ -441,13 +251,13 @@ bool check_meeting_boubles(list<Boubble*>& boubbles)
srcnode = (*db)->src(), dstnode = (*hb)->src(); srcnode = (*db)->src(), dstnode = (*hb)->src();
if( grammar.check_longrel(sgraph.cat(srcnode), sgraph.cat(dstnode), (*hb)->rel()) ) if( grammar.check_longrel(sgraph.cat(srcnode), sgraph.cat(dstnode), (*hb)->rel()) )
{ {
cout << " REMOVE "; // cerr << " REMOVE ";
hremove=dremove=true; hremove=dremove=true;
if(debug) fprintf(stderr,"BOUBBLES MET!!!\n"); if(debug) fprintf(stderr,"BOUBBLES MET!!!\n");
} }
else else
{ {
cout << " FAIL "; // cerr << " FAIL ";
if(debug) fprintf(stderr,"BOUBBLES' MEETING FAILED!!!\n"); if(debug) fprintf(stderr,"BOUBBLES' MEETING FAILED!!!\n");
return false; return false;
} }
@ -470,7 +280,7 @@ bool check_boubbles_at_target(list<Boubble*>& boubbles, int node)
if( (*b)->is_at_target() ) if( (*b)->is_at_target() )
if( grammar.check_longrel(sgraph.cat((*b)->src()), sgraph.cat(node), (*b)->rel()) ) if( grammar.check_longrel(sgraph.cat((*b)->src()), sgraph.cat(node), (*b)->rel()) )
{ {
cout << endl << "REMOVE ChBatT " << **b << endl; // cerr << endl << "REMOVE ChBatT " << **b << endl;
remove=true; remove=true;
} }
else else
@ -488,12 +298,9 @@ void try_connect_dependents(int j)
LViterator lvi(sgraph,j); LViterator lvi(sgraph,j);
int i; int i;
while((i=lvi.next()) >= 0) while((i=lvi.next()) >= 0)
{
//if(debug) sgraph.print_node_debug(stderr,"D-CUR>",i,-1);
if(sgraph.saturated(i)) if(sgraph.saturated(i))
{ {
if(debug) {fprintf(stderr,"%d <--",i); } if(debug) {fprintf(stderr,"\t%d <-- %d",i,j); }
list<const Link*> ji_links = grammar.connectable2( sgraph.cat(j), sgraph.cat(i), sgraph[j].prop.flags, sgraph[i].prop.flags); // ref do Roles!!! list<const Link*> ji_links = grammar.connectable2( sgraph.cat(j), sgraph.cat(i), sgraph[j].prop.flags, sgraph[i].prop.flags); // ref do Roles!!!
list<const Link*>::iterator ri = ji_links.begin(); list<const Link*>::iterator ri = ji_links.begin();
@ -516,7 +323,7 @@ void try_connect_dependents(int j)
{ {
if(debug) fprintf(stderr," ...SUCCESS!\n"); if(debug) fprintf(stderr," ...SUCCESS!\n");
connect_left( j, i, **ri, new_head_boubbles, new_dep_boubbles); connect_left( j, i, **ri, new_head_boubbles, new_dep_boubbles);
lvi.update_edge(sgraph,i); // lvi.update_edge(sgraph,i);
} }
else else
{ if(debug) fprintf(stderr," ...boubbles failed\n"); } { if(debug) fprintf(stderr," ...boubbles failed\n"); }
@ -525,9 +332,7 @@ void try_connect_dependents(int j)
} }
} }
else else
if(debug) {fprintf(stderr,"%d <-- unsaturated\n",i); } if(debug) {fprintf(stderr,"\t%d <-- %d\t%d unsaturated\n",i,j,i); }
}
} }
//---------------------------------------------------------------------------------------------------- //----------------------------------------------------------------------------------------------------
@ -536,44 +341,41 @@ void try_connect_heads(int j)
LViterator lvi(sgraph,j); LViterator lvi(sgraph,j);
int i; int i;
while((i=lvi.next()) >= 0) while((i=lvi.next()) >= 0)
{ if(sgraph.saturated(j))
// if(debug) sgraph.print_node_debug(stderr,"H-CUR> ",i,-1); {
if(sgraph.saturated(j)) if(debug) fprintf(stderr, "\t%d --> %d",i,j);
{
if(debug) fprintf(stderr, "%d -->",i);
list<const Link*> ij_links = grammar.connectable2( sgraph.cat(i), sgraph.cat(j), sgraph[i].prop.flags, sgraph[j].prop.flags ); list<const Link*> ij_links = grammar.connectable2( sgraph.cat(i), sgraph.cat(j), sgraph[i].prop.flags, sgraph[j].prop.flags );
list<const Link*>::iterator ri = ij_links.begin(); list<const Link*>::iterator ri = ij_links.begin();
if(ri == ij_links.end()) { if(debug) fprintf(stderr," no roles\n"); } if(ri == ij_links.end()) { if(debug) fprintf(stderr," no roles\n"); }
else else
{ {
for(; ri != ij_links.end(); ++ri ) for(; ri != ij_links.end(); ++ri )
{ {
if(debug) fprintf(stderr," %s",(*ri)->role.str()); if(debug) fprintf(stderr," %s",(*ri)->role.str());
if( !grammar.check_constr2( sgraph[i].prop, sgraph[j].prop, 1, **ri ) ) if( !grammar.check_constr2( sgraph[i].prop, sgraph[j].prop, 1, **ri ) )
{ if(debug) fprintf(stderr," ...constraints failed\n"); } { if(debug) fprintf(stderr," ...constraints failed\n"); }
else else
{ {
list<Boubble*> new_head_boubbles = collect_head_boubbles(i,j,(*ri)->role); list<Boubble*> new_head_boubbles = collect_head_boubbles(i,j,(*ri)->role);
list<Boubble*> new_dep_boubbles = collect_dep_boubbles(i,j,(*ri)->role); list<Boubble*> new_dep_boubbles = collect_dep_boubbles(i,j,(*ri)->role);
if( check_meeting_boubles(new_head_boubbles) && if( check_meeting_boubles(new_head_boubbles) &&
check_meeting_boubles(new_dep_boubbles) && check_meeting_boubles(new_dep_boubbles) &&
check_boubbles_at_target(new_head_boubbles,i) && check_boubbles_at_target(new_head_boubbles,i) &&
check_boubbles_at_target(new_dep_boubbles,j) ) check_boubbles_at_target(new_dep_boubbles,j) )
{ {
if(debug) fprintf(stderr," ...SUCCESS!\n"); if(debug) fprintf(stderr," ...SUCCESS!\n");
connect_right( i, j, **ri, new_head_boubbles, new_dep_boubbles ); connect_right( i, j, **ri, new_head_boubbles, new_dep_boubbles );
} }
else else
{ if(debug) fprintf(stderr," ...bubbles failed\n",i); } { if(debug) fprintf(stderr," ...bubbles failed\n",i); }
} }
} }
} }
} }
else else
if(debug) {fprintf(stderr,"%d <-- unsaturated\n",j); } if(debug) {fprintf(stderr,"\t* <-- %d unsaturated\n",j); }
}
} }
//==================================================================================================== //====================================================================================================
@ -661,13 +463,13 @@ void dgp1()
set_initial_constraints(basenode); set_initial_constraints(basenode);
nodelist.push_back(basenode); nodelist.push_back(basenode);
if(debug) sgraph.print_node_debug(stderr,"add base",basenode,-1); // STDOUT!!! if(debug) sgraph.print_node_debug(stderr,"node",basenode,-1); // STDOUT!!!
// if(debug) print_sets(basenode); // if(debug) print_sets(basenode);
list<int>::iterator cursor=processed; list<int>::iterator cursor=processed;
while(++cursor != nodelist.end()) while(++cursor != nodelist.end())
{ {
if(debug) sgraph.print_node_debug(stderr,"MAIN-CUR> ",*cursor,-1); if(debug) sgraph.print_node_debug(stderr,"CUR>",*cursor,-1);
try_connect_dependents(*cursor); try_connect_dependents(*cursor);
try_connect_heads(*cursor); try_connect_heads(*cursor);
processed=cursor; processed=cursor;

View File

@ -1,5 +1,5 @@
#ifndef _DGP0_HH #ifndef _DGP1_HH
#define _DGP0_HH #define _DGP1_HH
#include "grammar.hh" #include "grammar.hh"
#include "sgraph.hh" #include "sgraph.hh"

View File

@ -1,4 +1,3 @@
#include "global.hh"
#include "sgraph.hh" #include "sgraph.hh"
#include "grammar.hh" #include "grammar.hh"
#include "const.hh" #include "const.hh"
@ -15,21 +14,21 @@ int SGraph::add_base_snode(int mnodeind)
newnode.mnode=mnodeind; newnode.mnode=mnodeind;
for(vector<int>::iterator pm=mgraph[newnode.mnode].pred.begin(); pm!=mgraph[newnode.mnode].pred.end(); ++pm) // for(vector<int>::iterator pm=mgraph[newnode.mnode].pred.begin(); pm!=mgraph[newnode.mnode].pred.end(); ++pm)
for(vector<int>::iterator ps=mgraph[*pm].snodes.begin(); ps!=mgraph[*pm].snodes.end(); ++ps) // for(vector<int>::iterator ps=mgraph[*pm].snodes.begin(); ps!=mgraph[*pm].snodes.end(); ++ps)
if(nodes[*ps].in_LH) // if(nodes[*ps].in_LH)
{ // {
newnode.LV.set(*ps); // newnode.LV.set(*ps);
if(nodes[*ps].saturated()) newnode.LV |= nodes[*ps].LH; // if(nodes[*ps].saturated()) newnode.LV |= nodes[*ps].LH;
} // }
mgraph[newnode.mnode].snodes.push_back(lastnodeind()); mgraph[newnode.mnode].snodes.push_back(lastnodeind());
newnode.in_LH=true; // newnode.in_LH=true;
newnode.edge.push_back(lastnodeind()); // newnode.edge.push_back(lastnodeind());
newnode.edge_contains_self = true ; newnode.edge.insert_self();
return lastnodeind(); return lastnodeind();
} }
@ -54,13 +53,14 @@ void SGraph::update_right(int headind, int depind)
//==================================================================================================== //====================================================================================================
int SGraph::clone(int ancind, NodeProp newprop) int SGraph::clone(int ancind, NodeProp newprop, Edge edge)
{ {
SNode &newnode=makenewnode(); SNode &newnode=makenewnode();
SNode &ancnode = nodes[ancind]; SNode &ancnode = nodes[ancind];
newnode.prop=newprop; newnode.prop = newprop;
newnode.mnode=ancnode.mnode; newnode.edge = edge;
newnode.mnode = ancnode.mnode;
mgraph[newnode.mnode].snodes.push_back(lastnodeind()); mgraph[newnode.mnode].snodes.push_back(lastnodeind());
return lastnodeind(); return lastnodeind();
@ -89,9 +89,9 @@ int SGraph::print_node_debug(FILE* f, const char* pref, int n, int anc)
void SGraph::print_arc(FILE* f, const char* msg, int head, int dep, Role role, int dir) // 0 - left, 1 - right void SGraph::print_arc(FILE* f, const char* msg, int head, int dep, Role role, int dir) // 0 - left, 1 - right
{ {
if(dir==0) if(dir==0)
fprintf(f,"%s %s:%d <-- %d\n", msg, role.str(), dep, head); fprintf(f,"%s\t%d <-- %d\t%s\n", msg, dep, head, role.str());
else else
fprintf(f,"%s %s:%d --> %d\n", msg, role.str(), head, dep); fprintf(f,"%s\t%d --> %d\t%s\n", msg, head, dep, role.str());
} }
//==================================================================================================== //====================================================================================================
@ -171,15 +171,17 @@ int SGraph::sprint_node(char* buf, int nodeind, int anc, unsigned int info)
int SGraph::sprint_node_debug(char* buf, const char* pref, int n, int anc) int SGraph::sprint_node_debug(char* buf, const char* pref, int n, int anc)
{ {
char *buf0 = buf; char *buf0 = buf;
buf+=sprintf(buf,"%-10s",pref); buf+=sprintf(buf,"%-8s",pref);
buf+=sprintf(buf,"%d.%s",n,form(n)); buf+=sprintf(buf,"%d.%s",n,form(n));
buf+=sprintf(buf,";"); buf+=sprintf(buf,";");
buf+=sprintf(buf,"%s ",cat(n).str()); buf+=sprintf(buf,"%s ",cat(n).str());
while(buf-buf0<40) buf+=sprintf(buf," "); while(buf-buf0<40) buf+=sprintf(buf," ");
buf+=sprint_node(buf,n,anc,HEADS|DEPS|SETS|CONSTRAINTS); buf+=sprint_node(buf,n,anc,HEADS|DEPS|CONSTRAINTS);
buf+=sprintf(buf,"/"); buf+=sprintf(buf,"/");
for(vector<int>::iterator e = nodes[n].edge.begin(); e != nodes[n].edge.end(); e++ ) if(nodes[n].edge.self())
buf += sprintf(buf,"* ");
for(list<int>::iterator e = nodes[n].edge.others().begin(); e != nodes[n].edge.others().end(); e++ )
buf += sprintf(buf,"%d ", *e); buf += sprintf(buf,"%d ", *e);
buf+=sprintf(buf,"\n"); buf+=sprintf(buf,"\n");

View File

@ -12,7 +12,7 @@
#include "mgraph.hh" #include "mgraph.hh"
#include "thesymbols.hh" #include "thesymbols.hh"
#include "boubble.hh" #include "boubble.hh"
#include "global.hh"
using namespace std; using namespace std;
@ -115,43 +115,11 @@ void NodeProp::copy(const NodeProp& p)
boubbles.push_back(new Boubble(**b)); boubbles.push_back(new Boubble(**b));
} }
//---------------------------------------------------------------------------------------------------- inline NodeProp::~NodeProp() { clear_boubbles(); }
inline NodeProp::NodeProp() { clear(); }
inline inline NodeProp::NodeProp(const NodeProp& p) { copy(p); }
NodeProp::~NodeProp() inline NodeProp& NodeProp::operator=(const NodeProp& p) { clear(); copy(p); return *this; }
{ inline void NodeProp::clear()
clear_boubbles();
}
//----------------------------------------------------------------------------------------------------
inline
NodeProp::NodeProp()
{
clear();
}
//----------------------------------------------------------------------------------------------------
inline
NodeProp::NodeProp(const NodeProp& p)
{
copy(p);
}
//----------------------------------------------------------------------------------------------------
inline
NodeProp& NodeProp::operator=(const NodeProp& p)
{
clear();
copy(p);
return *this;
}
//----------------------------------------------------------------------------------------------------
inline
void NodeProp::clear()
{ {
required.reset(); required.reset();
forbidden.reset(); forbidden.reset();
@ -161,6 +129,31 @@ void NodeProp::clear()
clear_boubbles(); clear_boubbles();
} }
//====================================================================================================
// CLASS Edge
//====================================================================================================
// An Edge holds a "self" flag plus a list of node indices ("others") that insert() keeps in
// ascending order (duplicates are allowed). Two edges are equal when both the flag and the
// list are equal.
class Edge
{
public:
// Creates an empty edge: no self flag, no other nodes.
Edge() : _self(false) { }
// Creates a copy of e in which e's self flag (if set) is replaced by the index map_self.
// _self is initialised explicitly: assign() used to leave it untouched when e had no self
// flag, so operator== could read an indeterminate value.
Edge(const Edge& e, int map_self) : _self(false) { assign(e,map_self); }

// True when the self flag is set.
bool self() const { return _self; }
// Access to the list of other node indices (mutable and read-only variants).
std::list<int>& others() { return _others; }
const std::list<int>& others() const { return _others; }

// Sets (or, with b=false, clears) the self flag.
void insert_self(bool b=true) { _self=b; }
// Inserts n in front of the first element that is not smaller than n.
void insert(int n)
{
std::list<int>::iterator pos = _others.begin();
while(pos != _others.end() && *pos < n) ++pos;
_others.insert(pos,n);
}
// Inserts every element of l. The argument is taken by value on purpose, so that passing
// this edge's own others() list is safe.
void insert(std::list<int> l)
{
for(std::list<int>::const_iterator i = l.begin(); i != l.end(); ++i) insert(*i);
}
// Replaces the contents of this edge with those of e; if e has its self flag set, the index
// map_self is inserted into the list instead. The resulting edge never has the self flag set.
// NOTE(review): with the default map_self=-1 a source edge with the self flag causes -1 to be
// inserted into the list -- confirm that callers always pass a real index in that case.
void assign(const Edge& e, int map_self=-1)
{
const bool had_self = e.self(); // read first: e may be *this
_others = e._others;
_self = false;
if(had_self) insert(map_self);
}

bool operator==(const Edge& e) const { return _self == e._self && _others == e._others; }

private:
bool _self;
std::list<int> _others;
};
//==================================================================================================== //====================================================================================================
// CLASS SNode // CLASS SNode
//==================================================================================================== //====================================================================================================
@ -174,8 +167,7 @@ struct SNode
NodeProp prop; NodeProp prop;
vector<int> edge; Edge edge;
bool edge_contains_self;
bool visible_as_neighbour; bool visible_as_neighbour;
bitset<MAXNODES> LV; bitset<MAXNODES> LV;
@ -188,6 +180,14 @@ struct SNode
void clear(); void clear();
bool saturated(); bool saturated();
// void edge_clear() { edge.clear(); edge_contains_self=false;}
// void edge_set(int i) { edge.clear(); edge_contains_self=false; edge.push_back(i); }
// void edge_set(vector<int>& v) { edge.assign(v.begin(),v.end()); edge_contains_self=false; }
// void edge_set_self(bool b=true) { edge.clear(); edge_contains_self=b; }
// void edge_add(int i) { edge.push_back(i); }
// void edge_add(vector<int>& v) { edge.insert(edge.end(),v.begin(),v.end()); }
// void edge_add_self(bool b=true) { edge_contains_self=b; }
}; };
//---------------------------------------------------------------------------------------------------- //----------------------------------------------------------------------------------------------------
@ -215,7 +215,7 @@ public:
void clear() { nodes.clear(); } void clear() { nodes.clear(); }
int add_base_snode(int mnodeind); int add_base_snode(int mnodeind);
int clone(int ancind, NodeProp newprop); int clone(int ancind, NodeProp newprop, Edge edge);
void update_left(int headind, int depind); void update_left(int headind, int depind);
void update_right(int headind, int depind); void update_right(int headind, int depind);
bool visible(int left, int right); bool visible(int left, int right);
@ -298,7 +298,7 @@ public:
void update_edge(SGraph& sg, int e); void update_edge(SGraph& sg, int e);
private: private:
int snode;
SGraph& sgraph; SGraph& sgraph;
MGraph& mgraph; MGraph& mgraph;
stack<int> waydown; stack<int> waydown;
@ -311,28 +311,31 @@ private:
}; };
inline LViterator::LViterator(SGraph& sg, int n, bool s=true) : sgraph(sg), mgraph(sg.mgraph), strict(s) inline LViterator::LViterator(SGraph& sg, int n, bool s=true) : snode(n), sgraph(sg), mgraph(sg.mgraph), strict(s)
{ {
if(sg[n].edge_contains_self) // TO DODAÆ PO PRZEJ¦CIU NA EDGE_CONTAINS_SELF if(sg[n].edge.self())
{ {
push_ld(n); push_ld(n);
push_ln(n); push_ln(n);
} }
for(vector<int>::iterator i=sg[n].edge.begin(); i!=sg[n].edge.end(); ++i) for(list<int>::iterator i=sg[n].edge.others().begin(); i!=sg[n].edge.others().end(); ++i)
{ {
if(*i != n) push_ld(*i);
{ push_ln(*i);
push_ld(*i);
push_ln(*i);
}
} }
} }
inline void LViterator::update_edge(SGraph& sg, int n) inline void LViterator::update_edge(SGraph& sg, int n)
{ {
for(vector<int>::iterator i=sg[n].edge.begin(); i!=sg[n].edge.end(); ++i) if(sg[n].edge.self())
{
push_ld(n);
push_ln(n);
}
for(list<int>::iterator i=sg[n].edge.others().begin(); i!=sg[n].edge.others().end(); ++i)
{ {
push_ld(*i); push_ld(*i);
push_ln(*i); push_ln(*i);
@ -344,7 +347,10 @@ inline int LViterator::next()
if(wayup.empty()) if(wayup.empty())
{ {
if(waydown.empty()) if(waydown.empty())
return -1; // {
if(debug) fprintf(stderr,"\t\tLViterator(%d)\treturn %d\n",snode,-1);
return -1; //
}
else else
{ {
int k = waydown.top(); int k = waydown.top();
@ -352,12 +358,16 @@ inline int LViterator::next()
push_ld(k); push_ld(k);
push_ln(k); push_ln(k);
if(wayup.empty()) if(wayup.empty())
return -1; // k NIE MA POPRZEDNIKÓW, NIE MO¯E TE¯ ZATEM MIEÆ LEWOSTRONNYCH PODRZÊDNIKÓW {
if(debug) fprintf(stderr,"\t\tLViterator(%d)\treturn %d\n",snode,-1);
return -1; // k NIE MA POPRZEDNIKÓW, NIE MO¯E TE¯ ZATEM MIEÆ LEWOSTRONNYCH PODRZÊDNIKÓW
}
else else
{ {
int i = wayup.top(); int i = wayup.top();
wayup.pop(); wayup.pop();
push_lh(i); push_lh(i);
if(debug) fprintf(stderr,"\t\tLViterator(%d)\treturn %d\n",snode,i);
return i; return i;
} }
} }
@ -368,6 +378,7 @@ inline int LViterator::next()
int i = wayup.top(); int i = wayup.top();
wayup.pop(); wayup.pop();
push_lh(i); push_lh(i);
if(debug) fprintf(stderr,"\t\tLViterator(%d)\treturn %d\n",snode,i);
return i; return i;
}; };
} }
@ -377,7 +388,10 @@ inline void LViterator::push_ld(int i)
vector<Arc>& arcs = sgraph[i].deps; vector<Arc>& arcs = sgraph[i].deps;
for(vector<Arc>::iterator a = arcs.begin(); a != arcs.end(); ++a) for(vector<Arc>::iterator a = arcs.begin(); a != arcs.end(); ++a)
if(mgraph[sgraph[a->dst].mnode].pos < mgraph[sgraph[i].mnode].pos) if(mgraph[sgraph[a->dst].mnode].pos < mgraph[sgraph[i].mnode].pos)
push(waydown,a->dst); {
push(waydown,a->dst);
if(debug) fprintf(stderr,"\t\tLViterator(%d)\tPUSH_LD waydown %d\n",snode,a->dst);
}
} }
inline void LViterator::push_lh(int i) inline void LViterator::push_lh(int i)
@ -385,7 +399,10 @@ inline void LViterator::push_lh(int i)
vector<Arc>& arcs = sgraph[i].heads; vector<Arc>& arcs = sgraph[i].heads;
for(vector<Arc>::iterator a = arcs.begin(); a != arcs.end(); ++a) for(vector<Arc>::iterator a = arcs.begin(); a != arcs.end(); ++a)
if(mgraph[sgraph[a->dst].mnode].pos < mgraph[sgraph[i].mnode].pos) if(mgraph[sgraph[a->dst].mnode].pos < mgraph[sgraph[i].mnode].pos)
push(wayup,a->dst); {
push(wayup,a->dst);
if(debug) fprintf(stderr,"\t\tLViterator(%d)\tPUSH_LH wayup %d\n",snode,a->dst);
}
} }
inline void LViterator::push_ln(int i) inline void LViterator::push_ln(int i)
@ -396,7 +413,10 @@ inline void LViterator::push_ln(int i)
vector<int>& spredecessors = mgraph[*mp].snodes; vector<int>& spredecessors = mgraph[*mp].snodes;
for(vector<int>::iterator sp = spredecessors.begin(); sp != spredecessors.end(); ++sp ) for(vector<int>::iterator sp = spredecessors.begin(); sp != spredecessors.end(); ++sp )
if(sgraph[*sp].visible_as_neighbour || !strict) if(sgraph[*sp].visible_as_neighbour || !strict)
push(wayup,*sp); {
push(wayup,*sp);
if(debug) fprintf(stderr,"\t\tLViterator(%d)\tPUSH_LN wayup %d\n",snode, *sp);
}
} }
} }
@ -412,16 +432,15 @@ public:
int next(); int next();
private: private:
int snode;
SGraph& sgraph; SGraph& sgraph;
MGraph& mgraph; MGraph& mgraph;
int thenode;
stack<int> wayup; stack<int> wayup;
void push_ln(int i); void push_ln(int i);
}; };
inline LNiterator::LNiterator(SGraph& sg, int n) : sgraph(sg), mgraph(sg.mgraph), thenode(n) inline LNiterator::LNiterator(SGraph& sg, int n) : sgraph(sg), mgraph(sg.mgraph), snode(n)
{ {
push_ln(n); push_ln(n);
} }
@ -429,11 +448,15 @@ inline LNiterator::LNiterator(SGraph& sg, int n) : sgraph(sg), mgraph(sg.mgraph)
inline int LNiterator::next() inline int LNiterator::next()
{ {
if(wayup.empty()) if(wayup.empty())
return -1; {
if(debug) fprintf(stderr,"\t\tLNiterator(%d)\treturn %d\n",snode,-1);
return -1;
}
else else
{ {
int i = wayup.top(); int i = wayup.top();
wayup.pop(); wayup.pop();
if(debug) fprintf(stderr,"\t\tLNiterator(%d)\treturn %d\n",snode,i);
return i; return i;
}; };
} }
@ -445,7 +468,10 @@ inline void LNiterator::push_ln(int i)
{ {
vector<int>& spredecessors = mgraph[*mp].snodes; vector<int>& spredecessors = mgraph[*mp].snodes;
for(vector<int>::iterator sp = spredecessors.begin(); sp != spredecessors.end(); ++sp ) for(vector<int>::iterator sp = spredecessors.begin(); sp != spredecessors.end(); ++sp )
{
wayup.push(*sp); wayup.push(*sp);
// Trace the node that was actually pushed (the message used to print a hard-coded -1).
if(debug) fprintf(stderr,"\t\tLNiterator(%d)\tPUSH %d\n",snode,*sp);
}
} }
} }
@ -461,7 +487,7 @@ public:
int next(); int next();
private: private:
int snode;
SGraph& sgraph; SGraph& sgraph;
MGraph& mgraph; MGraph& mgraph;
stack<int> wayup; stack<int> wayup;
@ -469,7 +495,7 @@ private:
void push_lh(int i); void push_lh(int i);
}; };
inline LHiterator::LHiterator(SGraph& sg, int n) : sgraph(sg), mgraph(sg.mgraph) inline LHiterator::LHiterator(SGraph& sg, int n) : snode(n), sgraph(sg), mgraph(sg.mgraph)
{ {
push_lh(n); push_lh(n);
} }
@ -492,7 +518,10 @@ inline void LHiterator::push_lh(int i)
vector<Arc>& arcs = sgraph[i].heads; vector<Arc>& arcs = sgraph[i].heads;
for(vector<Arc>::iterator a = arcs.begin(); a != arcs.end(); ++a) for(vector<Arc>::iterator a = arcs.begin(); a != arcs.end(); ++a)
if(mgraph[sgraph[a->dst].mnode].pos < mgraph[sgraph[i].mnode].pos) if(mgraph[sgraph[a->dst].mnode].pos < mgraph[sgraph[i].mnode].pos)
wayup.push(a->dst); {
wayup.push(a->dst);
// Trace the head node that was actually pushed (the message used to print a hard-coded -1).
if(debug) fprintf(stderr,"\t\tLHiterator(%d)\tPUSH %d\n",snode,a->dst);
}
} }
//---------------------------------------------------------------------------------------------------- //----------------------------------------------------------------------------------------------------
@ -506,16 +535,15 @@ public:
int next(); int next();
private: private:
int snode;
SGraph& sgraph; SGraph& sgraph;
MGraph& mgraph; MGraph& mgraph;
int thenode;
stack<int> waydown; stack<int> waydown;
void push_ld(int i); void push_ld(int i);
}; };
inline LDiterator::LDiterator(SGraph& sg, int n) : sgraph(sg), mgraph(sg.mgraph), thenode(n) inline LDiterator::LDiterator(SGraph& sg, int n) : sgraph(sg), mgraph(sg.mgraph), snode(n)
{ {
push_ld(n); push_ld(n);
} }
@ -537,10 +565,11 @@ inline void LDiterator::push_ld(int i)
{ {
vector<Arc>& arcs = sgraph[i].deps; vector<Arc>& arcs = sgraph[i].deps;
for(vector<Arc>::iterator a = arcs.begin(); a != arcs.end(); ++a) for(vector<Arc>::iterator a = arcs.begin(); a != arcs.end(); ++a)
if(mgraph[sgraph[a->dst].mnode].pos < mgraph[sgraph[thenode].mnode].pos) if(mgraph[sgraph[a->dst].mnode].pos < mgraph[sgraph[snode].mnode].pos)
waydown.push(a->dst); {
waydown.push(a->dst);
// Trace the dependent node that was actually pushed (the message used to print a hard-coded -1).
if(debug) fprintf(stderr,"\t\tLDiterator(%d)\tPUSH %d\n",snode,a->dst);
}
} }
#endif #endif

View File

@ -53,7 +53,7 @@ EOS {POINT}|{POINTS}|{QMARK}|{EXCL}
.* { ECHO; set_position(); } .* { ECHO; set_position(); }
<<EOF>> { printf("%04d 00 EOS *\n",pos+len); exit(1); } <<EOF>> { printf("%04d 00 EOS *\n",pos+len); exit(0); }
%% %%

View File

@ -12,39 +12,54 @@ opts = GetoptLong.new(
[ '--debug', '-d', GetoptLong::NO_ARGUMENT ], [ '--debug', '-d', GetoptLong::NO_ARGUMENT ],
[ '--format', '-F', GetoptLong::REQUIRED_ARGUMENT ], [ '--format', '-F', GetoptLong::REQUIRED_ARGUMENT ],
[ '--info', '-I', GetoptLong::REQUIRED_ARGUMENT ], [ '--info', '-I', GetoptLong::REQUIRED_ARGUMENT ],
[ '--dgpids', GetoptLong::NO_ARGUMENT ],
[ '--graph', GetoptLong::NO_ARGUMENT ],
[ '--uniq', '-u', GetoptLong::NO_ARGUMENT ],
[ '--utt', GetoptLong::NO_ARGUMENT ],
[ '--span', '-s', GetoptLong::REQUIRED_ARGUMENT ], [ '--span', '-s', GetoptLong::REQUIRED_ARGUMENT ],
[ '--maxsize', GetoptLong::REQUIRED_ARGUMENT ], [ '--maxsize', GetoptLong::REQUIRED_ARGUMENT ],
[ '--forest', GetoptLong::NO_ARGUMENT ], [ '--forest', GetoptLong::NO_ARGUMENT ],
[ '--ground', GetoptLong::NO_ARGUMENT ],
[ '--only-trees','-t', GetoptLong::NO_ARGUMENT ]) [ '--only-trees','-t', GetoptLong::NO_ARGUMENT ])
$helptext= $helptext = <<END
"The program generates trees from the graph output by dgp. dgp must\n"+ The program generates trees from the graph output by dgp. dgp must be run
"must be run with '--info=ds' option.\n\n"+ with '--info=ds' option.
"Command: tre [options]\n\n"+
"Options:\n"+ Command: tre [options]
"--help -h Print help (this text) and exit.\n"+
"--debug -d Verbose output. For developers only.\n"+ Options:
"--format=s -F s Output format. Recognized values:\n"+ --help -h Print help (this text) and exit.
" a root + list of arcs\n"+ --debug -d Verbose output. For developers only.
" p parenthesized notation\n"+ --format=s -F s Output format. Recognized values:
" h human readable indented tree format\n"+ a root + list of arcs
" Multiple values are allowed. (default p)\n"+ p parenthesized notation
"--info=s -I s Information printed. Recognized values:\n"+ h human readable indented format
" n node identifier\n"+ c CONLL format
" f surface form\n"+ Multiple values are allowed. (default p)
" m morphological information\n"+ --info=s -I s Information printed. Recognized values:
" l arc labels\n"+ n node identifier
"--only-trees -t Do not copy input. Print trees only.\n" f surface form
m morphological information
l arc labels
--gphids Used gph node identifiers (default: linear)
--dgpids Used dgp node identifiers (default: linear)
--graph Do not generate trees, just print the graph.
--uniq -u Remove duplicate trees.
--utt UTT formatted output.
END
$DEBUG=false $DEBUG=false
$FORMAT='p' $FORMAT='p'
$INFO='DEFAULT' $INFO='DEFAULT'
$ONLYTREES=false $UTTOUTPUT=false
$START=nil $START=nil
$END=nil $END=nil
$FOREST=false $FOREST=false
$MAXSIZE=nil $MAXSIZE=nil
$GPHIDS=false
$DGPIDS=false
# This was `$GRAPH==false`: a comparison whose result was discarded, so the flag was never set.
$GRAPH=false
# Default for --uniq, initialised like the other option flags.
$UNIQ=false
opts.each do |opt, arg| opts.each do |opt, arg|
case opt case opt
@ -57,12 +72,18 @@ opts.each do |opt, arg|
$FORMAT=arg $FORMAT=arg
when '--info' when '--info'
$INFO=arg $INFO=arg
when '--only-trees' when '--gphids'
$ONLYTREES=true $GPHIDS=true
when '--dgpids'
$DGPIDS=true
when '--graph'
$GRAPH=true
when '--uniq'
$UNIQ=true
when '--utt'
$UTTOUTPUT=true
when '--forest' when '--forest'
$FOREST=true $FOREST=true
when '--ground'
$GROUND=true
when '--maxsize' when '--maxsize'
$MAXSIZE=arg.to_i $MAXSIZE=arg.to_i
when '--span' when '--span'
@ -75,7 +96,7 @@ end
if $INFO=='DEFAULT' if $INFO=='DEFAULT'
case $FORMAT case $FORMAT
when 'p','a' when 'p','a'
$INFO='nl' $INFO='fl'
when 'h' when 'h'
$INFO='fmnl' $INFO='fmnl'
end end
@ -94,7 +115,7 @@ def tre(input)
tokennumber=0 tokennumber=0
for line in input for line in input
seg=Seg.new(line) seg=Seg.new(line)
print line unless $ONLYTREES || seg.field(3) == 'EOS' print line if $UTTOUTPUT && seg.field(3) == 'EOS'
if dgp=seg['dgp'] if dgp=seg['dgp']
if nodes==[] && seg[3]!='BOS' if nodes==[] && seg[3]!='BOS'
@ -121,23 +142,22 @@ def tre(input)
if seg[3]=='EOS' if seg[3]=='EOS'
$pref = "#{seg[1]} #{seg[2]} SYN *" $pref = "#{seg[1]} #{seg[2]} SYN *"
parsegraph(nodes) parsegraph(nodes)
set_ord #(0...(nodes.length)).each{|i| set_distance_from_i i } set_ord #(0...(nodes.length)).each{|i| set_distance_from_i i }
printgraph if $DEBUG printgraph if $DEBUG
if $GRAPH
if $GROUND if $FORMAT =~ /c/
printground printconll
else
printground
end
else else
thetrees = $FOREST ? genforest : gentrees thetrees = $FOREST ? genforest : gentrees
outputs = output_trees thetrees
output_trees thetrees outputs = outputs.sort.uniq if $UNIQ
print outputs.join
print line unless $ONLYTREES print line if $UTTOUTPUT
$gphid=[]
$gphid=[] # POWTÓRZENIE
$form=[] $form=[]
$lem=[] $lem=[]
$ord1=[] $ord1=[]
@ -153,29 +173,47 @@ end
def output_trees trees def output_trees trees
outputs = []
for t in trees for t in trees
$count += 1 $count += 1
# t1=ground(t) # t1=ground(t)
t1=t t1=t
span = $FOREST ? " span:" + (ground_tree_min(t1).to_s + ","+ground_tree_max(t1).to_s)+";" : "" # span = $FOREST ? " span:" + (ground_tree_min(t1).to_s + ","+ground_tree_max(t1).to_s)+";" : ""
# case $FORMAT
# when /a/
# outputs << "#{$pref} tre:#{$count}#{span} #{arc_output(t1)}\n"
# when /p/
# outputs << "#{$pref}#{span} tre:#{$count} par:#{par_output(t1)}\n"
# when /h/
# outputs << "#\n# tree #{$count}\n# ------\n#{dgp_output(t1,0)}"
# when /c/
# outputs << conll_output(t1,0)
# end
case $FORMAT case $FORMAT
when /a/ when /a/
print "#{$pref} tre:#{$count}#{span} #{arcsinfo(t1[0],t1[1])}" outputs << "#{arc_output(t1)}\n"
# print arcsinfo(t1[0],t1[1])
print "\n"
when /p/ when /p/
print "#{$pref}#{span} tre:#{$count} par:" outputs << "#{par_output(t1)}\n"
printpar(t1[0],t1[1])
print "\n"
when /h/ when /h/
print "#\n# tree #{$count}\n# ------\n" outputs << human_output(t1,0)
printtree_dgp(t1[0],t1[1],0) when /c/
outputs << conll_output(t1,0)
end end
end end
outputs
end end
# Chooses the identifier printed for node index +id+:
#   $DGPIDS set -> the index itself,
#   $GPHIDS set -> the $gphid entry for that index,
#   otherwise   -> the $ord1 entry looked up through $gphid.
def id_output id
  return id if $DGPIDS
  gph = $gphid[id]
  return gph if $GPHIDS
  $ord1[gph]
end
def nodeinfo(id) def nodeinfo(id)
info="" info=""
@ -185,7 +223,7 @@ def nodeinfo(id)
info += '.' if $INFO =~ /[nfm]/ info += '.' if $INFO =~ /[nfm]/
end end
if $INFO =~ /n/ if $INFO =~ /n/
info += gphid.to_s info += id_output(id).to_s
info += '.' if $INFO =~ /[fm]/ info += '.' if $INFO =~ /[fm]/
end end
if $INFO =~ /f/ if $INFO =~ /f/
@ -199,13 +237,9 @@ def nodeinfo(id)
end end
def arcsinfo(root,arcs) def arc_output(tree)
root, arcs = tree
"head:#{nodeinfo(root)} links:" + arcs.map{|a| "(#{($INFO =~ /l/) ? a[2]+":" : ""}#{nodeinfo(a[0])}-#{nodeinfo(a[1])})"}.join("") "head:#{nodeinfo(root)} links:" + arcs.map{|a| "(#{($INFO =~ /l/) ? a[2]+":" : ""}#{nodeinfo(a[0])}-#{nodeinfo(a[1])})"}.join("")
# for a in arcs
# print ';'
# print "#{a[2]}:" if $INFO =~ /l/
# print nodeinfo(a[0])+'-'+nodeinfo(a[1])
# end
end end
def printtree(root,arcs,o) def printtree(root,arcs,o)
@ -220,54 +254,68 @@ def printtree(root,arcs,o)
end end
end end
def printtree_dgp(root,arcs,o) def human_output(tree,o)
root, arcs = tree
output = ''
if o==0 if o==0
print "%-16s" % "root: " output += "%-16s" % "root: "
end end
print nodeinfo(root),"\n" output += nodeinfo(root) + "\n"
for arc in arcs.select{ |a| a[0]==root }.sort{|a,b| a[1]<=>b[1] } for arc in arcs.select{ |a| a[0]==root }.sort{|a,b| a[1]<=>b[1] }
print " "*(o+1) output += " "*(o+1)
print "%-16s" % (arc[2]+": ") output += "%-16s" % (arc[2]+": ")
printtree_dgp(arc[1],arcs,o+1) output += human_output([arc[1],arcs],o+1)
end end
output
end end
# old: def conll_output(tree,o)
# def printpar(root,arcs) root,arcs = tree
# print nodeinfo(root) nodes = ([root] + arcs.map{|a| a[1]}).sort{|a,b| $gphid[a] <=> $gphid[b]}
# deps = arcs.select{ |a| a[0]==root }.sort{|a,b| a[1]<=>b[1] } conll_lines = []
# unless deps == [] for i in nodes
# print '(' gphid = $gphid[i]
# cont=false id = $ord1[gphid]
# for arc in deps form = $form[gphid]
# if cont then print ',' else cont=true end /^(?<lemma>.*),(?<cpostag>[^\/]*)(\/(?<feats>.+))?/ =~ $lem[gphid]
# print arc[2],':' if $INFO =~ /l/ thearcs = arcs.select{|a| a[1]==i }.map{|a| [$ord1[$gphid[a[0]]],a[2]] }
# printpar(arc[1],arcs) thearcs = [[0,'root']] if thearcs.empty?
# end for a in thearcs
# print ')' head,deprel = a
# end conll_lines << [id,form,lemma,cpostag,cpostag,feats,head,deprel,nil,nil].map{|s| s ? s.to_s : "_"}.join("\t")
# end end
end
def printpar(root,arcs) conll_lines.join("\n") + "\n\n"
end
def par_output(tree)
root, arcs = tree
ldeps = arcs.select{|a| a[0]==root and $gphid[a[1]] < $gphid[root]}.sort{|a,b| $gphid[a[1]]<=>$gphid[b[1]] } ldeps = arcs.select{|a| a[0]==root and $gphid[a[1]] < $gphid[root]}.sort{|a,b| $gphid[a[1]]<=>$gphid[b[1]] }
rdeps = arcs.select{|a| a[0]==root and $gphid[a[1]] > $gphid[root]}.sort{|a,b| $gphid[a[1]]<=>$gphid[b[1]] } rdeps = arcs.select{|a| a[0]==root and $gphid[a[1]] > $gphid[root]}.sort{|a,b| $gphid[a[1]]<=>$gphid[b[1]] }
for arc in ldeps output = ''
print ' ('
print arc[2].upcase if $INFO =~ /l/
printpar(arc[1],arcs)
print ')'
end
print ' ',nodeinfo(root) output_left = ldeps.map{|arc| ' (' + (($INFO =~ /l/) ? arc[2].upcase : '') + par_output([arc[1],arcs]) + ')'}.join
output_right = rdeps.map{|arc| ' (' + (($INFO =~ /l/) ? arc[2].upcase : '') + par_output([arc[1],arcs]) + ')'}.join
# for arc in ldeps
# output += ' ('
# output += arc[2].upcase if $INFO =~ /l/
# output += par_output(arc[1],arcs)
# output += ')'
# end
# print ' ',nodeinfo(root)
# for arc in rdeps
# print ' ('
# print arc[2].upcase if $INFO =~ /l/
# printpar(arc[1],arcs)
# print ')'
# end
output_left + ' ' + nodeinfo(root) + output_right
for arc in rdeps
print ' ('
print arc[2].upcase if $INFO =~ /l/
printpar(arc[1],arcs)
print ')'
end
end end
@ -466,6 +514,21 @@ def printground
end end
end end
# Writes the current graph to standard output as tab-separated CoNLL rows.
# Every index from 1 up to (but excluding) the last entry of $form is visited;
# index 0 and the final entry are skipped.  A token gets one row per distinct
# incoming arc in $arcs, or a single row with head 0 / relation 'root' when it
# has none.  Missing fields are written as "_".  A blank line ends the block.
def printconll
  lem_pattern = /^(?<lemma>.*),(?<cpostag>[^\/]*)(\/(?<feats>.+))?/
  (1...($form.length-1)).each do |tok|
    ord = $ord1[tok]
    parsed = lem_pattern.match($lem[tok])
    lemma   = parsed && parsed[:lemma]
    cpostag = parsed && parsed[:cpostag]
    feats   = parsed && parsed[:feats]
    # Arcs whose dependent maps to this token, reduced to [head ordinal, label] pairs.
    incoming = $arcs.select { |arc| $ord1[$gphid[arc[1]]] == ord }
    heads = incoming.map { |arc| [$ord1[$gphid[arc[0]]], arc[2]] }.sort.uniq
    heads = [[0,'root']] if heads.empty?
    heads.each do |head, deprel|
      columns = [ord, $form[tok], lemma, cpostag, cpostag, feats, head, deprel, nil, nil]
      puts columns.map { |c| c ? c.to_s : "_" }.join("\t")
    end
  end
  puts
end
def set_to_s(s) "{#{s.join(',')}}" end def set_to_s(s) "{#{s.join(',')}}" end
def rel_to_s(r) "{#{r.map{|p| "(#{p[0]},#{p[1]})"}.join(',')}}" end def rel_to_s(r) "{#{r.map{|p| "(#{p[0]},#{p[1]})"}.join(',')}}" end