many changes, mainly dgp1 algorithm
This commit is contained in:
parent
d484a3254b
commit
acbabee742
7
AUTHORS
Normal file
7
AUTHORS
Normal file
@ -0,0 +1,7 @@
|
||||
Tomasz Obrębski <obrebski@amu.edu.pl>
|
||||
Michał Stolarski
|
||||
Justyna Walkowska <ynka@amu.edu.pl>
|
||||
Pawel Konieczka
|
||||
Marcin Walas
|
||||
Paweł Wereński
|
||||
Mateusz Hromada <ruanda@amu.edu.pl>
|
24
share/pl_PL.ISO-8859-2/Makefile
Normal file
24
share/pl_PL.ISO-8859-2/Makefile
Normal file
@ -0,0 +1,24 @@
|
||||
# Builds the compiled dictionary files for this locale directory from lem.dic
# using the compdic-* tools referenced by relative path below.
include ../../config.mak
|
||||
|
||||
# Files built by the default target.
# NOTE(review): cor.bin and gue.bin are listed here but no rule in this file
# builds them - confirm they are provided by config.mak or are still to be added.
TARGETS = lem.bin lem.cats cor.bin gue.bin
|
||||
# NOTE(review): COMPDICDIR is not referenced by any rule in this file; the
# recipes below spell out ../../src/compdic/ directly - confirm whether it is
# still needed.
COMPDICDIR = ../
|
||||
.PHONY: all
|
||||
all: $(TARGETS)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# main section
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
# lem.bin is produced from the intermediate lem.fst.
lem.bin: lem.fst
|
||||
../../src/compdic/compdic-fst-to-bin lem.fst lem.bin
|
||||
|
||||
# lem.fst is produced from the source dictionary lem.dic.
lem.fst: lem.dic
|
||||
../../src/compdic/compdic-dic-to-fst lem.dic lem.fst
|
||||
|
||||
# lem.cats is produced from the same source dictionary lem.dic.
lem.cats: lem.dic
|
||||
../../src/compdic/compdic-dic-to-cats lem.dic lem.cats
|
||||
|
||||
|
||||
.PHONY: clean
|
||||
# Removes the files generated by the lem.* rules above (lem.dic is kept).
clean:
|
||||
rm -f lem.bin lem.fst lem.cats
|
2521575
share/pl_PL.ISO-8859-2/lem.dic
Normal file
2521575
share/pl_PL.ISO-8859-2/lem.dic
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,7 @@
|
||||
#! /bin/bash
|
||||
|
||||
no_of_parts=0
|
||||
part_size=100000
|
||||
|
||||
while [ $# -gt 2 ]
|
||||
do
|
||||
@ -36,7 +37,7 @@ fst=$2
|
||||
|
||||
if [ $no_of_parts -eq 0 ]
|
||||
then
|
||||
no_of_parts=$(( `cat $1 | wc -l` / 75000 + 1 ))
|
||||
no_of_parts=$(( `cat $1 | wc -l` / $part_size + 1 ))
|
||||
fi
|
||||
|
||||
|
||||
|
328
src/dgp/dgp1.cc
328
src/dgp/dgp1.cc
@ -1,7 +1,7 @@
|
||||
#include <iostream>
|
||||
using namespace std;
|
||||
|
||||
#include "dgp0.hh"
|
||||
#include "dgp1.hh"
|
||||
#include "global.hh"
|
||||
|
||||
extern Grammar grammar;
|
||||
@ -70,27 +70,15 @@ NodeProp compute_dep_prop(NodeProp depprop, const Link& link, list<Boubble*> bs)
|
||||
|
||||
//====================================================================================================
|
||||
|
||||
int find_existing_node(int mnodeind, NodeProp p, bitset<MAXNODES>& newheadLH, bitset<MAXNODES>& newheadLV)
|
||||
int find_existing_node(int mnode, NodeProp p, Edge e)
|
||||
{
|
||||
MNode& mnode = mgraph[mnodeind];
|
||||
int ret=-1;
|
||||
for(vector<int>::iterator ps=mnode.snodes.begin(); ps!=mnode.snodes.end(); ++ps)
|
||||
for(vector<int>::iterator i = mgraph[mnode].snodes.begin(); i!=mgraph[mnode].snodes.end(); ++i)
|
||||
if(sgraph[*i].prop==p && sgraph[*i].edge==e)
|
||||
{
|
||||
if(debug) fprintf(stderr,"#find existing node: checking %d ... \n", *ps);
|
||||
if(sgraph[*ps].prop==p)
|
||||
if(sgraph[*ps].LH==newheadLH && sgraph[*ps].LV==newheadLV)
|
||||
{
|
||||
ret = *ps;
|
||||
if(debug) fprintf(stderr,"#\tsucceeded because of LH/LV equality ()\n");
|
||||
if(debug) fprintf(stderr,"\t\treusing %d\n",*i);
|
||||
return *i;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(debug) fprintf(stderr,"#\tfailed beacause of LH/LV inequality\n");
|
||||
}
|
||||
}
|
||||
|
||||
if(debug) fprintf(stderr,"\n");
|
||||
return ret;
|
||||
return -1;
|
||||
}
|
||||
|
||||
//====================================================================================================
|
||||
@ -160,261 +148,82 @@ void create_reverse_links(int n)
|
||||
|
||||
//====================================================================================================
|
||||
|
||||
int create_new_head_node_left(int anc, NodeProp& prop, bitset<MAXNODES>& LH, bitset<MAXNODES>& LD, bitset<MAXNODES>& LV)
|
||||
int create_new_node(int anc, NodeProp& prop, Edge edge)
|
||||
{
|
||||
int newheadind = sgraph.clone(anc,prop);
|
||||
int newheadind = sgraph.clone(anc,prop,edge);
|
||||
nodelist.push_back(newheadind);
|
||||
sgraph[newheadind].LH = LH;
|
||||
sgraph[newheadind].LD = LD;
|
||||
sgraph[newheadind].in_LH = true;
|
||||
sgraph[newheadind].LV.reset();
|
||||
|
||||
copy_links(anc,newheadind);
|
||||
create_reverse_links(newheadind);
|
||||
|
||||
if(debug) sgraph.print_node_debug(stderr,"add new",newheadind,anc);
|
||||
if(debug) sgraph.print_node_debug(stderr,"clone",newheadind,anc);
|
||||
// if(debug) print_sets(newheadind);
|
||||
return newheadind;
|
||||
}
|
||||
|
||||
int create_new_dep_node_left(int anc, NodeProp& prop, bitset<MAXNODES>& LH, bitset<MAXNODES>& LD, bitset<MAXNODES>& LV)
|
||||
{
|
||||
int newind = sgraph.clone(anc,prop);
|
||||
nodelist.push_back(newind);
|
||||
sgraph[newind].LH.reset();
|
||||
sgraph[newind].LD=LD;
|
||||
sgraph[newind].in_LH=false; //???????
|
||||
sgraph[newind].LV.reset();
|
||||
|
||||
copy_links(anc,newind);
|
||||
create_reverse_links(newind);
|
||||
|
||||
if(debug) sgraph.print_node_debug(stderr,"add new",newind,anc);
|
||||
// if(debug) print_sets(newind);
|
||||
|
||||
return newind;
|
||||
}
|
||||
|
||||
int create_new_head_node_right(int anc, NodeProp& prop, bitset<MAXNODES>& newheadLH, bitset<MAXNODES>& newheadLD, bitset<MAXNODES>& newheadLV)
|
||||
{
|
||||
int newheadind = sgraph.clone(anc,prop);
|
||||
nodelist.push_back(newheadind);
|
||||
sgraph[newheadind].LH=newheadLH;
|
||||
sgraph[newheadind].LD=newheadLD;
|
||||
sgraph[newheadind].in_LH=false;
|
||||
sgraph[newheadind].LV=newheadLV;
|
||||
|
||||
copy_links(anc,newheadind);
|
||||
create_reverse_links(newheadind);
|
||||
|
||||
if(debug) sgraph.print_node_debug(stderr,"add new",newheadind,anc);
|
||||
// if(debug) print_sets(newheadind);
|
||||
|
||||
return newheadind;
|
||||
}
|
||||
|
||||
int create_new_dep_node_right(int anc, NodeProp& prop, bitset<MAXNODES>& LH, bitset<MAXNODES>& LD, bitset<MAXNODES>& LV)
|
||||
{
|
||||
int newind = sgraph.clone(anc,prop);
|
||||
nodelist.push_back(newind);
|
||||
sgraph[newind].LH=LH;
|
||||
sgraph[newind].LD=LD;
|
||||
sgraph[newind].in_LH=true; //???????
|
||||
sgraph[newind].LV.reset();
|
||||
|
||||
copy_links(anc,newind);
|
||||
create_reverse_links(newind);
|
||||
|
||||
if(debug) sgraph.print_node_debug(stderr,"ADD NEW",newind,anc);
|
||||
// if(debug) print_sets(newind);
|
||||
|
||||
return newind;
|
||||
}
|
||||
|
||||
//====================================================================================================
|
||||
|
||||
void connect_left(int h, int d, const Link& l, list<Boubble*>& new_head_boubbles, list<Boubble*>& new_dep_boubbles)
|
||||
{
|
||||
|
||||
NodeProp &oldheadprop = sgraph[h].prop;
|
||||
NodeProp &olddepprop = sgraph[d].prop;
|
||||
NodeProp &old_head_prop = sgraph[h].prop;
|
||||
NodeProp &old_dep_prop = sgraph[d].prop;
|
||||
NodeProp new_head_prop = compute_head_prop(old_head_prop,l,new_head_boubbles,old_dep_prop.flags);
|
||||
NodeProp new_dep_prop = compute_dep_prop(old_dep_prop,l,new_dep_boubbles);
|
||||
|
||||
NodeProp newheadprop = compute_head_prop(oldheadprop,l,new_head_boubbles,olddepprop.flags);
|
||||
Edge new_dep_edge(sgraph[d].edge);
|
||||
int newd = find_existing_node(sgraph[d].mnode, new_dep_prop, new_dep_edge);
|
||||
if( newd < 0 )
|
||||
newd = create_new_node(d,new_dep_prop,new_dep_edge);
|
||||
|
||||
int newheadind;
|
||||
if(oldheadprop==newheadprop)
|
||||
newheadind = h;
|
||||
else
|
||||
Edge new_head_edge(sgraph[newd].edge,newd);
|
||||
int newh = find_existing_node(sgraph[h].mnode, new_head_prop, new_head_edge);
|
||||
if( newh < 0 )
|
||||
newh = create_new_node(h,new_head_prop,new_head_edge);
|
||||
|
||||
sgraph[newh].deps.push_back(Arc(newd,l.role,h,d));
|
||||
sgraph[newd].heads.push_back(Arc(newh,l.role,h,d));
|
||||
|
||||
if(debug)
|
||||
{
|
||||
bitset<MAXNODES> newheadLH = sgraph[h].LH;
|
||||
bitset<MAXNODES> newheadLV = sgraph[d].LV;
|
||||
bitset<MAXNODES> newheadLD = sgraph[h].LD;
|
||||
|
||||
newheadind = find_existing_node(sgraph[h].mnode, newheadprop, newheadLH, newheadLV);
|
||||
if( newheadind >= 0) // W£¡CZONE
|
||||
sgraph[newheadind].LD |= newheadLD;
|
||||
else
|
||||
{
|
||||
newheadind = create_new_head_node_left(h,newheadprop,newheadLH,newheadLD,newheadLV);
|
||||
sgraph[newheadind].edge.clear();
|
||||
sgraph[newheadind].edge_contains_self = false;
|
||||
sgraph.print_arc(stderr,"link",newh,d,l.role,0);
|
||||
sgraph.print_node_debug(stderr,"",newh,h);
|
||||
sgraph.print_node_debug(stderr,"",newd,d);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
NodeProp newdepprop = compute_dep_prop(olddepprop,l,new_dep_boubbles);
|
||||
|
||||
int newdepind;
|
||||
|
||||
if(olddepprop==newdepprop)
|
||||
newdepind = d;
|
||||
else
|
||||
{
|
||||
bitset<MAXNODES> newdepLH = sgraph[d].LH;
|
||||
bitset<MAXNODES> newdepLV = sgraph[d].LV;
|
||||
bitset<MAXNODES> newdepLD = sgraph[d].LD;
|
||||
|
||||
newdepind = find_existing_node(sgraph[d].mnode, newdepprop, newdepLH, newdepLV);
|
||||
if( newdepind >= 0) // W£¡CZONE
|
||||
sgraph[newdepind].LD |= newdepLD; // TYLKO DLA LD
|
||||
else
|
||||
{
|
||||
newdepind = create_new_dep_node_left(d,newdepprop,newdepLH,newdepLD,newdepLV);
|
||||
sgraph[newdepind].edge.clear();
|
||||
//sgraph[newdepind].edge.push_back(newdepind); // TO
|
||||
sgraph[newdepind].edge_contains_self = true; // LUB TO
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
sgraph[newheadind].deps.push_back(Arc(newdepind,l.role,h,d));
|
||||
sgraph[newdepind].heads.push_back(Arc(newheadind,l.role,h,d));
|
||||
sgraph[newheadind].edge.push_back(newdepind);
|
||||
|
||||
if(sgraph[d].saturated()) sgraph[newheadind].LV |= sgraph[d].LV;
|
||||
|
||||
sgraph[newheadind].LD.set(d);
|
||||
if(sgraph[d].saturated()) sgraph[newheadind].LD |= sgraph[d].LD;
|
||||
|
||||
if(debug) sgraph.print_arc(stderr,"new link",newheadind,d,l.role,0);
|
||||
if(debug) sgraph.print_node_debug(stderr,"update",newheadind,h);
|
||||
// if(debug) print_sets(newheadind);
|
||||
if(debug) sgraph.print_node_debug(stderr,"update",newdepind,d);
|
||||
// if(debug) print_sets(newdepind);
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------------------------------
|
||||
|
||||
void connect_right(int h, int d, const Link& l, list<Boubble*>& new_head_boubbles, list<Boubble*>& new_dep_boubbles)
|
||||
{
|
||||
NodeProp &oldheadprop = sgraph[h].prop;
|
||||
NodeProp &old_head_prop = sgraph[h].prop;
|
||||
NodeProp &old_dep_prop = sgraph[d].prop;
|
||||
NodeProp new_head_prop = compute_head_prop(old_head_prop,l,new_head_boubbles,old_dep_prop.flags);
|
||||
NodeProp new_dep_prop = compute_dep_prop(old_dep_prop,l,new_dep_boubbles);
|
||||
|
||||
NodeProp newheadprop = compute_head_prop(oldheadprop,l,new_head_boubbles, sgraph[d].prop.flags);
|
||||
|
||||
int newheadind;
|
||||
|
||||
if(oldheadprop==newheadprop)
|
||||
newheadind = h;
|
||||
else
|
||||
Edge new_head_edge(sgraph[h].edge);
|
||||
int newh = find_existing_node(sgraph[h].mnode, new_head_prop, new_head_edge);
|
||||
if( newh < 0 )
|
||||
{
|
||||
bitset<MAXNODES> newheadLH = sgraph[h].LH;
|
||||
bitset<MAXNODES> newheadLV = sgraph[h].LV;
|
||||
bitset<MAXNODES> newheadLD = sgraph[h].LD;
|
||||
newh = create_new_node(h,new_head_prop,new_head_edge);
|
||||
sgraph[newh].visible_as_neighbour = false;
|
||||
}
|
||||
|
||||
newheadind = find_existing_node(sgraph[h].mnode, newheadprop, newheadLH, newheadLV);
|
||||
Edge new_dep_edge;
|
||||
int newd = find_existing_node(sgraph[d].mnode, new_dep_prop, new_dep_edge);
|
||||
if( newd < 0)
|
||||
newd = create_new_node(d,new_dep_prop,new_dep_edge);
|
||||
|
||||
if(debug) fprintf(stderr,"#HEAD EXISTS %d\n",newheadind);
|
||||
|
||||
if( newheadind >= 0) // W£¡CZONE
|
||||
sgraph[newheadind].LD |= newheadLD; // TYLKO DLA LD
|
||||
else
|
||||
sgraph[newd].heads.push_back(Arc(newh,l.role,h,d));
|
||||
sgraph[newh].deps.push_back(Arc(newd,l.role,h,d));
|
||||
|
||||
if(debug)
|
||||
{
|
||||
newheadind = create_new_head_node_right(h,newheadprop,newheadLH,newheadLD,newheadLV);
|
||||
//if(!sgraph[h].edge.empty()) sgraph[newheadind].edge.push_back(newheadind); // TO
|
||||
sgraph[newheadind].edge_contains_self = sgraph[h].edge_contains_self; // LUB TO
|
||||
sgraph[newheadind].visible_as_neighbour = false;
|
||||
}
|
||||
sgraph.print_arc(stderr,"link",newh,newd,l.role,1);
|
||||
sgraph.print_node_debug(stderr,"",newh,h);
|
||||
sgraph.print_node_debug(stderr,"",newd,d);
|
||||
}
|
||||
|
||||
NodeProp &olddepprop = sgraph[d].prop;
|
||||
NodeProp newdepprop = compute_dep_prop(olddepprop,l,new_dep_boubbles);
|
||||
|
||||
int newdepind;
|
||||
|
||||
if(olddepprop==newdepprop)
|
||||
newdepind = d;
|
||||
else
|
||||
{
|
||||
bitset<MAXNODES> newdepLH = sgraph[d].LH;
|
||||
bitset<MAXNODES> newdepLV = sgraph[d].LV;
|
||||
bitset<MAXNODES> newdepLD = sgraph[d].LD;
|
||||
|
||||
newdepind = find_existing_node(sgraph[d].mnode, newdepprop, newdepLH, newdepLV);
|
||||
|
||||
if(debug) fprintf(stderr,"#DEP EXISTS %d\n",newdepind);
|
||||
|
||||
if( newdepind >= 0) // W£¡CZONE
|
||||
sgraph[newdepind].LD |= newdepLD; // TYLKO DLA LD
|
||||
else
|
||||
{
|
||||
newdepind = create_new_dep_node_right(d,newdepprop,newdepLH,newdepLD,newdepLV);
|
||||
sgraph[newdepind].edge.clear();
|
||||
sgraph[newdepind].edge_contains_self = false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
sgraph[newdepind].heads.push_back(Arc(newheadind,l.role,h,d));
|
||||
sgraph[newheadind].deps.push_back(Arc(newdepind,l.role,h,d));
|
||||
//sgraph[newdepind].edge.push_back(newheadind);
|
||||
|
||||
sgraph[newdepind].LH.set(newheadind);
|
||||
|
||||
// sgraph[*d].prop.merge_boubbles(new_dep_boubbles);
|
||||
|
||||
if(sgraph[newheadind].saturated()) sgraph[newdepind].LH |= sgraph[newheadind].LH;
|
||||
|
||||
if(debug) sgraph.print_arc(stderr,"new link",newheadind,newdepind,l.role,1);
|
||||
if(debug) sgraph.print_node_debug(stderr,"update",newheadind,h);
|
||||
if(debug) sgraph.print_node_debug(stderr,"update",newdepind,d);
|
||||
|
||||
}
|
||||
|
||||
//====================================================================================================
|
||||
|
||||
// bool check_meeting_boubles(list<Boubble*>& hboubbles, list<Boubble*>& dboubbles)
|
||||
// {
|
||||
// bool hremove=false; // czy usunąć ostatnio sprawdzany bąbel
|
||||
// bool dremove=false; // czy usunąć ostatnio sprawdzany bąbel
|
||||
|
||||
// for(list<Boubble*>::iterator hb = hboubbles.begin(); hb != hboubbles.end(); hb = hremove ? hboubbles.erase(hb) : ++hb )
|
||||
// {
|
||||
// hremove=false;
|
||||
// for(list<Boubble*>::iterator db = dboubbles.begin(); db != dboubbles.end(); db = dremove ? dboubbles.erase(db) : ++db )
|
||||
// {
|
||||
// dremove=false;
|
||||
// if( (*hb)->rel()==(*db)->rel() && (*hb)->dir()==DOWN && (*db)->dir()==UP && (*hb)->reverse()!=(*db)->reverse() )
|
||||
// {
|
||||
// int srcnode,dstnode;
|
||||
// if( (*hb)->reverse()==false )
|
||||
// srcnode = (*hb)->src(), dstnode = (*db)->src();
|
||||
// else
|
||||
// srcnode = (*db)->src(), dstnode = (*hb)->src();
|
||||
// if( grammar.check_longrel(sgraph.cat(srcnode), sgraph.cat(dstnode), (*hb)->rel()) )
|
||||
// {
|
||||
// hremove=dremove=true;
|
||||
// if(debug) fprintf(stderr,"BOUBBLES MET!!!\n");
|
||||
// }
|
||||
// else
|
||||
// {
|
||||
// if(debug) fprintf(stderr,"BOUBBLES' MEETING FAILED!!!\n");
|
||||
// return false;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// return true;
|
||||
// }
|
||||
|
||||
//====================================================================================================
|
||||
|
||||
@ -423,17 +232,18 @@ bool check_meeting_boubles(list<Boubble*>& boubbles)
|
||||
bool hremove=false; // czy usun±æ ostatnio sprawdzany b±bel
|
||||
bool dremove=false; // czy usun±æ ostatnio sprawdzany b±bel
|
||||
|
||||
// cerr << "CHECKING MEETING BUBBLES" << endl;
|
||||
|
||||
for(list<Boubble*>::iterator hb = boubbles.begin(); hb != boubbles.end(); hb = hremove ? boubbles.erase(hb) : ++hb )
|
||||
{
|
||||
cout << endl << "hb:" << **hb ;
|
||||
hremove=false;
|
||||
for(list<Boubble*>::iterator db = hb; db != boubbles.end(); db = dremove ? boubbles.erase(db) : ++db )
|
||||
{
|
||||
cout << " db:" << **db;
|
||||
// cerr << " db:" << **db;
|
||||
dremove=false;
|
||||
if( (*hb)->rel()==(*db)->rel() && (*hb)->reverse()!=(*db)->reverse() )
|
||||
{
|
||||
cout << "Z";
|
||||
// cerr << "Z";
|
||||
int srcnode,dstnode;
|
||||
if( (*hb)->reverse()==false )
|
||||
srcnode = (*hb)->src(), dstnode = (*db)->src();
|
||||
@ -441,13 +251,13 @@ bool check_meeting_boubles(list<Boubble*>& boubbles)
|
||||
srcnode = (*db)->src(), dstnode = (*hb)->src();
|
||||
if( grammar.check_longrel(sgraph.cat(srcnode), sgraph.cat(dstnode), (*hb)->rel()) )
|
||||
{
|
||||
cout << " REMOVE ";
|
||||
// cerr << " REMOVE ";
|
||||
hremove=dremove=true;
|
||||
if(debug) fprintf(stderr,"BOUBBLES MET!!!\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
cout << " FAIL ";
|
||||
// cerr << " FAIL ";
|
||||
if(debug) fprintf(stderr,"BOUBBLES' MEETING FAILED!!!\n");
|
||||
return false;
|
||||
}
|
||||
@ -470,7 +280,7 @@ bool check_boubbles_at_target(list<Boubble*>& boubbles, int node)
|
||||
if( (*b)->is_at_target() )
|
||||
if( grammar.check_longrel(sgraph.cat((*b)->src()), sgraph.cat(node), (*b)->rel()) )
|
||||
{
|
||||
cout << endl << "REMOVE ChBatT " << **b << endl;
|
||||
// cerr << endl << "REMOVE ChBatT " << **b << endl;
|
||||
remove=true;
|
||||
}
|
||||
else
|
||||
@ -488,12 +298,9 @@ void try_connect_dependents(int j)
|
||||
LViterator lvi(sgraph,j);
|
||||
int i;
|
||||
while((i=lvi.next()) >= 0)
|
||||
{
|
||||
//if(debug) sgraph.print_node_debug(stderr,"D-CUR>",i,-1);
|
||||
|
||||
if(sgraph.saturated(i))
|
||||
{
|
||||
if(debug) {fprintf(stderr,"%d <--",i); }
|
||||
if(debug) {fprintf(stderr,"\t%d <-- %d",i,j); }
|
||||
|
||||
list<const Link*> ji_links = grammar.connectable2( sgraph.cat(j), sgraph.cat(i), sgraph[j].prop.flags, sgraph[i].prop.flags); // ref do Roles!!!
|
||||
list<const Link*>::iterator ri = ji_links.begin();
|
||||
@ -516,7 +323,7 @@ void try_connect_dependents(int j)
|
||||
{
|
||||
if(debug) fprintf(stderr," ...SUCCESS!\n");
|
||||
connect_left( j, i, **ri, new_head_boubbles, new_dep_boubbles);
|
||||
lvi.update_edge(sgraph,i);
|
||||
// lvi.update_edge(sgraph,i);
|
||||
}
|
||||
else
|
||||
{ if(debug) fprintf(stderr," ...boubbles failed\n"); }
|
||||
@ -525,9 +332,7 @@ void try_connect_dependents(int j)
|
||||
}
|
||||
}
|
||||
else
|
||||
if(debug) {fprintf(stderr,"%d <-- unsaturated\n",i); }
|
||||
}
|
||||
|
||||
if(debug) {fprintf(stderr,"\t%d <-- %d\t%d unsaturated\n",i,j,i); }
|
||||
}
|
||||
//----------------------------------------------------------------------------------------------------
|
||||
|
||||
@ -536,11 +341,9 @@ void try_connect_heads(int j)
|
||||
LViterator lvi(sgraph,j);
|
||||
int i;
|
||||
while((i=lvi.next()) >= 0)
|
||||
{
|
||||
// if(debug) sgraph.print_node_debug(stderr,"H-CUR> ",i,-1);
|
||||
if(sgraph.saturated(j))
|
||||
{
|
||||
if(debug) fprintf(stderr, "%d -->",i);
|
||||
if(debug) fprintf(stderr, "\t%d --> %d",i,j);
|
||||
|
||||
list<const Link*> ij_links = grammar.connectable2( sgraph.cat(i), sgraph.cat(j), sgraph[i].prop.flags, sgraph[j].prop.flags );
|
||||
list<const Link*>::iterator ri = ij_links.begin();
|
||||
@ -572,8 +375,7 @@ void try_connect_heads(int j)
|
||||
}
|
||||
}
|
||||
else
|
||||
if(debug) {fprintf(stderr,"%d <-- unsaturated\n",j); }
|
||||
}
|
||||
if(debug) {fprintf(stderr,"\t* <-- %d unsaturated\n",j); }
|
||||
}
|
||||
|
||||
//====================================================================================================
|
||||
@ -661,13 +463,13 @@ void dgp1()
|
||||
set_initial_constraints(basenode);
|
||||
nodelist.push_back(basenode);
|
||||
|
||||
if(debug) sgraph.print_node_debug(stderr,"add base",basenode,-1); // STDOUT!!!
|
||||
if(debug) sgraph.print_node_debug(stderr,"node",basenode,-1); // STDOUT!!!
|
||||
// if(debug) print_sets(basenode);
|
||||
|
||||
list<int>::iterator cursor=processed;
|
||||
while(++cursor != nodelist.end())
|
||||
{
|
||||
if(debug) sgraph.print_node_debug(stderr,"MAIN-CUR> ",*cursor,-1);
|
||||
if(debug) sgraph.print_node_debug(stderr,"CUR>",*cursor,-1);
|
||||
try_connect_dependents(*cursor);
|
||||
try_connect_heads(*cursor);
|
||||
processed=cursor;
|
||||
|
@ -1,5 +1,5 @@
|
||||
#ifndef _DGP0_HH
|
||||
#define _DGP0_HH
|
||||
#ifndef _DGP1_HH
|
||||
#define _DGP1_HH
|
||||
|
||||
#include "grammar.hh"
|
||||
#include "sgraph.hh"
|
||||
|
@ -1,4 +1,3 @@
|
||||
#include "global.hh"
|
||||
#include "sgraph.hh"
|
||||
#include "grammar.hh"
|
||||
#include "const.hh"
|
||||
@ -15,21 +14,21 @@ int SGraph::add_base_snode(int mnodeind)
|
||||
|
||||
newnode.mnode=mnodeind;
|
||||
|
||||
for(vector<int>::iterator pm=mgraph[newnode.mnode].pred.begin(); pm!=mgraph[newnode.mnode].pred.end(); ++pm)
|
||||
for(vector<int>::iterator ps=mgraph[*pm].snodes.begin(); ps!=mgraph[*pm].snodes.end(); ++ps)
|
||||
if(nodes[*ps].in_LH)
|
||||
{
|
||||
newnode.LV.set(*ps);
|
||||
if(nodes[*ps].saturated()) newnode.LV |= nodes[*ps].LH;
|
||||
}
|
||||
// for(vector<int>::iterator pm=mgraph[newnode.mnode].pred.begin(); pm!=mgraph[newnode.mnode].pred.end(); ++pm)
|
||||
// for(vector<int>::iterator ps=mgraph[*pm].snodes.begin(); ps!=mgraph[*pm].snodes.end(); ++ps)
|
||||
// if(nodes[*ps].in_LH)
|
||||
// {
|
||||
// newnode.LV.set(*ps);
|
||||
// if(nodes[*ps].saturated()) newnode.LV |= nodes[*ps].LH;
|
||||
// }
|
||||
|
||||
mgraph[newnode.mnode].snodes.push_back(lastnodeind());
|
||||
|
||||
newnode.in_LH=true;
|
||||
// newnode.in_LH=true;
|
||||
|
||||
newnode.edge.push_back(lastnodeind());
|
||||
// newnode.edge.push_back(lastnodeind());
|
||||
|
||||
newnode.edge_contains_self = true ;
|
||||
newnode.edge.insert_self();
|
||||
|
||||
return lastnodeind();
|
||||
}
|
||||
@ -54,12 +53,13 @@ void SGraph::update_right(int headind, int depind)
|
||||
|
||||
//====================================================================================================
|
||||
|
||||
int SGraph::clone(int ancind, NodeProp newprop)
|
||||
int SGraph::clone(int ancind, NodeProp newprop, Edge edge)
|
||||
{
|
||||
SNode &newnode=makenewnode();
|
||||
SNode &ancnode = nodes[ancind];
|
||||
|
||||
newnode.prop = newprop;
|
||||
newnode.edge = edge;
|
||||
newnode.mnode = ancnode.mnode;
|
||||
mgraph[newnode.mnode].snodes.push_back(lastnodeind());
|
||||
|
||||
@ -89,9 +89,9 @@ int SGraph::print_node_debug(FILE* f, const char* pref, int n, int anc)
|
||||
void SGraph::print_arc(FILE* f, const char* msg, int head, int dep, Role role, int dir) // 0 - left, 1 - right
|
||||
{
|
||||
if(dir==0)
|
||||
fprintf(f,"%s %s:%d <-- %d\n", msg, role.str(), dep, head);
|
||||
fprintf(f,"%s\t%d <-- %d\t%s\n", msg, dep, head, role.str());
|
||||
else
|
||||
fprintf(f,"%s %s:%d --> %d\n", msg, role.str(), head, dep);
|
||||
fprintf(f,"%s\t%d --> %d\t%s\n", msg, head, dep, role.str());
|
||||
}
|
||||
|
||||
//====================================================================================================
|
||||
@ -171,15 +171,17 @@ int SGraph::sprint_node(char* buf, int nodeind, int anc, unsigned int info)
|
||||
int SGraph::sprint_node_debug(char* buf, const char* pref, int n, int anc)
|
||||
{
|
||||
char *buf0 = buf;
|
||||
buf+=sprintf(buf,"%-10s",pref);
|
||||
buf+=sprintf(buf,"%-8s",pref);
|
||||
buf+=sprintf(buf,"%d.%s",n,form(n));
|
||||
buf+=sprintf(buf,";");
|
||||
buf+=sprintf(buf,"%s ",cat(n).str());
|
||||
while(buf-buf0<40) buf+=sprintf(buf," ");
|
||||
buf+=sprint_node(buf,n,anc,HEADS|DEPS|SETS|CONSTRAINTS);
|
||||
buf+=sprint_node(buf,n,anc,HEADS|DEPS|CONSTRAINTS);
|
||||
|
||||
buf+=sprintf(buf,"/");
|
||||
for(vector<int>::iterator e = nodes[n].edge.begin(); e != nodes[n].edge.end(); e++ )
|
||||
if(nodes[n].edge.self())
|
||||
buf += sprintf(buf,"* ");
|
||||
for(list<int>::iterator e = nodes[n].edge.others().begin(); e != nodes[n].edge.others().end(); e++ )
|
||||
buf += sprintf(buf,"%d ", *e);
|
||||
|
||||
buf+=sprintf(buf,"\n");
|
||||
|
@ -12,7 +12,7 @@
|
||||
#include "mgraph.hh"
|
||||
#include "thesymbols.hh"
|
||||
#include "boubble.hh"
|
||||
|
||||
#include "global.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
@ -115,43 +115,11 @@ void NodeProp::copy(const NodeProp& p)
|
||||
boubbles.push_back(new Boubble(**b));
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------------------------------
|
||||
|
||||
inline
|
||||
NodeProp::~NodeProp()
|
||||
{
|
||||
clear_boubbles();
|
||||
}
|
||||
//----------------------------------------------------------------------------------------------------
|
||||
|
||||
inline
|
||||
NodeProp::NodeProp()
|
||||
{
|
||||
clear();
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------------------------------
|
||||
|
||||
inline
|
||||
NodeProp::NodeProp(const NodeProp& p)
|
||||
{
|
||||
copy(p);
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------------------------------
|
||||
|
||||
inline
|
||||
NodeProp& NodeProp::operator=(const NodeProp& p)
|
||||
{
|
||||
clear();
|
||||
copy(p);
|
||||
return *this;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------------------------------
|
||||
|
||||
inline
|
||||
void NodeProp::clear()
|
||||
// Destructor: releases the boubbles via clear_boubbles().
inline NodeProp::~NodeProp() { clear_boubbles(); }

// Default constructor: starts from the state established by clear().
inline NodeProp::NodeProp() { clear(); }

// Copy constructor: delegates to copy(), which duplicates p's contents.
inline NodeProp::NodeProp(const NodeProp& p) { copy(p); }

// Copy assignment: discards the current contents and copies those of p.
// Returns *this.
inline NodeProp& NodeProp::operator=(const NodeProp& p)
{
  // Guard against self-assignment: clear() resets this object's own state,
  // so copying from the same object afterwards would copy the emptied state.
  if(this != &p)
  {
    clear();
    copy(p);
  }
  return *this;
}
|
||||
inline void NodeProp::clear()
|
||||
{
|
||||
required.reset();
|
||||
forbidden.reset();
|
||||
@ -161,6 +129,31 @@ void NodeProp::clear()
|
||||
clear_boubbles();
|
||||
}
|
||||
|
||||
//====================================================================================================
|
||||
// CLASS Edge
|
||||
//====================================================================================================
|
||||
|
||||
// Edge of a node: an optional "self" flag plus a list of other node indices
// that insert() keeps in ascending order.
class Edge
{
public:
  // Creates an empty edge: no self flag, no other nodes.
  Edge() : _self(false) { }

  // Creates an edge from e in which e's self flag (if set) is replaced by the
  // explicit node index map_self (see assign()).
  // _self is initialised here because assign() writes it only when e.self()
  // is true; otherwise the flag would be read while still indeterminate.
  Edge(const Edge& e, int map_self) : _self(false) { assign(e,map_self); }

  // True when the edge contains the owning node itself.
  bool self() const { return _self; }

  // The other node indices held by this edge.
  list<int>& others() { return _others; }
  const list<int>& others() const { return _others; }

  // Sets (or, with b=false, clears) the self flag.
  void insert_self(bool b=true) { _self=b; }

  // Inserts n before the first element that is not smaller than n.
  // Duplicates are not filtered out.
  void insert(int n)
  {
    list<int>::iterator i=others().begin();
    while(i!=others().end() && *i<n) ++i;
    others().insert(i,n);
  }

  // Inserts every element of l, one by one, via insert(int).
  void insert(list<int> l)
  {
    for(list<int>::const_iterator i=l.begin(); i!=l.end(); i++) insert(*i);
  }

  // Replaces the list of other nodes with that of e. If e has its self flag
  // set, this edge's self flag is cleared and map_self is inserted into the
  // list instead; if not, this edge's self flag is left as it was.
  // NOTE(review): with the default map_self=-1 and e.self() set, -1 is
  // inserted into the list - confirm callers always pass a real node index.
  void assign(const Edge& e, int map_self=-1)
  {
    _others = e._others;
    if(e.self()) { _self = false; insert(map_self); }
  }

  // Two edges are equal when both the self flag and the node lists match.
  bool operator==(const Edge& e) const { return _self == e._self && _others == e._others; }

private:
  bool _self;
  list<int> _others;
};
|
||||
|
||||
//====================================================================================================
|
||||
// CLASS SNode
|
||||
//====================================================================================================
|
||||
@ -174,8 +167,7 @@ struct SNode
|
||||
|
||||
NodeProp prop;
|
||||
|
||||
vector<int> edge;
|
||||
bool edge_contains_self;
|
||||
Edge edge;
|
||||
bool visible_as_neighbour;
|
||||
|
||||
bitset<MAXNODES> LV;
|
||||
@ -188,6 +180,14 @@ struct SNode
|
||||
|
||||
void clear();
|
||||
bool saturated();
|
||||
|
||||
// void edge_clear() { edge.clear(); edge_contains_self=false;}
|
||||
// void edge_set(int i) { edge.clear(); edge_contains_self=false; edge.push_back(i); }
|
||||
// void edge_set(vector<int>& v) { edge.assign(v.begin(),v.end()); edge_contains_self=false; }
|
||||
// void edge_set_self(bool b=true) { edge.clear(); edge_contains_self=b; }
|
||||
// void edge_add(int i) { edge.push_back(i); }
|
||||
// void edge_add(vector<int>& v) { edge.insert(edge.end(),v.begin(),v.end()); }
|
||||
// void edge_add_self(bool b=true) { edge_contains_self=b; }
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------------------------------
|
||||
@ -215,7 +215,7 @@ public:
|
||||
|
||||
void clear() { nodes.clear(); }
|
||||
int add_base_snode(int mnodeind);
|
||||
int clone(int ancind, NodeProp newprop);
|
||||
int clone(int ancind, NodeProp newprop, Edge edge);
|
||||
void update_left(int headind, int depind);
|
||||
void update_right(int headind, int depind);
|
||||
bool visible(int left, int right);
|
||||
@ -298,7 +298,7 @@ public:
|
||||
void update_edge(SGraph& sg, int e);
|
||||
|
||||
private:
|
||||
|
||||
int snode;
|
||||
SGraph& sgraph;
|
||||
MGraph& mgraph;
|
||||
stack<int> waydown;
|
||||
@ -311,28 +311,31 @@ private:
|
||||
|
||||
};
|
||||
|
||||
inline LViterator::LViterator(SGraph& sg, int n, bool s=true) : sgraph(sg), mgraph(sg.mgraph), strict(s)
|
||||
inline LViterator::LViterator(SGraph& sg, int n, bool s=true) : snode(n), sgraph(sg), mgraph(sg.mgraph), strict(s)
|
||||
{
|
||||
if(sg[n].edge_contains_self) // TO DODAÆ PO PRZEJ¦CIU NA EDGE_CONTAINS_SELF
|
||||
if(sg[n].edge.self())
|
||||
{
|
||||
push_ld(n);
|
||||
push_ln(n);
|
||||
}
|
||||
|
||||
for(vector<int>::iterator i=sg[n].edge.begin(); i!=sg[n].edge.end(); ++i)
|
||||
{
|
||||
if(*i != n)
|
||||
for(list<int>::iterator i=sg[n].edge.others().begin(); i!=sg[n].edge.others().end(); ++i)
|
||||
{
|
||||
push_ld(*i);
|
||||
push_ln(*i);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
inline void LViterator::update_edge(SGraph& sg, int n)
|
||||
{
|
||||
for(vector<int>::iterator i=sg[n].edge.begin(); i!=sg[n].edge.end(); ++i)
|
||||
if(sg[n].edge.self())
|
||||
{
|
||||
push_ld(n);
|
||||
push_ln(n);
|
||||
}
|
||||
|
||||
for(list<int>::iterator i=sg[n].edge.others().begin(); i!=sg[n].edge.others().end(); ++i)
|
||||
{
|
||||
push_ld(*i);
|
||||
push_ln(*i);
|
||||
@ -344,7 +347,10 @@ inline int LViterator::next()
|
||||
if(wayup.empty())
|
||||
{
|
||||
if(waydown.empty())
|
||||
{
|
||||
if(debug) fprintf(stderr,"\t\tLViterator(%d)\treturn %d\n",snode,-1);
|
||||
return -1; //
|
||||
}
|
||||
else
|
||||
{
|
||||
int k = waydown.top();
|
||||
@ -352,12 +358,16 @@ inline int LViterator::next()
|
||||
push_ld(k);
|
||||
push_ln(k);
|
||||
if(wayup.empty())
|
||||
{
|
||||
if(debug) fprintf(stderr,"\t\tLViterator(%d)\treturn %d\n",snode,-1);
|
||||
return -1; // k NIE MA POPRZEDNIKÓW, NIE MO¯E TE¯ ZATEM MIEÆ LEWOSTRONNYCH PODRZÊDNIKÓW
|
||||
}
|
||||
else
|
||||
{
|
||||
int i = wayup.top();
|
||||
wayup.pop();
|
||||
push_lh(i);
|
||||
if(debug) fprintf(stderr,"\t\tLViterator(%d)\treturn %d\n",snode,i);
|
||||
return i;
|
||||
}
|
||||
}
|
||||
@ -368,6 +378,7 @@ inline int LViterator::next()
|
||||
int i = wayup.top();
|
||||
wayup.pop();
|
||||
push_lh(i);
|
||||
if(debug) fprintf(stderr,"\t\tLViterator(%d)\treturn %d\n",snode,i);
|
||||
return i;
|
||||
};
|
||||
}
|
||||
@ -377,7 +388,10 @@ inline void LViterator::push_ld(int i)
|
||||
vector<Arc>& arcs = sgraph[i].deps;
|
||||
for(vector<Arc>::iterator a = arcs.begin(); a != arcs.end(); ++a)
|
||||
if(mgraph[sgraph[a->dst].mnode].pos < mgraph[sgraph[i].mnode].pos)
|
||||
{
|
||||
push(waydown,a->dst);
|
||||
if(debug) fprintf(stderr,"\t\tLViterator(%d)\tPUSH_LD waydown %d\n",snode,a->dst);
|
||||
}
|
||||
}
|
||||
|
||||
inline void LViterator::push_lh(int i)
|
||||
@ -385,7 +399,10 @@ inline void LViterator::push_lh(int i)
|
||||
vector<Arc>& arcs = sgraph[i].heads;
|
||||
for(vector<Arc>::iterator a = arcs.begin(); a != arcs.end(); ++a)
|
||||
if(mgraph[sgraph[a->dst].mnode].pos < mgraph[sgraph[i].mnode].pos)
|
||||
{
|
||||
push(wayup,a->dst);
|
||||
if(debug) fprintf(stderr,"\t\tLViterator(%d)\tPUSH_LH wayup %d\n",snode,a->dst);
|
||||
}
|
||||
}
|
||||
|
||||
inline void LViterator::push_ln(int i)
|
||||
@ -396,7 +413,10 @@ inline void LViterator::push_ln(int i)
|
||||
vector<int>& spredecessors = mgraph[*mp].snodes;
|
||||
for(vector<int>::iterator sp = spredecessors.begin(); sp != spredecessors.end(); ++sp )
|
||||
if(sgraph[*sp].visible_as_neighbour || !strict)
|
||||
{
|
||||
push(wayup,*sp);
|
||||
if(debug) fprintf(stderr,"\t\tLViterator(%d)\tPUSH_LN wayup %d\n",snode, *sp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -412,16 +432,15 @@ public:
|
||||
int next();
|
||||
|
||||
private:
|
||||
|
||||
int snode;
|
||||
SGraph& sgraph;
|
||||
MGraph& mgraph;
|
||||
int thenode;
|
||||
stack<int> wayup;
|
||||
|
||||
void push_ln(int i);
|
||||
};
|
||||
|
||||
// Creates an iterator for node n of graph sg and seeds it by calling push_ln(n).
// The initializer list follows the member declaration order (snode, sgraph,
// mgraph); the former `thenode` member has been replaced by `snode`.
inline LNiterator::LNiterator(SGraph& sg, int n) : snode(n), sgraph(sg), mgraph(sg.mgraph)
{
  push_ln(n);
}
|
||||
@ -429,11 +448,15 @@ inline LNiterator::LNiterator(SGraph& sg, int n) : sgraph(sg), mgraph(sg.mgraph)
|
||||
inline int LNiterator::next()
|
||||
{
|
||||
if(wayup.empty())
|
||||
{
|
||||
if(debug) fprintf(stderr,"\t\tLNiterator(%d)\treturn %d\n",snode,-1);
|
||||
return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
int i = wayup.top();
|
||||
wayup.pop();
|
||||
if(debug) fprintf(stderr,"\t\tLNiterator(%d)\treturn %d\n",snode,i);
|
||||
return i;
|
||||
};
|
||||
}
|
||||
@ -445,7 +468,10 @@ inline void LNiterator::push_ln(int i)
|
||||
{
|
||||
vector<int>& spredecessors = mgraph[*mp].snodes;
|
||||
for(vector<int>::iterator sp = spredecessors.begin(); sp != spredecessors.end(); ++sp )
|
||||
{
|
||||
wayup.push(*sp);
|
||||
if(debug) fprintf(stderr,"\t\tLNiterator(%d)\tPUSH %d\n",snode,-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -461,7 +487,7 @@ public:
|
||||
int next();
|
||||
|
||||
private:
|
||||
|
||||
int snode;
|
||||
SGraph& sgraph;
|
||||
MGraph& mgraph;
|
||||
stack<int> wayup;
|
||||
@ -469,7 +495,7 @@ private:
|
||||
void push_lh(int i);
|
||||
};
|
||||
|
||||
// Creates an iterator for node n of graph sg and seeds it by calling push_lh(n).
// snode records the start node; it is used in the debug trace output.
inline LHiterator::LHiterator(SGraph& sg, int n) : snode(n), sgraph(sg), mgraph(sg.mgraph)
{
  push_lh(n);
}
|
||||
@ -492,7 +518,10 @@ inline void LHiterator::push_lh(int i)
|
||||
vector<Arc>& arcs = sgraph[i].heads;
|
||||
for(vector<Arc>::iterator a = arcs.begin(); a != arcs.end(); ++a)
|
||||
if(mgraph[sgraph[a->dst].mnode].pos < mgraph[sgraph[i].mnode].pos)
|
||||
{
|
||||
wayup.push(a->dst);
|
||||
if(debug) fprintf(stderr,"\t\tLHiterator(%d)\tPUSH %d\n",snode,-1);
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------------------------------
|
||||
@ -506,16 +535,15 @@ public:
|
||||
int next();
|
||||
|
||||
private:
|
||||
|
||||
int snode;
|
||||
SGraph& sgraph;
|
||||
MGraph& mgraph;
|
||||
int thenode;
|
||||
stack<int> waydown;
|
||||
|
||||
void push_ld(int i);
|
||||
};
|
||||
|
||||
// Creates an iterator for node n of graph sg and seeds it by calling push_ld(n).
// The initializer list follows the member declaration order (snode, sgraph,
// mgraph); the former `thenode` member has been replaced by `snode`.
inline LDiterator::LDiterator(SGraph& sg, int n) : snode(n), sgraph(sg), mgraph(sg.mgraph)
{
  push_ld(n);
}
|
||||
@ -537,10 +565,11 @@ inline void LDiterator::push_ld(int i)
|
||||
{
  // Push onto `waydown` every node reached by an arc in sgraph[i].deps whose
  // mnode position is smaller than the mnode position of the start node (snode).
  vector<Arc>& arcs = sgraph[i].deps;
  for(vector<Arc>::iterator a = arcs.begin(); a != arcs.end(); ++a)
    if(mgraph[sgraph[a->dst].mnode].pos < mgraph[sgraph[snode].mnode].pos)
    {
      waydown.push(a->dst);
      // Trace the node that was actually pushed (the trace used to print a constant -1).
      if(debug) fprintf(stderr,"\t\tLDiterator(%d)\tPUSH %d\n",snode,a->dst);
    }
}
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
@ -53,7 +53,7 @@ EOS {POINT}|{POINTS}|{QMARK}|{EXCL}
|
||||
|
||||
.* { ECHO; set_position(); }
|
||||
|
||||
<<EOF>> { printf("%04d 00 EOS *\n",pos+len); exit(1); }
|
||||
<<EOF>> { printf("%04d 00 EOS *\n",pos+len); exit(0); }
|
||||
|
||||
%%
|
||||
|
||||
|
245
src/tre/tre
245
src/tre/tre
@ -12,39 +12,54 @@ opts = GetoptLong.new(
|
||||
[ '--debug', '-d', GetoptLong::NO_ARGUMENT ],
|
||||
[ '--format', '-F', GetoptLong::REQUIRED_ARGUMENT ],
|
||||
[ '--info', '-I', GetoptLong::REQUIRED_ARGUMENT ],
|
||||
[ '--dgpids', GetoptLong::NO_ARGUMENT ],
|
||||
[ '--graph', GetoptLong::NO_ARGUMENT ],
|
||||
[ '--uniq', '-u', GetoptLong::NO_ARGUMENT ],
|
||||
[ '--utt', GetoptLong::NO_ARGUMENT ],
|
||||
[ '--span', '-s', GetoptLong::REQUIRED_ARGUMENT ],
|
||||
[ '--maxsize', GetoptLong::REQUIRED_ARGUMENT ],
|
||||
[ '--forest', GetoptLong::NO_ARGUMENT ],
|
||||
[ '--ground', GetoptLong::NO_ARGUMENT ],
|
||||
[ '--only-trees','-t', GetoptLong::NO_ARGUMENT ])
|
||||
|
||||
# Usage text listing the command-line options.
# Note: inside this heredoc a trailing backslash joins the line with the next
# one, so no line may end with "\".
$helptext = <<END
The program generates trees from the graph output by dgp. dgp must be run
with '--info=ds' option.

Command: tre [options]

Options:
--help -h Print help (this text) and exit.
--debug -d Verbose output. For developers only.
--format=s -F s Output format. Recognized values:
a root + list of arcs
p parenthesized notation
h human readable indented format
c CONLL format
Multiple values are allowed. (default p)
--info=s -I s Information printed. Recognized values:
n node identifier
f surface form
m morphological information
l arc labels
--gphids Used gph node identifiers (default: linear)
--dgpids Used dgp node identifiers (default: linear)
--graph Do not generate trees, just print the graph.
--uniq -u Remove duplicate trees.
--utt UTT formatted output.

END
|
||||
|
||||
# Default values of the global settings that the command-line options override.
$DEBUG=false
$FORMAT='p'
$INFO='DEFAULT'
$ONLYTREES=false
$UTTOUTPUT=false
$START=nil
$END=nil
$FOREST=false
$MAXSIZE=nil
$GPHIDS=false
$DGPIDS=false
# This used to read `$GRAPH==false`: a comparison whose result was thrown away,
# so $GRAPH was never initialised.
$GRAPH=false
# --uniq and --ground set these flags; give them explicit defaults like the
# other switches instead of relying on an unset global being nil.
$UNIQ=false
$GROUND=false
|
||||
|
||||
opts.each do |opt, arg|
|
||||
case opt
|
||||
@ -57,12 +72,18 @@ opts.each do |opt, arg|
|
||||
$FORMAT=arg
|
||||
when '--info'
|
||||
$INFO=arg
|
||||
when '--only-trees'
|
||||
$ONLYTREES=true
|
||||
when '--gphids'
|
||||
$GPHIDS=true
|
||||
when '--dgpids'
|
||||
$DGPIDS=true
|
||||
when '--graph'
|
||||
$GRAPH=true
|
||||
when '--uniq'
|
||||
$UNIQ=true
|
||||
when '--utt'
|
||||
$UTTOUTPUT=true
|
||||
when '--forest'
|
||||
$FOREST=true
|
||||
when '--ground'
|
||||
$GROUND=true
|
||||
when '--maxsize'
|
||||
$MAXSIZE=arg.to_i
|
||||
when '--span'
|
||||
@ -75,7 +96,7 @@ end
|
||||
if $INFO=='DEFAULT'
|
||||
case $FORMAT
|
||||
when 'p','a'
|
||||
$INFO='nl'
|
||||
$INFO='fl'
|
||||
when 'h'
|
||||
$INFO='fmnl'
|
||||
end
|
||||
@ -94,7 +115,7 @@ def tre(input)
|
||||
tokennumber=0
|
||||
for line in input
|
||||
seg=Seg.new(line)
|
||||
print line unless $ONLYTREES || seg.field(3) == 'EOS'
|
||||
print line if $UTTOUTPUT && seg.field(3) == 'EOS'
|
||||
|
||||
if dgp=seg['dgp']
|
||||
if nodes==[] && seg[3]!='BOS'
|
||||
@ -121,23 +142,22 @@ def tre(input)
|
||||
if seg[3]=='EOS'
|
||||
|
||||
$pref = "#{seg[1]} #{seg[2]} SYN *"
|
||||
|
||||
parsegraph(nodes)
|
||||
|
||||
set_ord #(0...(nodes.length)).each{|i| set_distance_from_i i }
|
||||
|
||||
printgraph if $DEBUG
|
||||
|
||||
if $GROUND
|
||||
if $GRAPH
|
||||
if $FORMAT =~ /c/
|
||||
printconll
|
||||
else
|
||||
printground
|
||||
end
|
||||
else
|
||||
thetrees = $FOREST ? genforest : gentrees
|
||||
|
||||
output_trees thetrees
|
||||
|
||||
print line unless $ONLYTREES
|
||||
|
||||
$gphid=[] # POWTÓRZENIE
|
||||
outputs = output_trees thetrees
|
||||
outputs = outputs.sort.uniq if $UNIQ
|
||||
print outputs.join
|
||||
print line if $UTTOUTPUT
|
||||
$gphid=[]
|
||||
$form=[]
|
||||
$lem=[]
|
||||
$ord1=[]
|
||||
@ -153,29 +173,47 @@ end
|
||||
|
||||
|
||||
# Renders every tree in +trees+ according to $FORMAT and returns the rendered
# strings as an array, one element per tree. Nothing is printed here.
# $count is incremented once for each tree.
def output_trees trees
  outputs = []
  for t in trees
    $count += 1
    # Only the first matching format letter is used, tested in the order a, p, h, c.
    case $FORMAT
    when /a/
      outputs << "#{arc_output(t)}\n"
    when /p/
      outputs << "#{par_output(t)}\n"
    when /h/
      outputs << human_output(t,0)
    when /c/
      outputs << conll_output(t,0)
    end
  end
  outputs
end
|
||||
|
||||
# Maps a node id to the identifier that is printed:
# the id itself when $DGPIDS is set, $gphid[id] when $GPHIDS is set,
# and $ord1[$gphid[id]] otherwise.
def id_output(id)
  return id if $DGPIDS
  return $gphid[id] if $GPHIDS
  $ord1[$gphid[id]]
end
|
||||
|
||||
def nodeinfo(id)
|
||||
info=""
|
||||
@ -185,7 +223,7 @@ def nodeinfo(id)
|
||||
info += '.' if $INFO =~ /[nfm]/
|
||||
end
|
||||
if $INFO =~ /n/
|
||||
info += gphid.to_s
|
||||
info += id_output(id).to_s
|
||||
info += '.' if $INFO =~ /[fm]/
|
||||
end
|
||||
if $INFO =~ /f/
|
||||
@ -199,13 +237,9 @@ def nodeinfo(id)
|
||||
end
|
||||
|
||||
|
||||
# Renders +tree+ (a [root, arcs] pair) as "head:<root> links:(...)(...)".
# Each arc a is rendered as "(<a[0]>-<a[1]>)" using nodeinfo for both ends;
# when $INFO contains "l" the arc label a[2] and a colon are put in front.
def arc_output(tree)
  root, arcs = tree
  links = arcs.map do |a|
    label = ($INFO =~ /l/) ? a[2]+":" : ""
    "(#{label}#{nodeinfo(a[0])}-#{nodeinfo(a[1])})"
  end
  "head:#{nodeinfo(root)} links:" + links.join("")
end
|
||||
|
||||
def printtree(root,arcs,o)
|
||||
@ -220,54 +254,68 @@ def printtree(root,arcs,o)
|
||||
end
|
||||
end
|
||||
|
||||
# Renders +tree+ (a [root, arcs] pair) as indented text and returns it as a string.
# +o+ is the nesting depth: at depth 0 the line starts with a "root: " column;
# every dependent (arc a with a[0]==root, ordered by a[1]) is written on its own
# line, prefixed by o+1 indentation units and its label a[2], and is then
# expanded recursively one level deeper.
def human_output(tree,o)
  root, arcs = tree
  output = ''
  if o==0
    output += "%-16s" % "root: "
  end
  output += nodeinfo(root) + "\n"
  for arc in arcs.select{ |a| a[0]==root }.sort{|a,b| a[1]<=>b[1] }
    output += " "*(o+1)
    output += "%-16s" % (arc[2]+": ")
    output += human_output([arc[1],arcs],o+1)
  end
  output
end
|
||||
|
||||
# old:
|
||||
# def printpar(root,arcs)
|
||||
# print nodeinfo(root)
|
||||
# deps = arcs.select{ |a| a[0]==root }.sort{|a,b| a[1]<=>b[1] }
|
||||
# unless deps == []
|
||||
# print '('
|
||||
# cont=false
|
||||
# for arc in deps
|
||||
# if cont then print ',' else cont=true end
|
||||
# print arc[2],':' if $INFO =~ /l/
|
||||
# printpar(arc[1],arcs)
|
||||
# end
|
||||
# print ')'
|
||||
# end
|
||||
# end
|
||||
|
||||
# Renders +tree+ (a [root, arcs] pair) as tab-separated lines followed by an
# empty line, and returns the result as a string. +o+ is not used.
#
# Nodes are the root plus the target a[1] of every arc, ordered by $gphid.
# Columns per line: id, form, lemma, cpostag, cpostag, feats, head, deprel, _, _
# where lemma/cpostag/feats come from matching the $lem entry against
# "lemma,cpostag[/feats]". A node with no incoming arc gets head 0 and
# deprel "root"; missing values are written as "_".
def conll_output(tree,o)
  root,arcs = tree
  nodes = ([root] + arcs.map{|a| a[1]}).sort{|a,b| $gphid[a] <=> $gphid[b]}
  conll_lines = []
  for i in nodes
    gphid = $gphid[i]
    id = $ord1[gphid]
    form = $form[gphid]
    # Named groups assign the locals lemma, cpostag and feats (nil when unmatched).
    /^(?<lemma>.*),(?<cpostag>[^\/]*)(\/(?<feats>.+))?/ =~ $lem[gphid]
    thearcs = arcs.select{|a| a[1]==i }.map{|a| [$ord1[$gphid[a[0]]],a[2]] }
    thearcs = [[0,'root']] if thearcs.empty?
    for a in thearcs
      head,deprel = a
      conll_lines << [id,form,lemma,cpostag,cpostag,feats,head,deprel,nil,nil].map{|s| s ? s.to_s : "_"}.join("\t")
    end
  end
  conll_lines.join("\n") + "\n\n"
end
|
||||
|
||||
# Renders +tree+ (a [root, arcs] pair) in parenthesized notation and returns
# it as a string: the dependents of root whose $gphid is smaller than the
# root's come first, then " " + nodeinfo(root), then the dependents with a
# larger $gphid. Each dependent is written as " (" + subtree + ")", with the
# upper-cased arc label a[2] in front of the subtree when $INFO contains "l".
def par_output(tree)
  root, arcs = tree
  ldeps = arcs.select{|a| a[0]==root and $gphid[a[1]] < $gphid[root]}.sort{|a,b| $gphid[a[1]]<=>$gphid[b[1]] }
  rdeps = arcs.select{|a| a[0]==root and $gphid[a[1]] > $gphid[root]}.sort{|a,b| $gphid[a[1]]<=>$gphid[b[1]] }

  # One renderer shared by the left and right dependents.
  render = lambda do |arc|
    ' (' + (($INFO =~ /l/) ? arc[2].upcase : '') + par_output([arc[1],arcs]) + ')'
  end

  output_left = ldeps.map(&render).join
  output_right = rdeps.map(&render).join

  output_left + ' ' + nodeinfo(root) + output_right
end
|
||||
|
||||
|
||||
@ -466,6 +514,21 @@ def printground
|
||||
end
|
||||
end
|
||||
|
||||
# Prints the whole graph as tab-separated lines followed by an empty line.
# Indices 1 up to (but excluding) $form.length-1 are visited; for each one a
# line is printed per distinct (head, label) pair among the arcs in $arcs that
# point at it, or a single line with head 0 and deprel "root" when there is none.
# Columns: id, form, lemma, cpostag, cpostag, feats, head, deprel, _, _
def printconll
  (1...($form.length-1)).each do |i|
    id = $ord1[i]
    form = $form[i]
    /^(?<lemma>.*),(?<cpostag>[^\/]*)(\/(?<feats>.+))?/ =~ $lem[i]
    incoming = $arcs.select{|a| $ord1[$gphid[a[1]]] == $ord1[i]}
    arcs = incoming.map{|a| [$ord1[$gphid[a[0]]],a[2]]}.sort.uniq
    arcs = [[0,'root']] if arcs.empty?
    arcs.each do |head,deprel|
      columns = [id,form,lemma,cpostag,cpostag,feats,head,deprel,nil,nil]
      puts columns.map{|s| s ? s.to_s : "_"}.join("\t")
    end
  end
  puts
end
|
||||
|
||||
|
||||
# Formats the collection s as "{e1,e2,...}".
def set_to_s(s)
  '{' + s.join(',') + '}'
end
|
||||
# Formats the collection of pairs r as "{(a,b),(c,d),...}", using elements
# 0 and 1 of every pair.
def rel_to_s(r)
  pairs = r.map do |pair|
    "(#{pair[0]},#{pair[1]})"
  end
  "{#{pairs.join(',')}}"
end
|
||||
|
Loading…
Reference in New Issue
Block a user