gph i dgc obsluguja configi
git-svn-id: svn://atos.wmid.amu.edu.pl/utt@33 e293616e-ec6a-49c2-aa92-f4a8b91c5d16
This commit is contained in:
parent
a5fdde9613
commit
19dfa5cb76
26
app/dist/common/utt_make_config.pl
vendored
26
app/dist/common/utt_make_config.pl
vendored
@ -13,14 +13,14 @@ my $usr_home = catdir(home(), '.utt');
|
|||||||
prepareUttUsrHome($usr_home);
|
prepareUttUsrHome($usr_home);
|
||||||
conf_utt(catfile($usr_home, 'utt.conf'), $sys_home);
|
conf_utt(catfile($usr_home, 'utt.conf'), $sys_home);
|
||||||
|
|
||||||
conf_compiledic(catfile($usr_home, 'compiledic.conf'), $sys_home);
|
|
||||||
conf_cor(catfile($usr_home, 'cor.conf'), $sys_home);
|
conf_cor(catfile($usr_home, 'cor.conf'), $sys_home);
|
||||||
conf_dgc(catfile($usr_home, 'dgc.conf'), $sys_home);
|
conf_kor(catfile($usr_home, 'kor.conf'), $sys_home);
|
||||||
|
conf_compiledic(catfile($usr_home, 'compiledic.conf'), $sys_home);
|
||||||
conf_grp(catfile($usr_home, 'grp.conf'), $sys_home);
|
conf_grp(catfile($usr_home, 'grp.conf'), $sys_home);
|
||||||
conf_gue(catfile($usr_home, 'gue.conf'), $sys_home);
|
conf_gue(catfile($usr_home, 'gue.conf'), $sys_home);
|
||||||
conf_kor(catfile($usr_home, 'kor.conf'), $sys_home);
|
|
||||||
conf_lem(catfile($usr_home, 'lem.conf'), $sys_home);
|
conf_lem(catfile($usr_home, 'lem.conf'), $sys_home);
|
||||||
conf_ser(catfile($usr_home, 'ser.conf'), $sys_home);
|
conf_ser(catfile($usr_home, 'ser.conf'), $sys_home);
|
||||||
|
conf_dgc(catfile($usr_home, 'dgc.conf'), $sys_home);
|
||||||
|
|
||||||
print "UTT user configuration created in $usr_home\n";
|
print "UTT user configuration created in $usr_home\n";
|
||||||
|
|
||||||
@ -33,15 +33,15 @@ sub prepareUttUsrHome() {
|
|||||||
print "Preparing user configuration.\n";
|
print "Preparing user configuration.\n";
|
||||||
|
|
||||||
if(-d $dir) {
|
if(-d $dir) {
|
||||||
print "Old configuration detected. ";
|
print "Old configuration detected. ";
|
||||||
my $cnt = unlink <$dir/*>;
|
my $cnt = unlink <$dir/*>;
|
||||||
print "($cnt files deleted)\n";
|
print "($cnt files deleted)\n";
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
print "Creating directory $dir\n";
|
print "Creating directory $dir\n";
|
||||||
if(1 != mkdir $dir) {
|
if(1 != mkdir $dir) {
|
||||||
die "Unable to create UTT user configuration!\n";
|
die "Unable to create UTT user configuration!\n";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -107,7 +107,7 @@ sub conf_kor() {
|
|||||||
|
|
||||||
print FILE makeConfigHeader();
|
print FILE makeConfigHeader();
|
||||||
print FILE "dictionary-home=", abs_path("$utthome/share/utt"), "\n";
|
print FILE "dictionary-home=", abs_path("$utthome/share/utt"), "\n";
|
||||||
print FILE "weights=", abs_path("$utthome/lib/utt/weights.kor"), "\n";
|
print FILE "weights=", abs_path("$utthome/lib/utt/weights.cor"), "\n";
|
||||||
print FILE "threshold=1.0\n";
|
print FILE "threshold=1.0\n";
|
||||||
|
|
||||||
close FILE;
|
close FILE;
|
||||||
@ -221,8 +221,8 @@ sub conf_dgc() {
|
|||||||
open(FILE, ">$dgc_file");
|
open(FILE, ">$dgc_file");
|
||||||
|
|
||||||
print FILE makeConfigHeader();
|
print FILE makeConfigHeader();
|
||||||
print FILE "categories=", abs_path("$utthome/lib/utt/cats.dgc"), "\n";
|
print FILE "catfile=", abs_path("$utthome/lib/utt/cats.dgc"), "\n";
|
||||||
print FILE "grammar=", abs_path("$utthome/lib/utt/gram.dgc"), "\n";
|
print FILE "gramfile=", abs_path("$utthome/lib/utt/gram.dgc"), "\n";
|
||||||
|
|
||||||
close FILE;
|
close FILE;
|
||||||
|
|
||||||
|
158
app/src/dgp/dgc
158
app/src/dgp/dgc
@ -5,18 +5,83 @@
|
|||||||
#version: 1.0
|
#version: 1.0
|
||||||
#author: Tomasz Obrebski
|
#author: Tomasz Obrebski
|
||||||
|
|
||||||
|
# NOTE: implicitly requires the external program "canonize"!
|
||||||
#use lib "ENV{HOME}/.utt/lib/perl";
|
#use lib "ENV{HOME}/.utt/lib/perl";
|
||||||
#use strict;
|
|
||||||
|
use strict;
|
||||||
use Getopt::Long;
|
use Getopt::Long;
|
||||||
use Data::Dumper;
|
use Data::Dumper;
|
||||||
|
|
||||||
use attr;
|
use attr;
|
||||||
#use File::HomeDir;
|
use File::HomeDir;
|
||||||
|
|
||||||
|
my $systemconfigfile='/usr/local/etc/utt/dgc.conf';
|
||||||
|
my $userconfigfile=home()."/.utt/dgc.conf";
|
||||||
|
|
||||||
|
Getopt::Long::Configure('no_ignore_case_always');
|
||||||
|
|
||||||
my $help=0;
|
my $help=0;
|
||||||
my $catfile=0;
|
my $catfile=0;
|
||||||
my $dicfile=0;
|
my $dicfile=0;
|
||||||
my $gramfile=0;
|
my $gramfile=0;
|
||||||
|
my $outputfile=0;
|
||||||
|
|
||||||
|
#read configuration files###########################
|
||||||
|
my $file;
|
||||||
|
foreach $file ($systemconfigfile, $userconfigfile){
|
||||||
|
if(open(CONFIG, $file)){
|
||||||
|
while (<CONFIG>) {
|
||||||
|
chomp;
|
||||||
|
s/#.*//;
|
||||||
|
s/^\s+//;
|
||||||
|
s/\s+$//;
|
||||||
|
next unless length;
|
||||||
|
my ($name, $value) = split(/\s*=\s*/, $_, 2);
|
||||||
|
if(($name eq "catfile")or($name eq "c")){
|
||||||
|
$catfile=$value;
|
||||||
|
}
|
||||||
|
elsif(($name eq "dicfile")or($name eq "d")){
|
||||||
|
$dicfile=$value;
|
||||||
|
}
|
||||||
|
elsif(($name eq "gramfile")or($name eq "g")){
|
||||||
|
$gramfile=$value;
|
||||||
|
}
|
||||||
|
elsif(($name eq "outputfile")or($name eq "o")){
|
||||||
|
$outputfile=$value;
|
||||||
|
}
|
||||||
|
elsif(($name eq "help")or($name eq "h")){
|
||||||
|
$help=1;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
close CONFIG;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#########################################################
|
||||||
|
|
||||||
|
GetOptions("help|h" => \$help,
|
||||||
|
"catfile|c=s" => \$catfile,
|
||||||
|
"dicfile|d=s" => \$dicfile,
|
||||||
|
"gramfile|g=s" => \$gramfile,
|
||||||
|
"outputfile|o=s" => \$outputfile);
|
||||||
|
|
||||||
|
if($help)
|
||||||
|
{
|
||||||
|
print <<'END'
|
||||||
|
Usage: dgc [OPTIONS]
|
||||||
|
|
||||||
|
Options:
|
||||||
|
--catfile -c filename List of syntactic categories.
|
||||||
|
--dicfile -d filename Dictionary.
|
||||||
|
--gramfile -g filename List of grammar rules.
|
||||||
|
--outputfile -o filename Output filename.
|
||||||
|
--help -h Help.
|
||||||
|
END
|
||||||
|
;
|
||||||
|
exit 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
die("At least one of --catfile and --dicfile must be given.\n") if !$catfile && !$dicfile; # message now names the options actually declared in GetOptions and the usage text
|
||||||
|
|
||||||
my $ncat=0;
|
my $ncat=0;
|
||||||
my $nrole=0;
|
my $nrole=0;
|
||||||
@ -26,42 +91,44 @@ my $nright=0;
|
|||||||
my $nreq=0;
|
my $nreq=0;
|
||||||
my $nlink=0;
|
my $nlink=0;
|
||||||
|
|
||||||
GetOptions("help|h" => \$help,
|
|
||||||
"catfile|c=s" => \$catfile,
|
|
||||||
"dicfile|d=s" => \$dicfile,
|
|
||||||
"gramfile|g=s" => \$gramfile);
|
|
||||||
|
|
||||||
if($help)
|
|
||||||
{
|
|
||||||
print <<'END'
|
|
||||||
Usage: dgpcompile [OPTIONS]
|
|
||||||
|
|
||||||
Options:
|
|
||||||
--cats -c filename List of syntactic categories.
|
|
||||||
--dic -d filename Dictionary.
|
|
||||||
--help -h Help.
|
|
||||||
END
|
|
||||||
;
|
|
||||||
exit 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
die("At least one of --cats and --dic must be given.\n") if !$catfile && !$dicfile;
|
|
||||||
|
|
||||||
my %cats;
|
my %cats;
|
||||||
my %roles;
|
my %roles;
|
||||||
my %agr;
|
my %agr;
|
||||||
my %gov;
|
my %gov;
|
||||||
|
|
||||||
|
if(!$outputfile) {
|
||||||
|
*OUTPUT = *STDOUT;
|
||||||
|
}
|
||||||
|
elsif($outputfile eq "-") {
|
||||||
|
*OUTPUT = *STDOUT;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
open(OUTPUT, ">$outputfile") or die("Can't open output file: $outputfile!");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
loadcats($catfile) if $catfile;
|
loadcats($catfile) if $catfile;
|
||||||
extractcats($dicfile) if $dicfile;
|
extractcats($dicfile) if $dicfile;
|
||||||
|
|
||||||
|
|
||||||
$cats_re = qr/(?:$attr::cat_re\s*(?:,\s*$attr::cat_re)*)/;
|
my $cats_re = qr/(?:$attr::cat_re\s*(?:,\s*$attr::cat_re)*)/;
|
||||||
|
|
||||||
# class parse_class:
|
# class parse_class:
|
||||||
# /$attr::cat_re/g;
|
# /$attr::cat_re/g;
|
||||||
|
|
||||||
while(<>)
|
|
||||||
|
if(!$gramfile) {
|
||||||
|
*INPUT = *STDIN;
|
||||||
|
}
|
||||||
|
elsif($gramfile eq "-"){
|
||||||
|
*INPUT = *STDIN;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
open(INPUT, $gramfile) or die("Unable to open: $gramfile!");
|
||||||
|
}
|
||||||
|
|
||||||
|
while(<INPUT>)
|
||||||
{
|
{
|
||||||
if(/^\s*AGR\s+(\S+)\s+(\S+)\s*$/)
|
if(/^\s*AGR\s+(\S+)\s+(\S+)\s*$/)
|
||||||
{
|
{
|
||||||
@ -74,22 +141,22 @@ while(<>)
|
|||||||
elsif(/^\s*ROLE\s+\S+\s*$/)
|
elsif(/^\s*ROLE\s+\S+\s*$/)
|
||||||
{
|
{
|
||||||
$roles{$_}=1;
|
$roles{$_}=1;
|
||||||
print;
|
print OUTPUT;
|
||||||
}
|
}
|
||||||
elsif(/^\s*SGL\s+\S+\s*$/)
|
elsif(/^\s*SGL\s+\S+\s*$/)
|
||||||
{
|
{
|
||||||
++$nsgl;
|
++$nsgl;
|
||||||
print;
|
print OUTPUT;
|
||||||
}
|
}
|
||||||
elsif(/^\s*REQ\s+(\S+)\s+(\S+)\s*$/)
|
elsif(/^\s*REQ\s+(\S+)\s+(\S+)\s*$/)
|
||||||
{
|
{
|
||||||
print "#$_";
|
print OUTPUT "#$_";
|
||||||
my $cat = attr::parse $1;
|
my $cat = attr::parse $1;
|
||||||
for my $atomcat (keys %cats)
|
for my $atomcat (keys %cats)
|
||||||
{
|
{
|
||||||
if(attr::match @$cat, @{$cats{$atomcat}})
|
if(attr::match @$cat, @{$cats{$atomcat}})
|
||||||
{
|
{
|
||||||
print "REQ ".$atomcat." $2\n";
|
print OUTPUT "REQ ".$atomcat." $2\n";
|
||||||
++$nreq;
|
++$nreq;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -97,19 +164,19 @@ while(<>)
|
|||||||
elsif(/^\s*LEFT\s+\S+\s*$/)
|
elsif(/^\s*LEFT\s+\S+\s*$/)
|
||||||
{
|
{
|
||||||
++$nleft;
|
++$nleft;
|
||||||
print;
|
print OUTPUT;
|
||||||
}
|
}
|
||||||
elsif(/^\s*RIGHT\s+\S+\s*$/)
|
elsif(/^\s*RIGHT\s+\S+\s*$/)
|
||||||
{
|
{
|
||||||
++$nright;
|
++$nright;
|
||||||
print;
|
print OUTPUT;
|
||||||
}
|
}
|
||||||
elsif(($hs,$ds,$r) = /^\s*LINK\s+($cats_re)\s+($cats_re)\s+(\S+)\s*$/)
|
elsif(my ($hs,$ds,$r) = /^\s*LINK\s+($cats_re)\s+($cats_re)\s+(\S+)\s*$/)
|
||||||
{
|
{
|
||||||
print "#$_";
|
print OUTPUT "#$_";
|
||||||
for $h ($hs =~ /$attr::cat_re/g)
|
for my $h ($hs =~ /$attr::cat_re/g)
|
||||||
{
|
{
|
||||||
for $d ($ds =~ /$attr::cat_re/g)
|
for my $d ($ds =~ /$attr::cat_re/g)
|
||||||
{
|
{
|
||||||
addlinks($h,$d,$r);
|
addlinks($h,$d,$r);
|
||||||
}
|
}
|
||||||
@ -118,17 +185,17 @@ while(<>)
|
|||||||
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
print;
|
print OUTPUT;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
sub addlinks
|
sub addlinks
|
||||||
{
|
{
|
||||||
($h,$d,$r) = @_;
|
my ($h,$d,$r) = @_;
|
||||||
|
|
||||||
for my $a (@{$agr{$r}}) { print "#AGR $r $a\n"; }
|
for my $a (@{$agr{$r}}) { print OUTPUT "#AGR $r $a\n"; }
|
||||||
for my $c (@{$gov{$r}}) { print "#GOV $r ".attr::unparse(@$c)."\n"; }
|
for my $c (@{$gov{$r}}) { print OUTPUT "#GOV $r ".attr::unparse(@$c)."\n"; }
|
||||||
my $head = attr::parse $h;
|
my $head = attr::parse $h;
|
||||||
my $dep = attr::parse $d;
|
my $dep = attr::parse $d;
|
||||||
|
|
||||||
@ -151,9 +218,9 @@ sub addlinks
|
|||||||
next DEP if ! attr::match(@$c,@{$cats{$atomdep}});
|
next DEP if ! attr::match(@$c,@{$cats{$atomdep}});
|
||||||
}
|
}
|
||||||
|
|
||||||
print "LINK ";
|
print OUTPUT "LINK ";
|
||||||
print $atomhead." ";
|
print OUTPUT $atomhead." ";
|
||||||
print $atomdep." $r\n";
|
print OUTPUT $atomdep." $r\n";
|
||||||
++$nlink;
|
++$nlink;
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -179,10 +246,10 @@ sub extractcats
|
|||||||
{
|
{
|
||||||
while(/,([^[:space:];]+)/g)
|
while(/,([^[:space:];]+)/g)
|
||||||
{
|
{
|
||||||
$cat=$1;
|
my $cat=$1;
|
||||||
next if !$cat || exists $cats{$cat};
|
next if !$cat || exists $cats{$cat};
|
||||||
$ncat++;
|
$ncat++;
|
||||||
print "CAT $1\n";
|
print OUTPUT "CAT $1\n";
|
||||||
$cats{$cat}=attr::parse($cat);
|
$cats{$cat}=attr::parse($cat);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -198,9 +265,10 @@ sub loadcats
|
|||||||
{
|
{
|
||||||
tr/ \t\n//d;
|
tr/ \t\n//d;
|
||||||
next if !$_ || exists $cats{$_};
|
next if !$_ || exists $cats{$_};
|
||||||
print "CAT $_\n";
|
print OUTPUT "CAT $_\n";
|
||||||
++$ncat;
|
++$ncat;
|
||||||
$cats{$_}=attr::parse($_);
|
$cats{$_}=attr::parse($_);
|
||||||
}
|
}
|
||||||
close CATFILE;
|
close CATFILE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -7,17 +7,72 @@
|
|||||||
|
|
||||||
use strict;
|
use strict;
|
||||||
use Getopt::Long;
|
use Getopt::Long;
|
||||||
|
use File::HomeDir;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
my $systemconfigfile='/usr/local/etc/utt/gph.conf';
|
||||||
|
my $userconfigfile=home()."/.utt/gph.conf";
|
||||||
|
|
||||||
|
Getopt::Long::Configure('no_ignore_case_always');
|
||||||
|
|
||||||
my @process;
|
|
||||||
my $help=0;
|
my $help=0;
|
||||||
|
my $inputfile=0;
|
||||||
|
my $outputfile=0;
|
||||||
|
my @process=();
|
||||||
my $reset;
|
my $reset;
|
||||||
my $interactive=1;
|
my $interactive=0;
|
||||||
|
|
||||||
|
#read configuration files###########################
|
||||||
|
my $file;
|
||||||
|
my @process_conf=();
|
||||||
|
foreach $file ($systemconfigfile, $userconfigfile){
|
||||||
|
if(open(CONFIG, $file)){
|
||||||
|
while (<CONFIG>) {
|
||||||
|
chomp;
|
||||||
|
s/#.*//;
|
||||||
|
s/^\s+//;
|
||||||
|
s/\s+$//;
|
||||||
|
next unless length;
|
||||||
|
my ($name, $value) = split(/\s*=\s*/, $_, 2);
|
||||||
|
if(($name eq "inputfile")or($name eq "f")){
|
||||||
|
$inputfile=$value;
|
||||||
|
}
|
||||||
|
elsif(($name eq "outputfile")or($name eq "o")){
|
||||||
|
$outputfile=$value;
|
||||||
|
}
|
||||||
|
elsif(($name eq "process")or($name eq "p")){
|
||||||
|
push @process_conf, $value;
|
||||||
|
}
|
||||||
|
elsif(($name eq "reset")or($name eq "r")){
|
||||||
|
$reset=$value;
|
||||||
|
}
|
||||||
|
elsif(($name eq "interactive")or($name eq "i")){
|
||||||
|
$interactive=1;
|
||||||
|
}
|
||||||
|
elsif(($name eq "help")or($name eq "h")){
|
||||||
|
$help=1;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
close CONFIG;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#########################################################
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
GetOptions("process|p=s" => \@process,
|
GetOptions("process|p=s" => \@process,
|
||||||
|
"inputfile|f=s" => \$inputfile,
|
||||||
|
"outputfile|o=s" => \$outputfile,
|
||||||
"help|h" => \$help,
|
"help|h" => \$help,
|
||||||
"reset|r=s" => \$reset,
|
"reset|r=s" => \$reset,
|
||||||
"interactive|i" => \$interactive);
|
"interactive|i" => \$interactive);
|
||||||
|
|
||||||
|
@process = @process_conf if @process<1;
|
||||||
|
|
||||||
if($help)
|
if($help)
|
||||||
{
|
{
|
||||||
print <<'END'
|
print <<'END'
|
||||||
@ -26,9 +81,9 @@ Usage: gph [OPTIONS]
|
|||||||
Options:
|
Options:
|
||||||
-p tag Process segments with this tag as nodes.
|
-p tag Process segments with this tag as nodes.
|
||||||
-r tag Start new graph at this tag.
|
-r tag Start new graph at this tag.
|
||||||
-f filename Input file (NIE DZIALA).
|
-f filename Input file.
|
||||||
-o filename Output file (NIE DZIALA).
|
-o filename Output file.
|
||||||
-i Toggle interactive mode (default=on).
|
-i Toggle interactive mode (default=off).
|
||||||
END
|
END
|
||||||
;
|
;
|
||||||
exit 0;
|
exit 0;
|
||||||
@ -37,11 +92,25 @@ END
|
|||||||
|
|
||||||
$|=1 if $interactive;
|
$|=1 if $interactive;
|
||||||
|
|
||||||
my @prev;
|
|
||||||
|
|
||||||
|
if(!$inputfile or $inputfile eq "-") {
|
||||||
|
*INPUT = *STDIN;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
open(INPUT, "$inputfile") or die("Can't open input file: $inputfile!");
|
||||||
|
}
|
||||||
|
|
||||||
|
if(!$outputfile or $outputfile eq "-") {
|
||||||
|
*OUTPUT = *STDOUT;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
open(OUTPUT, '>', $outputfile) or die("Can't open output file: $outputfile!"); # '>' is required: the script prints to OUTPUT, so the handle must be opened for writing, not reading
|
||||||
|
}
|
||||||
|
|
||||||
|
my @prev;
|
||||||
my $n=0;
|
my $n=0;
|
||||||
|
|
||||||
while(<>)
|
while(<INPUT>)
|
||||||
{
|
{
|
||||||
chomp;
|
chomp;
|
||||||
my $do=0;
|
my $do=0;
|
||||||
@ -88,6 +157,6 @@ while(<>)
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
print $_.$gph."\n";
|
print OUTPUT $_.$gph."\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user