gph i dgc obsluguja configi

git-svn-id: svn://atos.wmid.amu.edu.pl/utt@33 e293616e-ec6a-49c2-aa92-f4a8b91c5d16
This commit is contained in:
pawelk 2008-04-24 10:36:13 +00:00
parent a5fdde9613
commit 19dfa5cb76
3 changed files with 426 additions and 289 deletions

View File

@ -1,236 +1,236 @@
#!/usr/bin/perl
#c:\usr\perl\bin\perl.exe
use Cwd 'abs_path';
use File::Basename;
use File::HomeDir;
use File::Spec::Functions;
use POSIX;
my $sys_home = catdir(dirname(abs_path($0)), "..");
my $usr_home = catdir(home(), '.utt');
prepareUttUsrHome($usr_home);
conf_utt(catfile($usr_home, 'utt.conf'), $sys_home);
conf_compiledic(catfile($usr_home, 'compiledic.conf'), $sys_home);
conf_cor(catfile($usr_home, 'cor.conf'), $sys_home);
conf_dgc(catfile($usr_home, 'dgc.conf'), $sys_home);
conf_grp(catfile($usr_home, 'grp.conf'), $sys_home);
conf_gue(catfile($usr_home, 'gue.conf'), $sys_home);
conf_kor(catfile($usr_home, 'kor.conf'), $sys_home);
conf_lem(catfile($usr_home, 'lem.conf'), $sys_home);
conf_ser(catfile($usr_home, 'ser.conf'), $sys_home);
print "UTT user configuration created in $usr_home\n";
#Kasuje stare configi i tworzy katalog .utt
sub prepareUttUsrHome() {
my $dir = shift;
print "Preparing user configuration.\n";
if(-d $dir) {
print "Old configuration detected. ";
my $cnt = unlink <$dir/*>;
print "($cnt files deleted)\n";
}
else {
print "Creating directory $dir\n";
if(1 != mkdir $dir) {
die "Unable to create UTT user configuration!\n";
}
}
}
# Tworzy naglowek dla wszystkich plikow konfiguracyjnych
sub makeConfigHeader() {
return "# ************************************************************\n".
"# * This file was created automatically during installation. *\n".
"# * If you don't need do not change it. *\n".
"# * *\n".
"# * UAM Text Tools *\n".
"# * Adam Mickiewicz University, Poland *\n".
"# * http://utt.amu.edu.pl *\n".
"# ************************************************************\n".
"#\n".
"# All lines must looks like:\n".
"# parameter_name [=] value\n".
"#\n\n";
}
sub conf_compiledic() {
my $compiledic_file = shift;
my $utthome = shift;
open(FILE, ">$compiledic_file");
print FILE makeConfigHeader();
my $hm = home();
my $lang=`grep language $hm/.utt/utt.conf | cut -d= -f 2`;
chomp $lang;
# powinno byc $utthome/share/utt/pl_PL.ISO-8859-2/pl_PL.ISO-8892-2.sym
# ale to bez sensu
print FILE "symbols=", abs_path("$utthome/share/utt");
print FILE "/$lang/$lang.sym\n";
close FILE;
}
sub conf_grp() {
my $grp_file = shift;
my $utthome = shift;
open(FILE, ">$grp_file");
print FILE makeConfigHeader();
print FILE "macros=", abs_path("$utthome/lib/utt/terms.m4"), "\n";
close FILE;
}
sub conf_cor() {
my $cor_file = shift;
my $utthome = shift;
open(FILE, ">$cor_file");
print FILE makeConfigHeader();
print FILE "dictionary-home=", abs_path("$utthome/share/utt"), "\n";
close FILE;
}
sub conf_kor() {
my $kor_file = shift;
my $utthome = shift;
open(FILE, ">$kor_file");
print FILE makeConfigHeader();
print FILE "dictionary-home=", abs_path("$utthome/share/utt"), "\n";
print FILE "weights=", abs_path("$utthome/lib/utt/weights.kor"), "\n";
print FILE "threshold=1.0\n";
close FILE;
}
sub conf_grp() {
my $grp_file = shift;
my $utthome = shift;
open(FILE, ">$grp_file");
print FILE makeConfigHeader();
print FILE "macros=", abs_path("$utthome/lib/utt/terms.m4"), "\n";
close FILE;
}
sub conf_gue() {
my $gue_file = shift;
my $utthome = shift;
open(FILE, ">$gue_file");
print FILE makeConfigHeader();
close FILE;
}
sub conf_lem() {
my $lem_file = shift;
my $utthome = shift;
open(FILE, ">$lem_file");
print FILE makeConfigHeader();
print FILE "dictionary-home=", abs_path("$utthome/share/utt"), "\n";
close FILE;
}
sub conf_ser() {
my $ser_file = shift;
my $utthome = shift;
open(FILE, ">$ser_file");
print FILE makeConfigHeader();
print FILE "macros=", abs_path("$utthome/lib/utt/terms.m4"), "\n";
print FILE "flex-template=", abs_path("$utthome/lib/utt/ser.l.template"), "\n";
close FILE;
}
sub conf_utt() {
my $utt_file = shift;
my $utthome = shift;
my $lang = getUserLanguage(abs_path(catdir($utthome, "share/utt")));
open(FILE, ">$utt_file");
print FILE makeConfigHeader();
print FILE "# user locale (dictionary)\n";
print FILE "language=", $lang, "\n";
close FILE;
}
sub getUserLanguage() {
my $dict_dir = shift;
print "Setup dictionaries.\n";
opendir(DIR, $dict_dir) || die "Install dictionary first!\n";
@files = grep(/[^\.{1,2}]/ && -d "$dict_dir/$_", readdir(DIR));
closedir DIR;
my $cnt = scalar @files;
print "$cnt dictionary(ies) found.\n";
if(0 == $cnt) {
die("Install dictionary first!\n");
}
foreach my $file (@files) {
print "Do you select dictionary $file [yes/no]? ";
my $ans = <STDIN>;
if($ans =~ /y(es)?/i) {
return $file;
}
}
die "No dictionary selected - setup failed!\n";
}
# Szuka locali u uzytkownika
sub findLocale() {
$cur_locale = setlocale(LC_CTYPE);
# we replace Latinx to ISO-8859-x
$cur_locale =~ s/(.+?)Latin(.+?)/$1ISO\-8859\-$2/g;
if($cur_locale =~ /\w+_\w+\.\S+/) {
$best_locale = $cur_locale;
}
elsif($cur_locale =~ /\w+_\w+/) {
$best_locale = $cur_locale.".UTF-8";
}
else {
$best_locale = toupper($cur_locale).'_'.tolower($cur_locale).'.UTF-8';
}
return $best_locale;
}
sub conf_dgc() {
my $dgc_file = shift;
my $utthome = shift;
open(FILE, ">$dgc_file");
print FILE makeConfigHeader();
print FILE "categories=", abs_path("$utthome/lib/utt/cats.dgc"), "\n";
print FILE "grammar=", abs_path("$utthome/lib/utt/gram.dgc"), "\n";
close FILE;
open(FILE, ">$utthome/bin/Makefile.go");
print FILE "\ngram.dgp: ", abs_path("$utthome/lib/utt/gram.dgc"), "\n";
print FILE "\tdgc -c ", abs_path("$utthome/lib/utt/cats.dgc"), " < ", abs_path("$utthome/lib/utt/gram.dgc"), " > gram.dgp\n";
close FILE;
}
#!/usr/bin/perl
#c:\usr\perl\bin\perl.exe
use Cwd 'abs_path';
use File::Basename;
use File::HomeDir;
use File::Spec::Functions;
use POSIX;
my $sys_home = catdir(dirname(abs_path($0)), "..");
my $usr_home = catdir(home(), '.utt');
prepareUttUsrHome($usr_home);
conf_utt(catfile($usr_home, 'utt.conf'), $sys_home);
conf_cor(catfile($usr_home, 'cor.conf'), $sys_home);
conf_kor(catfile($usr_home, 'kor.conf'), $sys_home);
conf_compiledic(catfile($usr_home, 'compiledic.conf'), $sys_home);
conf_grp(catfile($usr_home, 'grp.conf'), $sys_home);
conf_gue(catfile($usr_home, 'gue.conf'), $sys_home);
conf_lem(catfile($usr_home, 'lem.conf'), $sys_home);
conf_ser(catfile($usr_home, 'ser.conf'), $sys_home);
conf_dgc(catfile($usr_home, 'dgc.conf'), $sys_home);
print "UTT user configuration created in $usr_home\n";
#Kasuje stare configi i tworzy katalog .utt
sub prepareUttUsrHome() {
my $dir = shift;
print "Preparing user configuration.\n";
if(-d $dir) {
print "Old configuration detected. ";
my $cnt = unlink <$dir/*>;
print "($cnt files deleted)\n";
}
else {
print "Creating directory $dir\n";
if(1 != mkdir $dir) {
die "Unable to create UTT user configuration!\n";
}
}
}
# Tworzy naglowek dla wszystkich plikow konfiguracyjnych
sub makeConfigHeader() {
return "# ************************************************************\n".
"# * This file was created automatically during installation. *\n".
"# * If you don't need do not change it. *\n".
"# * *\n".
"# * UAM Text Tools *\n".
"# * Adam Mickiewicz University, Poland *\n".
"# * http://utt.amu.edu.pl *\n".
"# ************************************************************\n".
"#\n".
"# All lines must looks like:\n".
"# parameter_name [=] value\n".
"#\n\n";
}
sub conf_compiledic() {
my $compiledic_file = shift;
my $utthome = shift;
open(FILE, ">$compiledic_file");
print FILE makeConfigHeader();
my $hm = home();
my $lang=`grep language $hm/.utt/utt.conf | cut -d= -f 2`;
chomp $lang;
# powinno byc $utthome/share/utt/pl_PL.ISO-8859-2/pl_PL.ISO-8892-2.sym
# ale to bez sensu
print FILE "symbols=", abs_path("$utthome/share/utt");
print FILE "/$lang/$lang.sym\n";
close FILE;
}
sub conf_grp() {
my $grp_file = shift;
my $utthome = shift;
open(FILE, ">$grp_file");
print FILE makeConfigHeader();
print FILE "macros=", abs_path("$utthome/lib/utt/terms.m4"), "\n";
close FILE;
}
sub conf_cor() {
my $cor_file = shift;
my $utthome = shift;
open(FILE, ">$cor_file");
print FILE makeConfigHeader();
print FILE "dictionary-home=", abs_path("$utthome/share/utt"), "\n";
close FILE;
}
sub conf_kor() {
my $kor_file = shift;
my $utthome = shift;
open(FILE, ">$kor_file");
print FILE makeConfigHeader();
print FILE "dictionary-home=", abs_path("$utthome/share/utt"), "\n";
print FILE "weights=", abs_path("$utthome/lib/utt/weights.cor"), "\n";
print FILE "threshold=1.0\n";
close FILE;
}
sub conf_grp() {
my $grp_file = shift;
my $utthome = shift;
open(FILE, ">$grp_file");
print FILE makeConfigHeader();
print FILE "macros=", abs_path("$utthome/lib/utt/terms.m4"), "\n";
close FILE;
}
sub conf_gue() {
my $gue_file = shift;
my $utthome = shift;
open(FILE, ">$gue_file");
print FILE makeConfigHeader();
close FILE;
}
sub conf_lem() {
my $lem_file = shift;
my $utthome = shift;
open(FILE, ">$lem_file");
print FILE makeConfigHeader();
print FILE "dictionary-home=", abs_path("$utthome/share/utt"), "\n";
close FILE;
}
sub conf_ser() {
my $ser_file = shift;
my $utthome = shift;
open(FILE, ">$ser_file");
print FILE makeConfigHeader();
print FILE "macros=", abs_path("$utthome/lib/utt/terms.m4"), "\n";
print FILE "flex-template=", abs_path("$utthome/lib/utt/ser.l.template"), "\n";
close FILE;
}
sub conf_utt() {
my $utt_file = shift;
my $utthome = shift;
my $lang = getUserLanguage(abs_path(catdir($utthome, "share/utt")));
open(FILE, ">$utt_file");
print FILE makeConfigHeader();
print FILE "# user locale (dictionary)\n";
print FILE "language=", $lang, "\n";
close FILE;
}
sub getUserLanguage() {
my $dict_dir = shift;
print "Setup dictionaries.\n";
opendir(DIR, $dict_dir) || die "Install dictionary first!\n";
@files = grep(/[^\.{1,2}]/ && -d "$dict_dir/$_", readdir(DIR));
closedir DIR;
my $cnt = scalar @files;
print "$cnt dictionary(ies) found.\n";
if(0 == $cnt) {
die("Install dictionary first!\n");
}
foreach my $file (@files) {
print "Do you select dictionary $file [yes/no]? ";
my $ans = <STDIN>;
if($ans =~ /y(es)?/i) {
return $file;
}
}
die "No dictionary selected - setup failed!\n";
}
# Szuka locali u uzytkownika
sub findLocale() {
$cur_locale = setlocale(LC_CTYPE);
# we replace Latinx to ISO-8859-x
$cur_locale =~ s/(.+?)Latin(.+?)/$1ISO\-8859\-$2/g;
if($cur_locale =~ /\w+_\w+\.\S+/) {
$best_locale = $cur_locale;
}
elsif($cur_locale =~ /\w+_\w+/) {
$best_locale = $cur_locale.".UTF-8";
}
else {
$best_locale = toupper($cur_locale).'_'.tolower($cur_locale).'.UTF-8';
}
return $best_locale;
}
sub conf_dgc() {
my $dgc_file = shift;
my $utthome = shift;
open(FILE, ">$dgc_file");
print FILE makeConfigHeader();
print FILE "catfile=", abs_path("$utthome/lib/utt/cats.dgc"), "\n";
print FILE "gramfile=", abs_path("$utthome/lib/utt/gram.dgc"), "\n";
close FILE;
open(FILE, ">$utthome/bin/Makefile.go");
print FILE "\ngram.dgp: ", abs_path("$utthome/lib/utt/gram.dgc"), "\n";
print FILE "\tdgc -c ", abs_path("$utthome/lib/utt/cats.dgc"), " < ", abs_path("$utthome/lib/utt/gram.dgc"), " > gram.dgp\n";
close FILE;
}

View File

@ -5,18 +5,83 @@
#version: 1.0
#author: Tomasz Obrebski
# wymaga niejawnie programu canonize!!!!
#use lib "ENV{HOME}/.utt/lib/perl";
#use strict;
use strict;
use Getopt::Long;
use Data::Dumper;
use attr;
#use File::HomeDir;
use File::HomeDir;
my $systemconfigfile='/usr/local/etc/utt/dgc.conf';
my $userconfigfile=home()."/.utt/dgc.conf";
Getopt::Long::Configure('no_ignore_case_always');
my $help=0;
my $catfile=0;
my $dicfile=0;
my $gramfile=0;
my $outputfile=0;
#read configuration files###########################
my $file;
foreach $file ($systemconfigfile, $userconfigfile){
if(open(CONFIG, $file)){
while (<CONFIG>) {
chomp;
s/#.*//;
s/^\s+//;
s/\s+$//;
next unless length;
my ($name, $value) = split(/\s*=\s*/, $_, 2);
if(($name eq "catfile")or($name eq "c")){
$catfile=$value;
}
elsif(($name eq "dicfile")or($name eq "d")){
$dicfile=$value;
}
elsif(($name eq "gramfile")or($name eq "g")){
$gramfile=$value;
}
elsif(($name eq "outputfile")or($name eq "o")){
$outputfile=$value;
}
elsif(($name eq "help")or($name eq "h")){
$help=1;
}
}
close CONFIG;
}
}
#########################################################
GetOptions("help|h" => \$help,
"catfile|c=s" => \$catfile,
"dicfile|d=s" => \$dicfile,
"gramfile|g=s" => \$gramfile,
"outputfile|o=s" => \$outputfile);
if($help)
{
print <<'END'
Usage: dgc [OPTIONS]
Options:
--catfile -c filename List of syntactic categories.
--dicfile -d filename Dictionary.
--gramfile -g filename List of grammar rules.
--outputfile -o filename Output filename.
--help -h Help.
END
;
exit 0;
}
die("At least one of --cats and --dic must be given.\n") if !$catfile && !$dicfile;
my $ncat=0;
my $nrole=0;
@ -26,42 +91,44 @@ my $nright=0;
my $nreq=0;
my $nlink=0;
GetOptions("help|h" => \$help,
"catfile|c=s" => \$catfile,
"dicfile|d=s" => \$dicfile,
"gramfile|g=s" => \$gramfile);
if($help)
{
print <<'END'
Usage: dgpcompile [OPTIONS]
Options:
--cats -c filename List of syntactic categories.
--dic -d filename Dictionary.
--help -h Help.
END
;
exit 0;
}
die("At least one of --cats and --dic must be given.\n") if !$catfile && !$dicfile;
my %cats;
my %roles;
my %agr;
my %gov;
if(!$outputfile) {
*OUTPUT = *STDOUT;
}
elsif($outputfile eq "-") {
*OUTPUT = *STDOUT;
}
else {
open(OUTPUT, ">$outputfile") or die("Can't open output file: $outputfile!");
}
loadcats($catfile) if $catfile;
extractcats($dicfile) if $dicfile;
$cats_re = qr/(?:$attr::cat_re\s*(?:,\s*$attr::cat_re)*)/;
my $cats_re = qr/(?:$attr::cat_re\s*(?:,\s*$attr::cat_re)*)/;
# class parse_class:
# /$attr::cat_re/g;
while(<>)
if(!$gramfile) {
*INPUT = *STDIN;
}
elsif($gramfile eq "-"){
*INPUT = *STDIN;
}
else {
open(INPUT, $gramfile) or die("Unable to open: $gramfile!");
}
while(<INPUT>)
{
if(/^\s*AGR\s+(\S+)\s+(\S+)\s*$/)
{
@ -74,22 +141,22 @@ while(<>)
elsif(/^\s*ROLE\s+\S+\s*$/)
{
$roles{$_}=1;
print;
print OUTPUT;
}
elsif(/^\s*SGL\s+\S+\s*$/)
{
++$nsgl;
print;
print OUTPUT;
}
elsif(/^\s*REQ\s+(\S+)\s+(\S+)\s*$/)
{
print "#$_";
print OUTPUT "#$_";
my $cat = attr::parse $1;
for my $atomcat (keys %cats)
{
if(attr::match @$cat, @{$cats{$atomcat}})
{
print "REQ ".$atomcat." $2\n";
print OUTPUT "REQ ".$atomcat." $2\n";
++$nreq;
}
}
@ -97,19 +164,19 @@ while(<>)
elsif(/^\s*LEFT\s+\S+\s*$/)
{
++$nleft;
print;
print OUTPUT;
}
elsif(/^\s*RIGHT\s+\S+\s*$/)
{
++$nright;
print;
print OUTPUT;
}
elsif(($hs,$ds,$r) = /^\s*LINK\s+($cats_re)\s+($cats_re)\s+(\S+)\s*$/)
elsif(my ($hs,$ds,$r) = /^\s*LINK\s+($cats_re)\s+($cats_re)\s+(\S+)\s*$/)
{
print "#$_";
for $h ($hs =~ /$attr::cat_re/g)
print OUTPUT "#$_";
for my $h ($hs =~ /$attr::cat_re/g)
{
for $d ($ds =~ /$attr::cat_re/g)
for my $d ($ds =~ /$attr::cat_re/g)
{
addlinks($h,$d,$r);
}
@ -118,17 +185,17 @@ while(<>)
else
{
print;
print OUTPUT;
}
}
sub addlinks
{
($h,$d,$r) = @_;
my ($h,$d,$r) = @_;
for my $a (@{$agr{$r}}) { print "#AGR $r $a\n"; }
for my $c (@{$gov{$r}}) { print "#GOV $r ".attr::unparse(@$c)."\n"; }
for my $a (@{$agr{$r}}) { print OUTPUT "#AGR $r $a\n"; }
for my $c (@{$gov{$r}}) { print OUTPUT "#GOV $r ".attr::unparse(@$c)."\n"; }
my $head = attr::parse $h;
my $dep = attr::parse $d;
@ -151,9 +218,9 @@ sub addlinks
next DEP if ! attr::match(@$c,@{$cats{$atomdep}});
}
print "LINK ";
print $atomhead." ";
print $atomdep." $r\n";
print OUTPUT "LINK ";
print OUTPUT $atomhead." ";
print OUTPUT $atomdep." $r\n";
++$nlink;
}
@ -179,10 +246,10 @@ sub extractcats
{
while(/,([^[:space:];]+)/g)
{
$cat=$1;
my $cat=$1;
next if !$cat || exists $cats{$cat};
$ncat++;
print "CAT $1\n";
print OUTPUT "CAT $1\n";
$cats{$cat}=attr::parse($cat);
}
}
@ -198,9 +265,10 @@ sub loadcats
{
tr/ \t\n//d;
next if !$_ || exists $cats{$_};
print "CAT $_\n";
print OUTPUT "CAT $_\n";
++$ncat;
$cats{$_}=attr::parse($_);
}
close CATFILE;
}

View File

@ -7,17 +7,72 @@
use strict;
use Getopt::Long;
use File::HomeDir;
my $systemconfigfile='/usr/local/etc/utt/gph.conf';
my $userconfigfile=home()."/.utt/gph.conf";
Getopt::Long::Configure('no_ignore_case_always');
my @process;
my $help=0;
my $inputfile=0;
my $outputfile=0;
my @process=();
my $reset;
my $interactive=1;
my $interactive=0;
#read configuration files###########################
my $file;
my @process_conf=();
foreach $file ($systemconfigfile, $userconfigfile){
if(open(CONFIG, $file)){
while (<CONFIG>) {
chomp;
s/#.*//;
s/^\s+//;
s/\s+$//;
next unless length;
my ($name, $value) = split(/\s*=\s*/, $_, 2);
if(($name eq "inputfile")or($name eq "f")){
$inputfile=$value;
}
elsif(($name eq "outputfile")or($name eq "o")){
$outputfile=$value;
}
elsif(($name eq "process")or($name eq "p")){
push @process_conf, $value;
}
elsif(($name eq "reset")or($name eq "r")){
$reset=$value;
}
elsif(($name eq "interactive")or($name eq "i")){
$interactive=1;
}
elsif(($name eq "help")or($name eq "h")){
$help=1;
}
}
close CONFIG;
}
}
#########################################################
GetOptions("process|p=s" => \@process,
"inputfile|f=s" => \$inputfile,
"outputfile|o=s" => \$outputfile,
"help|h" => \$help,
"reset|r=s" => \$reset,
"interactive|i" => \$interactive);
@process = @process_conf if @process<1;
if($help)
{
print <<'END'
@ -26,9 +81,9 @@ Usage: gph [OPTIONS]
Options:
-p tag Process segments with this tag as nodes.
-r tag Start new graph at this tag.
-f filename Input file (NIE DZIALA).
-o filename Output file (NIE DZIALA).
-i Toggle interactive mode (default=on).
-f filename Input file.
-o filename Output file.
-i Toggle interactive mode (default=off).
END
;
exit 0;
@ -37,11 +92,25 @@ END
$|=1 if $interactive;
my @prev;
if(!$inputfile or $inputfile eq "-") {
*INPUT = *STDIN;
}
else {
open(INPUT, "$inputfile") or die("Can't open input file: $inputfile!");
}
if(!$outputfile or $outputfile eq "-") {
*OUTPUT = *STDOUT;
}
else {
open(OUTPUT, "$outputfile") or die("Can't open output file: $outputfile!");
}
my @prev;
my $n=0;
while(<>)
while(<INPUT>)
{
chomp;
my $do=0;
@ -88,6 +157,6 @@ while(<>)
}
print $_.$gph."\n";
print OUTPUT $_.$gph."\n";
}