diff --git a/app/dist/common/utt_make_config.pl b/app/dist/common/utt_make_config.pl index 62f3294..83365f6 100644 --- a/app/dist/common/utt_make_config.pl +++ b/app/dist/common/utt_make_config.pl @@ -1,236 +1,236 @@ -#!/usr/bin/perl -#c:\usr\perl\bin\perl.exe - -use Cwd 'abs_path'; -use File::Basename; -use File::HomeDir; -use File::Spec::Functions; -use POSIX; - -my $sys_home = catdir(dirname(abs_path($0)), ".."); -my $usr_home = catdir(home(), '.utt'); - -prepareUttUsrHome($usr_home); -conf_utt(catfile($usr_home, 'utt.conf'), $sys_home); - -conf_compiledic(catfile($usr_home, 'compiledic.conf'), $sys_home); -conf_cor(catfile($usr_home, 'cor.conf'), $sys_home); -conf_dgc(catfile($usr_home, 'dgc.conf'), $sys_home); -conf_grp(catfile($usr_home, 'grp.conf'), $sys_home); -conf_gue(catfile($usr_home, 'gue.conf'), $sys_home); -conf_kor(catfile($usr_home, 'kor.conf'), $sys_home); -conf_lem(catfile($usr_home, 'lem.conf'), $sys_home); -conf_ser(catfile($usr_home, 'ser.conf'), $sys_home); - -print "UTT user configuration created in $usr_home\n"; - - - -#Kasuje stare configi i tworzy katalog .utt -sub prepareUttUsrHome() { - my $dir = shift; - - print "Preparing user configuration.\n"; - - if(-d $dir) { - print "Old configuration detected. "; - my $cnt = unlink <$dir/*>; - print "($cnt files deleted)\n"; - } - else { - print "Creating directory $dir\n"; - if(1 != mkdir $dir) { - die "Unable to create UTT user configuration!\n"; - } - } -} - -# Tworzy naglowek dla wszystkich plikow konfiguracyjnych -sub makeConfigHeader() { - return "# ************************************************************\n". - "# * This file was created automatically during installation. *\n". - "# * If you don't need do not change it. *\n". - "# * *\n". - "# * UAM Text Tools *\n". - "# * Adam Mickiewicz University, Poland *\n". - "# * http://utt.amu.edu.pl *\n". - "# ************************************************************\n". - "#\n". - "# All lines must looks like:\n". - "# parameter_name [=] value\n". - "#\n\n"; -} - -sub conf_compiledic() { - my $compiledic_file = shift; - my $utthome = shift; - open(FILE, ">$compiledic_file"); - - print FILE makeConfigHeader(); - my $hm = home(); - my $lang=`grep language $hm/.utt/utt.conf | cut -d= -f 2`; - chomp $lang; -# powinno byc $utthome/share/utt/pl_PL.ISO-8859-2/pl_PL.ISO-8892-2.sym -# ale to bez sensu - print FILE "symbols=", abs_path("$utthome/share/utt"); - print FILE "/$lang/$lang.sym\n"; - - close FILE; -} - -sub conf_grp() { - my $grp_file = shift; - my $utthome = shift; - open(FILE, ">$grp_file"); - - print FILE makeConfigHeader(); - print FILE "macros=", abs_path("$utthome/lib/utt/terms.m4"), "\n"; - - close FILE; -} - -sub conf_cor() { - my $cor_file = shift; - my $utthome = shift; - open(FILE, ">$cor_file"); - - print FILE makeConfigHeader(); - print FILE "dictionary-home=", abs_path("$utthome/share/utt"), "\n"; - - close FILE; -} - -sub conf_kor() { - my $kor_file = shift; - my $utthome = shift; - open(FILE, ">$kor_file"); - - print FILE makeConfigHeader(); - print FILE "dictionary-home=", abs_path("$utthome/share/utt"), "\n"; - print FILE "weights=", abs_path("$utthome/lib/utt/weights.kor"), "\n"; - print FILE "threshold=1.0\n"; - - close FILE; -} - -sub conf_grp() { - my $grp_file = shift; - my $utthome = shift; - open(FILE, ">$grp_file"); - - print FILE makeConfigHeader(); - print FILE "macros=", abs_path("$utthome/lib/utt/terms.m4"), "\n"; - - close FILE; -} - -sub conf_gue() { - my $gue_file = shift; - my $utthome = shift; - open(FILE, ">$gue_file"); - - print FILE makeConfigHeader(); - - close FILE; -} - -sub conf_lem() { - my $lem_file = shift; - my $utthome = shift; - open(FILE, ">$lem_file"); - - print FILE makeConfigHeader(); - print FILE "dictionary-home=", abs_path("$utthome/share/utt"), "\n"; - - close FILE; -} - -sub conf_ser() { - my $ser_file = shift; - my $utthome = shift; - open(FILE, ">$ser_file"); - - print FILE makeConfigHeader(); - print FILE "macros=", abs_path("$utthome/lib/utt/terms.m4"), "\n"; - print FILE "flex-template=", abs_path("$utthome/lib/utt/ser.l.template"), "\n"; - - close FILE; -} - -sub conf_utt() { - my $utt_file = shift; - my $utthome = shift; - - my $lang = getUserLanguage(abs_path(catdir($utthome, "share/utt"))); - - open(FILE, ">$utt_file"); - - print FILE makeConfigHeader(); - print FILE "# user locale (dictionary)\n"; - print FILE "language=", $lang, "\n"; - - close FILE; -} - -sub getUserLanguage() { - my $dict_dir = shift; - - print "Setup dictionaries.\n"; - - opendir(DIR, $dict_dir) || die "Install dictionary first!\n"; - @files = grep(/[^\.{1,2}]/ && -d "$dict_dir/$_", readdir(DIR)); - closedir DIR; - my $cnt = scalar @files; - print "$cnt dictionary(ies) found.\n"; - if(0 == $cnt) { - die("Install dictionary first!\n"); - } - - foreach my $file (@files) { - print "Do you select dictionary $file [yes/no]? "; - my $ans = ; - if($ans =~ /y(es)?/i) { - return $file; - } - } - die "No dictionary selected - setup failed!\n"; -} - -# Szuka locali u uzytkownika -sub findLocale() { - $cur_locale = setlocale(LC_CTYPE); - - # we replace Latinx to ISO-8859-x - $cur_locale =~ s/(.+?)Latin(.+?)/$1ISO\-8859\-$2/g; - - if($cur_locale =~ /\w+_\w+\.\S+/) { - $best_locale = $cur_locale; - } - elsif($cur_locale =~ /\w+_\w+/) { - $best_locale = $cur_locale.".UTF-8"; - } - else { - $best_locale = toupper($cur_locale).'_'.tolower($cur_locale).'.UTF-8'; - } - return $best_locale; -} - -sub conf_dgc() { - my $dgc_file = shift; - my $utthome = shift; - open(FILE, ">$dgc_file"); - - print FILE makeConfigHeader(); - print FILE "categories=", abs_path("$utthome/lib/utt/cats.dgc"), "\n"; - print FILE "grammar=", abs_path("$utthome/lib/utt/gram.dgc"), "\n"; - - close FILE; - - - open(FILE, ">$utthome/bin/Makefile.go"); - print FILE "\ngram.dgp: ", abs_path("$utthome/lib/utt/gram.dgc"), "\n"; - print FILE "\tdgc -c ", abs_path("$utthome/lib/utt/cats.dgc"), " < ", abs_path("$utthome/lib/utt/gram.dgc"), " > gram.dgp\n"; - close FILE; - -} - +#!/usr/bin/perl +#c:\usr\perl\bin\perl.exe + +use Cwd 'abs_path'; +use File::Basename; +use File::HomeDir; +use File::Spec::Functions; +use POSIX; + +my $sys_home = catdir(dirname(abs_path($0)), ".."); +my $usr_home = catdir(home(), '.utt'); + +prepareUttUsrHome($usr_home); +conf_utt(catfile($usr_home, 'utt.conf'), $sys_home); + +conf_cor(catfile($usr_home, 'cor.conf'), $sys_home); +conf_kor(catfile($usr_home, 'kor.conf'), $sys_home); +conf_compiledic(catfile($usr_home, 'compiledic.conf'), $sys_home); +conf_grp(catfile($usr_home, 'grp.conf'), $sys_home); +conf_gue(catfile($usr_home, 'gue.conf'), $sys_home); +conf_lem(catfile($usr_home, 'lem.conf'), $sys_home); +conf_ser(catfile($usr_home, 'ser.conf'), $sys_home); +conf_dgc(catfile($usr_home, 'dgc.conf'), $sys_home); + +print "UTT user configuration created in $usr_home\n"; + + + +#Kasuje stare configi i tworzy katalog .utt +sub prepareUttUsrHome() { + my $dir = shift; + + print "Preparing user configuration.\n"; + + if(-d $dir) { + print "Old configuration detected. "; + my $cnt = unlink <$dir/*>; + print "($cnt files deleted)\n"; + } + else { + print "Creating directory $dir\n"; + if(1 != mkdir $dir) { + die "Unable to create UTT user configuration!\n"; + } + } +} + +# Tworzy naglowek dla wszystkich plikow konfiguracyjnych +sub makeConfigHeader() { + return "# ************************************************************\n". + "# * This file was created automatically during installation. *\n". + "# * If you don't need do not change it. *\n". + "# * *\n". + "# * UAM Text Tools *\n". + "# * Adam Mickiewicz University, Poland *\n". + "# * http://utt.amu.edu.pl *\n". + "# ************************************************************\n". + "#\n". + "# All lines must looks like:\n". + "# parameter_name [=] value\n". + "#\n\n"; +} + +sub conf_compiledic() { + my $compiledic_file = shift; + my $utthome = shift; + open(FILE, ">$compiledic_file"); + + print FILE makeConfigHeader(); + my $hm = home(); + my $lang=`grep language $hm/.utt/utt.conf | cut -d= -f 2`; + chomp $lang; +# powinno byc $utthome/share/utt/pl_PL.ISO-8859-2/pl_PL.ISO-8892-2.sym +# ale to bez sensu + print FILE "symbols=", abs_path("$utthome/share/utt"); + print FILE "/$lang/$lang.sym\n"; + + close FILE; +} + +sub conf_grp() { + my $grp_file = shift; + my $utthome = shift; + open(FILE, ">$grp_file"); + + print FILE makeConfigHeader(); + print FILE "macros=", abs_path("$utthome/lib/utt/terms.m4"), "\n"; + + close FILE; +} + +sub conf_cor() { + my $cor_file = shift; + my $utthome = shift; + open(FILE, ">$cor_file"); + + print FILE makeConfigHeader(); + print FILE "dictionary-home=", abs_path("$utthome/share/utt"), "\n"; + + close FILE; +} + +sub conf_kor() { + my $kor_file = shift; + my $utthome = shift; + open(FILE, ">$kor_file"); + + print FILE makeConfigHeader(); + print FILE "dictionary-home=", abs_path("$utthome/share/utt"), "\n"; + print FILE "weights=", abs_path("$utthome/lib/utt/weights.cor"), "\n"; + print FILE "threshold=1.0\n"; + + close FILE; +} + +sub conf_grp() { + my $grp_file = shift; + my $utthome = shift; + open(FILE, ">$grp_file"); + + print FILE makeConfigHeader(); + print FILE "macros=", abs_path("$utthome/lib/utt/terms.m4"), "\n"; + + close FILE; +} + +sub conf_gue() { + my $gue_file = shift; + my $utthome = shift; + open(FILE, ">$gue_file"); + + print FILE makeConfigHeader(); + + close FILE; +} + +sub conf_lem() { + my $lem_file = shift; + my $utthome = shift; + open(FILE, ">$lem_file"); + + print FILE makeConfigHeader(); + print FILE "dictionary-home=", abs_path("$utthome/share/utt"), "\n"; + + close FILE; +} + +sub conf_ser() { + my $ser_file = shift; + my $utthome = shift; + open(FILE, ">$ser_file"); + + print FILE makeConfigHeader(); + print FILE "macros=", abs_path("$utthome/lib/utt/terms.m4"), "\n"; + print FILE "flex-template=", abs_path("$utthome/lib/utt/ser.l.template"), "\n"; + + close FILE; +} + +sub conf_utt() { + my $utt_file = shift; + my $utthome = shift; + + my $lang = getUserLanguage(abs_path(catdir($utthome, "share/utt"))); + + open(FILE, ">$utt_file"); + + print FILE makeConfigHeader(); + print FILE "# user locale (dictionary)\n"; + print FILE "language=", $lang, "\n"; + + close FILE; +} + +sub getUserLanguage() { + my $dict_dir = shift; + + print "Setup dictionaries.\n"; + + opendir(DIR, $dict_dir) || die "Install dictionary first!\n"; + @files = grep(/[^\.{1,2}]/ && -d "$dict_dir/$_", readdir(DIR)); + closedir DIR; + my $cnt = scalar @files; + print "$cnt dictionary(ies) found.\n"; + if(0 == $cnt) { + die("Install dictionary first!\n"); + } + + foreach my $file (@files) { + print "Do you select dictionary $file [yes/no]? "; + my $ans = ; + if($ans =~ /y(es)?/i) { + return $file; + } + } + die "No dictionary selected - setup failed!\n"; +} + +# Szuka locali u uzytkownika +sub findLocale() { + $cur_locale = setlocale(LC_CTYPE); + + # we replace Latinx to ISO-8859-x + $cur_locale =~ s/(.+?)Latin(.+?)/$1ISO\-8859\-$2/g; + + if($cur_locale =~ /\w+_\w+\.\S+/) { + $best_locale = $cur_locale; + } + elsif($cur_locale =~ /\w+_\w+/) { + $best_locale = $cur_locale.".UTF-8"; + } + else { + $best_locale = toupper($cur_locale).'_'.tolower($cur_locale).'.UTF-8'; + } + return $best_locale; +} + +sub conf_dgc() { + my $dgc_file = shift; + my $utthome = shift; + open(FILE, ">$dgc_file"); + + print FILE makeConfigHeader(); + print FILE "catfile=", abs_path("$utthome/lib/utt/cats.dgc"), "\n"; + print FILE "gramfile=", abs_path("$utthome/lib/utt/gram.dgc"), "\n"; + + close FILE; + + + open(FILE, ">$utthome/bin/Makefile.go"); + print FILE "\ngram.dgp: ", abs_path("$utthome/lib/utt/gram.dgc"), "\n"; + print FILE "\tdgc -c ", abs_path("$utthome/lib/utt/cats.dgc"), " < ", abs_path("$utthome/lib/utt/gram.dgc"), " > gram.dgp\n"; + close FILE; + +} + diff --git a/app/src/dgp/dgc b/app/src/dgp/dgc index 2f98ce0..a1f2545 100755 --- a/app/src/dgp/dgc +++ b/app/src/dgp/dgc @@ -5,18 +5,83 @@ #version: 1.0 #author: Tomasz Obrebski +# wymaga niejawnie programu canonize!!!! #use lib "ENV{HOME}/.utt/lib/perl"; -#use strict; + +use strict; use Getopt::Long; use Data::Dumper; use attr; -#use File::HomeDir; +use File::HomeDir; + +my $systemconfigfile='/usr/local/etc/utt/dgc.conf'; +my $userconfigfile=home()."/.utt/dgc.conf"; + +Getopt::Long::Configure('no_ignore_case_always'); my $help=0; my $catfile=0; my $dicfile=0; my $gramfile=0; +my $outputfile=0; + +#read configuration files########################### +my $file; +foreach $file ($systemconfigfile, $userconfigfile){ + if(open(CONFIG, $file)){ + while () { + chomp; + s/#.*//; + s/^\s+//; + s/\s+$//; + next unless length; + my ($name, $value) = split(/\s*=\s*/, $_, 2); + if(($name eq "catfile")or($name eq "c")){ + $catfile=$value; + } + elsif(($name eq "dicfile")or($name eq "d")){ + $dicfile=$value; + } + elsif(($name eq "gramfile")or($name eq "g")){ + $gramfile=$value; + } + elsif(($name eq "outputfile")or($name eq "o")){ + $outputfile=$value; + } + elsif(($name eq "help")or($name eq "h")){ + $help=1; + } + + } + close CONFIG; + } +} +######################################################### + +GetOptions("help|h" => \$help, + "catfile|c=s" => \$catfile, + "dicfile|d=s" => \$dicfile, + "gramfile|g=s" => \$gramfile, + "outputfile|o=s" => \$outputfile); + +if($help) +{ + print <<'END' +Usage: dgc [OPTIONS] + +Options: + --catfile -c filename List of syntactic categories. + --dicfile -d filename Dictionary. + --gramfile -g filename List of grammar rules. + --outputfile -o filename Output filename. + --help -h Help. +END +; + exit 0; +} + +die("At least one of --cats and --dic must be given.\n") if !$catfile && !$dicfile; my $ncat=0; my $nrole=0; @@ -26,42 +91,44 @@ my $nright=0; my $nreq=0; my $nlink=0; -GetOptions("help|h" => \$help, - "catfile|c=s" => \$catfile, - "dicfile|d=s" => \$dicfile, - "gramfile|g=s" => \$gramfile); - -if($help) -{ - print <<'END' -Usage: dgpcompile [OPTIONS] - -Options: - --cats -c filename List of syntactic categories. - --dic -d filename Dictionary. - --help -h Help. -END -; - exit 0; -} - -die("At least one of --cats and --dic must be given.\n") if !$catfile && !$dicfile; - my %cats; my %roles; my %agr; my %gov; +if(!$outputfile) { + *OUTPUT = *STDOUT; +} +elsif($outputfile eq "-") { + *OUTPUT = *STDOUT; +} +else { + open(OUTPUT, ">$outputfile") or die("Can't open output file: $outputfile!"); +} + + + loadcats($catfile) if $catfile; extractcats($dicfile) if $dicfile; -$cats_re = qr/(?:$attr::cat_re\s*(?:,\s*$attr::cat_re)*)/; +my $cats_re = qr/(?:$attr::cat_re\s*(?:,\s*$attr::cat_re)*)/; # class parse_class: # /$attr::cat_re/g; -while(<>) + +if(!$gramfile) { + *INPUT = *STDIN; +} +elsif($gramfile eq "-"){ + *INPUT = *STDIN; +} +else { + open(INPUT, $gramfile) or die("Unable to open: $gramfile!"); +} + +while() { if(/^\s*AGR\s+(\S+)\s+(\S+)\s*$/) { @@ -74,22 +141,22 @@ while(<>) elsif(/^\s*ROLE\s+\S+\s*$/) { $roles{$_}=1; - print; + print OUTPUT; } elsif(/^\s*SGL\s+\S+\s*$/) { ++$nsgl; - print; + print OUTPUT; } elsif(/^\s*REQ\s+(\S+)\s+(\S+)\s*$/) { - print "#$_"; + print OUTPUT "#$_"; my $cat = attr::parse $1; for my $atomcat (keys %cats) { if(attr::match @$cat, @{$cats{$atomcat}}) { - print "REQ ".$atomcat." $2\n"; + print OUTPUT "REQ ".$atomcat." $2\n"; ++$nreq; } } @@ -97,19 +164,19 @@ while(<>) elsif(/^\s*LEFT\s+\S+\s*$/) { ++$nleft; - print; + print OUTPUT; } elsif(/^\s*RIGHT\s+\S+\s*$/) { ++$nright; - print; + print OUTPUT; } - elsif(($hs,$ds,$r) = /^\s*LINK\s+($cats_re)\s+($cats_re)\s+(\S+)\s*$/) + elsif(my ($hs,$ds,$r) = /^\s*LINK\s+($cats_re)\s+($cats_re)\s+(\S+)\s*$/) { - print "#$_"; - for $h ($hs =~ /$attr::cat_re/g) + print OUTPUT "#$_"; + for my $h ($hs =~ /$attr::cat_re/g) { - for $d ($ds =~ /$attr::cat_re/g) + for my $d ($ds =~ /$attr::cat_re/g) { addlinks($h,$d,$r); } @@ -118,17 +185,17 @@ while(<>) else { - print; + print OUTPUT; } } sub addlinks { - ($h,$d,$r) = @_; + my ($h,$d,$r) = @_; - for my $a (@{$agr{$r}}) { print "#AGR $r $a\n"; } - for my $c (@{$gov{$r}}) { print "#GOV $r ".attr::unparse(@$c)."\n"; } + for my $a (@{$agr{$r}}) { print OUTPUT "#AGR $r $a\n"; } + for my $c (@{$gov{$r}}) { print OUTPUT "#GOV $r ".attr::unparse(@$c)."\n"; } my $head = attr::parse $h; my $dep = attr::parse $d; @@ -151,9 +218,9 @@ sub addlinks next DEP if ! attr::match(@$c,@{$cats{$atomdep}}); } - print "LINK "; - print $atomhead." "; - print $atomdep." $r\n"; + print OUTPUT "LINK "; + print OUTPUT $atomhead." "; + print OUTPUT $atomdep." $r\n"; ++$nlink; } @@ -179,10 +246,10 @@ sub extractcats { while(/,([^[:space:];]+)/g) { - $cat=$1; + my $cat=$1; next if !$cat || exists $cats{$cat}; $ncat++; - print "CAT $1\n"; + print OUTPUT "CAT $1\n"; $cats{$cat}=attr::parse($cat); } } @@ -198,9 +265,10 @@ sub loadcats { tr/ \t\n//d; next if !$_ || exists $cats{$_}; - print "CAT $_\n"; + print OUTPUT "CAT $_\n"; ++$ncat; $cats{$_}=attr::parse($_); } close CATFILE; } + diff --git a/app/src/gph/gph b/app/src/gph/gph index 74fb304..64c2e4f 100755 --- a/app/src/gph/gph +++ b/app/src/gph/gph @@ -7,17 +7,72 @@ use strict; use Getopt::Long; +use File::HomeDir; + + + + + +my $systemconfigfile='/usr/local/etc/utt/gph.conf'; +my $userconfigfile=home()."/.utt/gph.conf"; + +Getopt::Long::Configure('no_ignore_case_always'); -my @process; my $help=0; +my $inputfile=0; +my $outputfile=0; +my @process=(); my $reset; -my $interactive=1; +my $interactive=0; + +#read configuration files########################### +my $file; +my @process_conf=(); +foreach $file ($systemconfigfile, $userconfigfile){ + if(open(CONFIG, $file)){ + while () { + chomp; + s/#.*//; + s/^\s+//; + s/\s+$//; + next unless length; + my ($name, $value) = split(/\s*=\s*/, $_, 2); + if(($name eq "inputfile")or($name eq "f")){ + $inputfile=$value; + } + elsif(($name eq "outputfile")or($name eq "o")){ + $outputfile=$value; + } + elsif(($name eq "process")or($name eq "p")){ + push @process_conf, $value; + } + elsif(($name eq "reset")or($name eq "r")){ + $reset=$value; + } + elsif(($name eq "interactive")or($name eq "i")){ + $interactive=1; + } + elsif(($name eq "help")or($name eq "h")){ + $help=1; + } + + } + close CONFIG; + } +} +######################################################### + + GetOptions("process|p=s" => \@process, + "inputfile|f=s" => \$inputfile, + "outputfile|o=s" => \$outputfile, "help|h" => \$help, "reset|r=s" => \$reset, "interactive|i" => \$interactive); +@process = @process_conf if @process<1; + if($help) { print <<'END' @@ -26,9 +81,9 @@ Usage: gph [OPTIONS] Options: -p tag Process segments with this tag as nodes. -r tag Start new graph at this tag. - -f filename Input file (NIE DZIALA). - -o filename Output file (NIE DZIALA). - -i Toggle interactive mode (default=on). + -f filename Input file. + -o filename Output file. + -i Toggle interactive mode (default=off). END ; exit 0; @@ -37,11 +92,25 @@ END $|=1 if $interactive; -my @prev; +if(!$inputfile or $inputfile eq "-") { + *INPUT = *STDIN; +} +else { + open(INPUT, "$inputfile") or die("Can't open input file: $inputfile!"); +} + +if(!$outputfile or $outputfile eq "-") { + *OUTPUT = *STDOUT; +} +else { + open(OUTPUT, "$outputfile") or die("Can't open output file: $outputfile!"); +} + +my @prev; my $n=0; -while(<>) +while() { chomp; my $do=0; @@ -88,6 +157,6 @@ while(<>) } - print $_.$gph."\n"; + print OUTPUT $_.$gph."\n"; }