From 93afab8cc25b5217cf3a7c44c862904d3b265a6c Mon Sep 17 00:00:00 2001
From: tom
Date: Wed, 14 Dec 2011 17:21:24 +0100
Subject: [PATCH] dodany komponent compdic - kompilator slownikow dla lem, cor,
 kor, gue (compdic zastepuje compiledic)

---
Reviewer notes (text here is ignored when the patch is applied):
- Line breaks and text in angle brackets were lost when this patch was
  extracted; both are reconstructed below.  The author address on the
  From: line could not be recovered.
- The file contents differ from the original commit, so the blob ids on
  the index lines are stale.

 _old/app/src/compdic/Makefile   |  27 ++++
 _old/app/src/compdic/aut2fsa.cc |  25 ++++
 _old/app/src/compdic/compdic    | 225 ++++++++++++++++++++++++++++++++
 _old/app/src/compdic/fsm2aut    |  63 +++++++++
 _old/app/src/compdic/lst2fstext |  37 +++++
 5 files changed, 377 insertions(+)
 create mode 100644 _old/app/src/compdic/Makefile
 create mode 100644 _old/app/src/compdic/aut2fsa.cc
 create mode 100755 _old/app/src/compdic/compdic
 create mode 100755 _old/app/src/compdic/fsm2aut
 create mode 100755 _old/app/src/compdic/lst2fstext

diff --git a/_old/app/src/compdic/Makefile b/_old/app/src/compdic/Makefile
new file mode 100644
index 0000000..90abb52
--- /dev/null
+++ b/_old/app/src/compdic/Makefile
@@ -0,0 +1,27 @@
+# Build rules for compdic, the dictionary compiler for lem, cor, kor and gue.
+# Only aut2fsa is compiled; compdic, fsm2aut and lst2fstext are scripts.
+
+CFLAG1 = -m32 -Wno-deprecated -O3 -fpermissive
+CFLAG_ST = -Wno-deprecated -O3 -fpermissive -static
+
+.PHONY: all copy clean
+
+all: compdic aut2fsa
+
+# compdic is a script that is used as it is, so there is nothing to build.
+compdic:
+
+# Static 32-bit variant, kept for reference:
+#   g++ -m32 -Wno-deprecated -O3 -fpermissive -static -o aut2fsa aut2fsa.cc
+aut2fsa: aut2fsa.cc
+	g++ $(CFLAG1) -o aut2fsa aut2fsa.cc
+
+# Install the tools; does nothing unless UTT_BIN_DIR is defined.
+copy:
+ifdef UTT_BIN_DIR
+	cp compdic fsm2aut aut2fsa lst2fstext ${UTT_BIN_DIR}
+endif
+
+# -f keeps "make clean" from failing when aut2fsa has not been built.
+clean:
+	rm -f aut2fsa
diff --git a/_old/app/src/compdic/aut2fsa.cc b/_old/app/src/compdic/aut2fsa.cc
new file mode 100644
index 0000000..a6a695f
--- /dev/null
+++ b/_old/app/src/compdic/aut2fsa.cc
@@ -0,0 +1,25 @@
+// aut2fsa - loads an automaton with TFTiv::read() and writes it back with
+// TFTiv::save().  compdic runs it as the last stage of its pipeline, on the
+// output of fsm2aut.
+//
+// NOTE(review): the header names of the three bare #include lines below (and
+// possibly template arguments of TFTiv) seem to have been lost when this patch
+// was extracted; restore them from the repository before building.
+
+#include
+#include
+
+#include "../lib/tfti.h"
+
+#include
+
+using namespace std;
+
+int main()
+{
+  TFTiv a;
+  a.read();
+  a.save();
+
+  return 0;
+}
diff --git a/_old/app/src/compdic/compdic b/_old/app/src/compdic/compdic
new file mode 100755
index 0000000..f691fbf
--- /dev/null
+++ b/_old/app/src/compdic/compdic
@@ -0,0 +1,225 @@
+#!/bin/sh
+#
+# compdic - compile a word list into a dictionary automaton.
+#
+# Usage: compdic [-p <parts>] <wordlist> <automaton>
+#
+# The word list is split into parts.  Each part is turned into a minimal
+# deterministic acceptor with the OpenFst command-line tools, the parts are
+# merged one at a time, and the result is piped through fsm2aut and aut2fsa.
+# lst2fstext, fsm2aut, aut2fsa and the fst* tools must be on PATH.
+#
+# NOTE(review): "<eps>", the here-document operators and the <...> placeholders
+# in the usage text were missing from the extracted patch and are
+# reconstructed here; confirm them against the repository.
+
+no_of_parts=0
+
+while [ $# -gt 2 ]
+do
+    case $1
+    in
+        -p)
+            no_of_parts=$2
+            shift 2
+            ;;
+
+        *)
+            echo "The arguments to use are"
+            echo "-p: number of parts"
+            shift 1
+            ;;
+    esac
+done
+
+if [ $# -lt 2 ]
+then
+    echo "Usage:"
+    echo "        compdic [-p <parts>] <wordlist> <automaton>"
+    echo "where"
+    echo "    <wordlist> - file containing a list of words, one per line, iso-8859-2 encoded"
+    echo "    <automaton> - a file to which the compiled automaton (cor/kor format) should be written"
+    # A usage error is a failure; do not report success to the caller.
+    exit 1
+fi
+
+wordlist=$1
+output=$2
+
+# Reject a non-numeric -p value before it reaches the arithmetic below.
+case $no_of_parts in
+    ''|*[!0-9]*)
+        echo "compdic: -p expects a non-negative integer" >&2
+        exit 1
+        ;;
+esac
+
+if [ ! -r "$wordlist" ]
+then
+    echo "compdic: cannot read word list: $wordlist" >&2
+    exit 1
+fi
+
+total_lines=`wc -l < "$wordlist"`
+
+# Default: one part for every 75000 lines of the word list, plus one.
+if [ "$no_of_parts" -eq 0 ]
+then
+    no_of_parts=$(( $total_lines / 75000 + 1 ))
+fi
+
+echo number of parts: $no_of_parts
+
+tempdir=`mktemp -d /tmp/compdic.XXXXXX` || exit 1
+# Remove the scratch directory on every exit path, including signals.
+trap 'rm -rf "$tempdir"' EXIT
+trap 'exit 1' HUP INT TERM
+
+# Symbol table for fstcompile: one "symbol id" pair per line.
+# NOTE(review): the non-ASCII symbols are presumably single iso-8859-2
+# bytes matching the word list encoding; keep this table in that encoding.
+alphabet=$tempdir/alphabet.syms
+
+cat <<'EOF' > "$alphabet"
+<eps> 0
+a 1
+A 2
+ä 3
+± 4
+¡ 5
+b 6
+B 7
+c 8
+C 9
+æ 10
+Æ 11
+d 12
+D 13
+e 14
+E 15
+é 16
+ê 17
+Ê 18
+f 19
+F 20
+g 21
+G 22
+h 23
+H 24
+i 25
+I 26
+j 27
+J 28
+k 29
+K 30
+l 31
+L 32
+³ 33
+£ 34
+m 35
+M 36
+n 37
+N 38
+ñ 39
+Ñ 40
+o 41
+O 42
+ö 43
+ó 44
+Ó 45
+p 46
+P 47
+q 48
+Q 49
+r 50
+R 51
+s 52
+S 53
+¶ 54
+¦ 55
+t 56
+T 57
+u 58
+U 59
+ü 60
+v 61
+V 62
+w 63
+W 64
+x 65
+X 66
+y 67
+Y 68
+z 69
+Z 70
+¼ 71
+¬ 72
+¿ 73
+¯ 74
+0 75
+1 76
+2 77
+3 78
+4 79
+5 80
+6 81
+7 82
+8 83
+9 84
+_ 85
+- 86
+? 87
+! 88
+~ 89
+; 90
+, 91
+/ 92
+* 93
++ 94
+EOF
+
+# The +1 keeps integer division from producing more than no_of_parts pieces.
+no_of_lines=$(( $total_lines / $no_of_parts + 1 ))
+
+split -l $no_of_lines "$wordlist" "$tempdir/part."
+
+automaton=$tempdir/output.fst
+# Scratch files, reused for every part.
+temp1=$tempdir/tmp1.fst
+temp2=$tempdir/tmp2.fst
+temp3=$tempdir/tmp3.fst
+
+n=0
+
+for f in "$tempdir"/part.*
+do
+    # With an empty word list the pattern matches nothing and stays literal.
+    [ -f "$f" ] || continue
+
+    n=$(( $n + 1 ))
+    echo processing part $n
+
+    lst2fstext < "$f" |
+        fstcompile --acceptor --isymbols="$alphabet" --keep_isymbols |
+        fstrmepsilon |
+        fstdeterminize > "$temp1"
+    fstminimize "$temp1" "$temp2" || exit 1
+
+    if [ $n -eq 1 ]
+    then
+        # The first part is the automaton so far.  This avoids relying on
+        # fstunion accepting a zero-byte placeholder file as an FST.
+        mv "$temp2" "$automaton"
+    else
+        fstunion "$automaton" "$temp2" | fstrmepsilon | fstdeterminize > "$temp3"
+        fstminimize "$temp3" "$automaton" || exit 1
+    fi
+done
+
+if [ $n -eq 0 ]
+then
+    echo "compdic: word list is empty: $wordlist" >&2
+    exit 1
+fi
+
+fsttopsort < "$automaton" | fstprint --acceptor | fsm2aut | aut2fsa > "$output"
diff --git a/_old/app/src/compdic/fsm2aut b/_old/app/src/compdic/fsm2aut
new file mode 100755
index 0000000..ee25876
--- /dev/null
+++ b/_old/app/src/compdic/fsm2aut
@@ -0,0 +1,63 @@
+#!/usr/bin/perl
+#
+# fsm2aut - convert the text output of "fstprint --acceptor" into the text
+# automaton format that compdic pipes into aut2fsa.
+#
+# Input lines:
+#   FROM TO CHAR   a transition labelled with a single character
+#   STATE          a final state
+# Any other line aborts the program with "Input error.".
+#
+# Output: a header line "STATES TRANSITIONS char void", then one line per
+# state: its number, "+" (final) or "-" (not final), and a " CHAR TO" pair
+# for each of its transitions.
+
+use strict;
+use warnings;
+
+my @states;    # $states[N] = [ CHAR, TO, CHAR, TO, ... ] for state N
+my @final;     # $final[N] is set for final states
+my $tn = 0;    # number of transitions read
+
+while (my $line = <>)
+{
+    if ($line =~ /^\s*([0-9]+)\s+([0-9]+)\s+(.)(\s*)?$/)
+    {
+        my ($from, $to, $char) = ($1, $2, $3);
+        push @{ $states[$from] }, $char, $to;
+        # Make sure the target state gets an output line even without arcs.
+        $#states = $to if $#states < $to;
+        $tn++;
+    }
+    elsif ($line =~ /^\s*([0-9]+)\s*$/)
+    {
+        my $state = $1;
+        $final[$state] = 1;
+        $#states = $state if $#states < $state;
+    }
+    else
+    {
+        die("Input error.");
+    }
+}
+
+print scalar(@states), " ", $tn, " char void\n";
+
+# Minimum field width for state numbers.  NOTE(review): for most sizes this is
+# one less than the number of digits of the largest state number, so columns
+# are only partly aligned; left as is to keep the output unchanged.
+my $width = int(log(@states + 1) / log(10));
+
+for my $i (0 .. $#states)
+{
+    my $mark = $final[$i] ? "+" : "-";
+    printf "%${width}d %s", $i, $mark;
+
+    # States without outgoing transitions have no array (undef entry).
+    my @arcs = $states[$i] ? @{ $states[$i] } : ();
+    while (my ($char, $to) = splice(@arcs, 0, 2))
+    {
+        print " $char $to";
+    }
+    print "\n";
+}
diff --git a/_old/app/src/compdic/lst2fstext b/_old/app/src/compdic/lst2fstext
new file mode 100755
index 0000000..592bd82
--- /dev/null
+++ b/_old/app/src/compdic/lst2fstext
@@ -0,0 +1,37 @@
+#!/usr/bin/env perl
+#
+# lst2fstext - turn a word list (one word per line) into the text description
+# of an acceptor, in the form compdic feeds to "fstcompile --acceptor".
+#
+# Every word gets its own chain of fresh states: an <eps> arc out of state 0,
+# one arc per character, and an <eps> arc into state 1.  State 1 is printed
+# last, on a line of its own, which marks it as final.
+
+use strict;
+use warnings;
+use locale;
+
+my $final = 1;    # the single final state
+my $state = 1;    # highest state number handed out so far
+
+while (my $word = <>)
+{
+    chomp $word;
+
+    ++$state;
+    print "0 $state <eps>\n";
+
+    # Walk the characters with a list loop.  Testing a shifted character for
+    # truth would end the loop at the first "0", which is false in Perl, and
+    # silently truncate every word that contains that digit.
+    for my $char (split //, $word)
+    {
+        my $next = $state + 1;
+        print "$state $next $char\n";
+        $state = $next;
+    }
+
+    print "$state $final <eps>\n";
+}
+
+print "$final\n";