utt/_old/nawszelkiwypadek/tools/gue_dic/prep.pl
tom a6e708f37f ANULOWANIE POPRZEDNIEGO COMMITU
Revert "Replacing old implementation with working implementation"

This reverts commit 1e121f45e2.

 Please enter the commit message for your changes. Lines starting
 with '#' will be ignored, and an empty message aborts the commit.

 Committer: tom <tom@lim.(none)>

 On branch master
 Changes to be committed:
   (use "git reset HEAD <file>..." to unstage)

	modified:   _old/app/Makefile
	deleted:    _old/app/conf/Makefile
	deleted:    _old/app/conf/compiledic.conf
	deleted:    _old/app/conf/cor.conf
	deleted:    _old/app/conf/dgc.conf
	deleted:    _old/app/conf/dgp.conf
	deleted:    _old/app/conf/gph.conf
	deleted:    _old/app/conf/grp.conf
	deleted:    _old/app/conf/gue.conf
	deleted:    _old/app/conf/kor.conf
	deleted:    _old/app/conf/lem.conf
	deleted:    _old/app/conf/mar.conf
	deleted:    _old/app/conf/ser.conf
	deleted:    _old/app/conf/utt.conf
	modified:   _old/app/src/common/Makefile
	modified:   _old/app/src/compiledic/Makefile
	modified:   _old/app/src/compiledic/aut2fsa.cc
	modified:   _old/app/src/cor/Makefile
	modified:   _old/app/src/dgp/Makefile
	new file:   _old/app/src/dgp/canonize
	new file:   _old/app/src/dgp/dgc
	modified:   _old/app/src/dgp/grammar.hh
	modified:   _old/app/src/dgp/mgraph.hh
	modified:   _old/app/src/dgp/sgraph.hh
	modified:   _old/app/src/dgp/thesymbols.hh
	new file:   _old/app/src/dgp/tre
	modified:   _old/app/src/gue/Makefile
	modified:   _old/app/src/gue/guess.cc
	modified:   _old/app/src/kor/Makefile
	modified:   _old/app/src/kor/corlist.cc
	modified:   _old/app/src/kor/corr.cc
	new file:   _old/app/src/kor/corr.hh
	modified:   _old/app/src/kor/main.cc
	modified:   _old/app/src/lem/Makefile
	modified:   _old/app/src/lem/lem.cc
	modified:   _old/app/src/lib/Makefile
	modified:   _old/app/src/lib/auttools.cc
	modified:   _old/app/src/lib/symtab.cc
	modified:   _old/app/src/lib/tft.h
	modified:   _old/app/src/lib/tfti.h
	modified:   _old/app/src/lib/ttrans.h
	modified:   _old/app/src/lib/word.cc
	modified:   _old/app/src/lib/word.h
	modified:   _old/app/src/tok.c/Makefile
	modified:   _old/app/src/tok.c/cmdline_tok.ggo
	modified:   _old/app/src/tok.c/common_tok.cc
	modified:   _old/app/src/tok/Makefile
	modified:   _old/nawszelkiwypadek/tools/aut2fsa
	modified:   _old/nawszelkiwypadek/tools/cor_dic/makeLabels.pl
	modified:   _old/nawszelkiwypadek/tools/cor_dic/prep.pl
	modified:   _old/nawszelkiwypadek/tools/fsm2aut
	modified:   _old/nawszelkiwypadek/tools/gue_dic/canon.pl
	modified:   _old/nawszelkiwypadek/tools/gue_dic/compile_user_dict.pl
	modified:   _old/nawszelkiwypadek/tools/gue_dic/count_prefs.pl
	modified:   _old/nawszelkiwypadek/tools/gue_dic/cut_prefs.pl
	modified:   _old/nawszelkiwypadek/tools/gue_dic/makeLabels.pl
	modified:   _old/nawszelkiwypadek/tools/gue_dic/prep.pl
	modified:   _old/nawszelkiwypadek/tools/gue_dic/prep_user_dict.pl
	modified:   _old/nawszelkiwypadek/tools/gue_dic/rmDup.pl
	modified:   _old/nawszelkiwypadek/tools/gue_dic/stat.pl
	modified:   _old/nawszelkiwypadek/tools/gue_dic/stat_pre.pl
	modified:   _old/nawszelkiwypadek/tools/lem_dic/makeLabels.pl
	modified:   _old/nawszelkiwypadek/tools/lem_dic/prep.pl
	modified:   auto/defaults
	modified:   auto/options
	modified:   auto/output/Makefile
	modified:   auto/output/config_h
	modified:   auto/summary
	modified:   configure
2011-12-14 16:08:41 +01:00

214 lines
3.6 KiB
Perl
Executable File

#! /usr/bin/perl
use locale;
# Number of dictionary lines written to each temporary chunk file.
$linesPerFile = 20000;

# The dictionary file argument is mandatory; without it, show the usage
# line and stop.
unless (@ARGV) {
    print "usage: prep.pl dictionary_file\n";
    exit;
}

# First argument: dictionary file. Optional second argument: analysis kind.
# "pre" selects stat_pre.pl later on; any other value is processed by
# stat.pl. A missing or empty kind defaults to "suf".
$file = shift @ARGV;
$kind = shift @ARGV;
$kind = "suf" if $kind eq "";
# Prepare the label files (this step prints no progress message).
`makeLabels.pl > labels.sym`;
`lexmakelab labels`;

# Dictionary analysis. The statistics stage depends on the requested kind:
# "pre" uses stat_pre.pl, everything else uses stat.pl with the prefix list.
my $statStage = ($kind eq "pre")
    ? [ "(pre).............................", "stat_pre.pl temp2 > temp1" ]
    : [ "(suf).............................", "stat.pl prefs < temp2 > temp1" ];

# Each stage is a progress message followed by the shell commands to run in
# order. "OK" is printed after the stage's commands have returned; their
# exit statuses are not inspected.
my @prepStages = (
    # Canonize the descriptions.
    [ "Kanonizujê opisy.........................................",
      "canon.pl <$file >temp2" ],
    # Analyse prefixes; the intermediate prefsS file is removed afterwards.
    [ "Analizuje prefiksy.......................................",
      "count_prefs.pl 2 4 < temp2 > prefs",
      "sort -k1,1 -k3,3nr prefs > prefsS",
      "cut_prefs.pl 0.5 0.01 100 prefsS > prefs",
      "rm prefsS" ],
    # Analyse the dictionary file.
    [ "Analizujê plik s³ownika" . $statStage->[0],
      $statStage->[1] ],
    # Sort the file (fields are separated by '~') before shrinking it.
    [ "Sortujê plik.............................................",
      "sort -t \\~ -k1,1 -k2,2nr <temp1 > temp2" ],
    # Minimize the dictionary file; the result ends up in temp2 again.
    # (A second pass with rmDup2.pl is disabled.)
    [ "Minimalizujê plik s³ownika...............................",
      "rmDup.pl < temp2 > temp1",
      "cp temp1 temp2",
      "rm temp1" ],
);

foreach my $stage (@prepStages) {
    my ($message, @shellCommands) = @$stage;
    print $message;
    foreach my $shellCommand (@shellCommands) {
        `$shellCommand`;
    }
    print "OK\n";
}
# Split the dictionary (temp2) into smaller parts stored as LemTEMP/slo_N.
# lexcomplex is later run on each part separately and the results are merged
# with fsmunion.
#
# Sets $fileCount to the index of the last chunk written (chunks are
# numbered from 0). Dies if the input or a chunk file cannot be opened or
# if a chunk cannot be written completely.
print "Dzielê s³ownik na mniejsze czê¶ci........................";
open(my $dictIn, '<', "./temp2")
    or die "prep.pl: cannot open ./temp2 for reading: $!\n";
$lineCount = 0;
$fileCount = 0;
`mkdir LemTEMP`;
open(my $chunkOut, '>', "LemTEMP/slo_0")
    or die "prep.pl: cannot create LemTEMP/slo_0: $!\n";
while (my $entry = <$dictIn>) {
    # Start a new chunk once the current one has reached its line quota.
    if (++$lineCount >= $linesPerFile) {
        $fileCount++;
        $lineCount = 0;
        close($chunkOut)
            or die "prep.pl: error writing chunk before LemTEMP/slo_$fileCount: $!\n";
        open($chunkOut, '>', "LemTEMP/slo_" . $fileCount)
            or die "prep.pl: cannot create LemTEMP/slo_$fileCount: $!\n";
    }
    print {$chunkOut} $entry;
}
# Close the last chunk explicitly so that its buffered data is on disk before
# external tools read it, instead of relying on an implicit flush.
close($chunkOut)
    or die "prep.pl: error writing LemTEMP/slo_$fileCount: $!\n";
close($dictIn);
print "OK\n";
# Compile every chunk LemTEMP/slo_N into LemTEMP/slownik_N.fsm with
# lexcomplex, for N from 0 to $fileCount inclusive.
print "Tworzê automaty po¶rednie";

# The progress bar is meant to be 32 dots wide: a dot is printed whenever at
# least $chunksPerDot chunks have been handled since the previous dot (the
# counter starts "full", so the first chunk always prints one).
my $chunksPerDot = $fileCount / 32;
my $sinceLastDot = $chunksPerDot;
my $dotsShown    = 0;
foreach my $chunk (0 .. $fileCount) {
    if ($sinceLastDot >= $chunksPerDot) {
        $sinceLastDot = 0;
        print ".";
        $dotsShown++;
    }
    $sinceLastDot++;
    `lexcomplex -l labels.lab -S labels.scl < LemTEMP/slo_${chunk} > LemTEMP/slownik_${chunk}.fsm`;
}

# Pad the bar up to 32 dots when fewer were printed above.
print "." x (32 - $dotsShown) if $dotsShown < 32;
print "OK\n";

# The text chunks are no longer needed once they have been compiled.
`rm LemTEMP/slo_*`;
# Merge all intermediate automata left in LemTEMP into slownik1.fsm.
#
# The union is done by a single fsmunion call over every file in LemTEMP
# (at this point only the slownik_N.fsm files remain there).
# NOTE(review): this presumably relies on fsmunion accepting any number of
# input automata, including just one — confirm against the FSM tools docs.
#
# An earlier pairwise-union loop was removed: its bound used the misspelled
# variable $filecount (always undefined), so it never executed, and it named
# the inputs without the ".fsm" suffix. The preliminary copy into
# slownik1.fsm was dropped as well, because the redirection below truncates
# that file before fsmunion runs.
print "Tworzê automat koñcowy";

# Fixed-width progress bar of 35 dots, shown before the union starts.
print "." x 35;
`fsmunion LemTEMP/* > slownik1.fsm`;
print "OK\n";
# Post-process the merged automaton and convert it to the final gue.bin.
# Each stage is a progress message followed by the shell commands to run in
# order; "OK" is printed after the commands have returned. Exit statuses
# are not inspected.
my @finishingStages = (
    # Remove epsilon transitions.
    [ "Usuwam epsilon-przejscia.................................",
      "fsmrmepsilon slownik1.fsm > slownik2.fsm",
      "rm slownik1.fsm" ],
    # Determinize the automaton.
    [ "Determinizujê automat....................................",
      "fsmdeterminize slownik2.fsm > slownik1.fsm",
      "rm slownik2.fsm" ],
    # Minimize the automaton.
    [ "Minimalizujê automat.....................................",
      "fsmminimize slownik1.fsm > slownik.fsm",
      "rm slownik1.fsm" ],
    # Convert the automaton: fsm -> text -> aut -> gue.bin.
    [ "Konwertujê automat do formatu fsa........................",
      "fsmprint -i labels.lab slownik.fsm > slownik.txt",
      "../fsm2aut slownik.txt > slownik.aut",
      "../aut2fsa < slownik.aut > gue.bin" ],
    # Clean up the auxiliary files and the temporary directory.
    [ "Czyszczê pliki pomocnicze................................",
      "rm LemTEMP/*",
      "rmdir LemTEMP",
      "rm temp2",
      "rm slownik.fsm",
      "rm slownik.txt",
      "rm slownik.aut" ],
);

foreach my $finishingStage (@finishingStages) {
    my ($banner, @stageCommands) = @$finishingStage;
    print $banner;
    foreach my $stageCommand (@stageCommands) {
        `$stageCommand`;
    }
    print "OK\n";
}