uzupelnione configi
poprawiona obsluga opcji weight w gue; tre znajduje swoja biblioteke (nie znajdywal wczesniej). git-svn-id: svn://atos.wmid.amu.edu.pl/utt@51 e293616e-ec6a-49c2-aa92-f4a8b91c5d16
This commit is contained in:
parent
6ac84d8bba
commit
19760efd7b
1
app/TODO
1
app/TODO
@ -1,6 +1,7 @@
|
||||
BARDZO WAZNE:
|
||||
* przemyslec sposob wybierania jezyka / slownika po zainstalowaniu roznych dystrybucji [PK, TO]
|
||||
* gue nie sortuje wynikow, opcja weights dziala na odwrot
|
||||
* kor nie wykonuje zamian <jednalitera> -> <dwielitery>, np. ż rz
|
||||
|
||||
WAZNE:
|
||||
* zamienic kota na lepszego (Kubis) [TO]
|
||||
|
@ -13,3 +13,4 @@
|
||||
dictionary-home = PATH_PREFIX/share/utt
|
||||
weights = PATH_PREFIX/share/utt/weights.kor
|
||||
threshold = 1.0
|
||||
process=W
|
||||
|
@ -12,3 +12,4 @@
|
||||
#
|
||||
macros = PATH_PREFIX/lib/utt/terms.m4
|
||||
flex-template = PATH_PREFIX/lib/utt/ser.l.template
|
||||
tags=uam
|
||||
|
@ -10,7 +10,7 @@
|
||||
@copying
|
||||
This manual is for UAM Text Tools (version 0.90, November, 2007)
|
||||
|
||||
Copyright @copyright{} 2005, 2007 Tomasz Obrębski, Michał Stolarski, Justyna Walkowska, Paweł Konieczka.
|
||||
Copyright @copyright{} 2005, 2007 Tomasz Obrębski, Michał Stolarski, Justyna Walkowska, Paweł Konieczka.
|
||||
|
||||
Permission is granted to copy, distribute and/or modify this document
|
||||
under the terms of the GNU Free Documentation License, Version 1.2
|
||||
@ -127,6 +127,7 @@ List of contributors:
|
||||
@item Michal Stolarski
|
||||
@item Marcin Walas
|
||||
@item Justyna Walkowska
|
||||
@item Paweł Wereński
|
||||
@end itemize
|
||||
|
||||
@c ----------------------------------------------------------------------
|
||||
@ -248,7 +249,7 @@ sentence: @samp{Piszemy dobre progrumy.}
|
||||
|
||||
@example
|
||||
0000 00 BOS *
|
||||
0000 07 W Piszemy lem:pisać,V
|
||||
0000 07 W Piszemy lem:pisać,V
|
||||
0007 01 S _
|
||||
0008 05 W dobre lem:dobry,ADJ
|
||||
0013 01 S _
|
||||
@ -259,7 +260,7 @@ sentence: @samp{Piszemy dobre progrumy.}
|
||||
0024 00 BOS *
|
||||
0024 11 W Warszawiacy lem:Warszawiak,N
|
||||
0035 01 S _
|
||||
0036 03 W też
|
||||
0036 03 W też
|
||||
0039 01 P .
|
||||
0040 00 EOS *
|
||||
|
||||
@ -267,7 +268,7 @@ sentence: @samp{Piszemy dobre progrumy.}
|
||||
|
||||
@example
|
||||
0000 BOS *
|
||||
0000 W Piszemy lem:pisać,V
|
||||
0000 W Piszemy lem:pisać,V
|
||||
0007 S _
|
||||
0008 W dobre lem:dobry,ADJ
|
||||
0013 S _
|
||||
@ -280,7 +281,7 @@ Position information may be provided only for some types of segments:
|
||||
|
||||
@example
|
||||
0000 BOS *
|
||||
W Piszemy lem:pisać,V
|
||||
W Piszemy lem:pisać,V
|
||||
S _
|
||||
W dobre lem:dobry,ADJ
|
||||
S _
|
||||
@ -291,7 +292,7 @@ S _
|
||||
0024 BOS *
|
||||
W Warszawiacy lem:Warszawiak,N
|
||||
S _
|
||||
W też
|
||||
W też
|
||||
P .
|
||||
EOS *
|
||||
@end example
|
||||
@ -405,7 +406,7 @@ as ISO, ANSI, DOS, UTF-8 (probably: not tested yet).
|
||||
@c @end table
|
||||
|
||||
|
||||
@c [JAK UZYSKAĆ POLSKIE CZCIONKI W DVI???]
|
||||
@c [JAK UZYSKAĆ POLSKIE CZCIONKI W DVI???]
|
||||
|
||||
@macro parhelp
|
||||
@item @b{@minus{}@minus{}help}, @b{@minus{}h}
|
||||
@ -718,7 +719,7 @@ Sinks: programs which read UTT data and produce output in another format
|
||||
@c ----------------------------------------
|
||||
|
||||
@multitable {aaaaaaaaaaaaaaaaaaaaaaaaa} {aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa}
|
||||
@item @strong{Authors:} @tab Tomasz Obrębski
|
||||
@item @strong{Authors:} @tab Tomasz Obrębski
|
||||
@item @strong{Component category:} @tab source
|
||||
@end multitable
|
||||
|
||||
@ -820,7 +821,7 @@ Output:
|
||||
@c @node sen - sentencizer
|
||||
@c @chapter sen - sentencizer
|
||||
|
||||
@c Authors: Tomasz Obrębski
|
||||
@c Authors: Tomasz Obrębski
|
||||
|
||||
@c ---------------------------------------------------------------------
|
||||
@c LEM
|
||||
@ -831,7 +832,7 @@ Output:
|
||||
@section lem - morphological analyzer
|
||||
|
||||
@multitable {aaaaaaaaaaaaaaaaaaaaaaaaa} {aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa}
|
||||
@item @strong{Authors:} @tab Tomasz Obrębski, Michał Stolarski
|
||||
@item @strong{Authors:} @tab Tomasz Obrębski, Michał Stolarski
|
||||
@item @strong{Component category:} @tab filter
|
||||
@end multitable
|
||||
|
||||
@ -932,7 +933,7 @@ Input:
|
||||
Output (default):
|
||||
|
||||
@example
|
||||
0000 07 W Piszemy lem:pisać,V/AiVpMdTrfNpP1
|
||||
0000 07 W Piszemy lem:pisać,V/AiVpMdTrfNpP1
|
||||
0007 01 B _
|
||||
0008 05 W dobre lem:dobry,ADJ/DpNpCnavGaifn
|
||||
0008 05 W dobre lem:dobry,ADJ/DpNsCnavGn
|
||||
@ -947,7 +948,7 @@ Output (default):
|
||||
Output (@option{--one-line} option):
|
||||
|
||||
@example
|
||||
0000 07 W Piszemy lem:pisać,V/AiVpMdTrfNpP1
|
||||
0000 07 W Piszemy lem:pisać,V/AiVpMdTrfNpP1
|
||||
0007 01 S _
|
||||
0008 05 W dobre lem:dobry,ADJ/DpNpCnavGaifn lem:dobry,ADJ/DpNsCnavGn
|
||||
0013 01 S _
|
||||
@ -959,7 +960,7 @@ Output (@option{--one-line} option):
|
||||
Output (@option{--one-field} option):
|
||||
|
||||
@example
|
||||
0000 07 W Piszemy lem:pisać,V/AiVpMdTrfNpP1
|
||||
0000 07 W Piszemy lem:pisać,V/AiVpMdTrfNpP1
|
||||
0007 01 S _
|
||||
0008 05 W dobre lem:dobry,ADJ/DpNpCnavGaifn,ADJ/DpNsCnavGn
|
||||
0013 01 S _
|
||||
@ -993,7 +994,7 @@ Dictionary entries have the following structure:
|
||||
meaning: replace prefix of length @code{<cut1>} with
|
||||
string @code{<add1>}, replace suffix of length @code{<cut2>} with string
|
||||
@code{<add2>}. For example @code{3t} transforms @samp{kocie} into
|
||||
@samp{kot}, @code{3-4ały} transforms @samp{najbielsi} into @samp{biały}
|
||||
@samp{kot}, @code{3-4ały} transforms @samp{najbielsi} into @samp{biały}
|
||||
|
||||
Each dictionary entry must be written in one line and must not contain blank characters.
|
||||
|
||||
@ -1004,8 +1005,8 @@ kota;1,N/GaNsCg;1,N/GaNsCa
|
||||
kotu;1,N/GaNsCd
|
||||
kotem;2,N/GaNsCi
|
||||
kocie;3t,N/GaNsCl;3t,N/GaNsCv
|
||||
najbielsi;3-4ały,ADJ/DsNpCnGp
|
||||
najbielsze;3-5ały,ADJ/DsNpCnGaifn
|
||||
najbielsi;3-4ały,ADJ/DsNpCnGp
|
||||
najbielsze;3-5ały,ADJ/DsNpCnGaifn
|
||||
najlepsi;dobry,ADJ/DsNpCnGp
|
||||
najlepsze;dobry,ADJ/DsNpCnGaifn
|
||||
@end example
|
||||
@ -1064,7 +1065,7 @@ located by default in:
|
||||
|
||||
@multitable {aaaaaaaaaaaaaaaaaaaaaaaaa} {aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa}
|
||||
|
||||
@item @strong{Authors:} @tab Michał Stolarski, Tomasz Obrębski
|
||||
@item @strong{Authors:} @tab Michał Stolarski, Tomasz Obrębski
|
||||
@item @strong{Component category:} @tab filter
|
||||
|
||||
@end multitable
|
||||
@ -1155,7 +1156,7 @@ string @var{add1}, replace suffix of length @var{cut2} with string
|
||||
@var{add2}.
|
||||
|
||||
|
||||
Example: @code{3-4ały} transforms @i{najbielsi} into @i{biały}
|
||||
Example: @code{3-4ały} transforms @i{najbielsi} into @i{biały}
|
||||
|
||||
|
||||
@var{description} contains the part of speech and morphosyntactic information (@xref{PMDBF dictionary}.).
|
||||
@ -1164,8 +1165,8 @@ Example: @code{3-4a
|
||||
likelihood of the guess.
|
||||
|
||||
@example
|
||||
*łkę;1a,N/GfNsCa
|
||||
naj*elszy;3-4ały,ADJ/...:...
|
||||
*łkę;1a,N/GfNsCa
|
||||
naj*elszy;3-4ały,ADJ/...:...
|
||||
@end example
|
||||
|
||||
|
||||
@ -1178,7 +1179,7 @@ naj*elszy;3-4a
|
||||
@section cor - spelling corrector
|
||||
|
||||
@multitable {aaaaaaaaaaaaaaaaaaaaaaaaa} {aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa}
|
||||
@item @strong{Authors:} @tab Tomasz Obrębski, Michał Stolarski
|
||||
@item @strong{Authors:} @tab Tomasz Obrębski, Michał Stolarski
|
||||
@item @strong{Component category:} @tab filter
|
||||
@end multitable
|
||||
|
||||
@ -1247,7 +1248,7 @@ odludek
|
||||
|
||||
@multitable {aaaaaaaaaaaaaaaaaaaaaaaaa} {aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa}
|
||||
|
||||
@item @strong{Authors:} @tab Tomasz Obrębski
|
||||
@item @strong{Authors:} @tab Tomasz Obrębski
|
||||
@item @strong{Component category:} @tab filter
|
||||
|
||||
@end multitable
|
||||
@ -1267,7 +1268,7 @@ odludek
|
||||
command: sen
|
||||
|
||||
input:
|
||||
0000 05 W Cześć
|
||||
0000 05 W Cześć
|
||||
0005 01 P !
|
||||
0006 01 S _
|
||||
0007 02 W To
|
||||
@ -1278,7 +1279,7 @@ input:
|
||||
|
||||
output:
|
||||
0000 00 BOS *
|
||||
0000 05 W Cześć
|
||||
0000 05 W Cześć
|
||||
0005 01 P !
|
||||
0006 00 EOS *
|
||||
0006 00 BOS *
|
||||
@ -1299,7 +1300,7 @@ output:
|
||||
@c @node gph - graphizer
|
||||
@c @chapter gph - graphizer
|
||||
|
||||
@c Authors: Tomasz Obrębski
|
||||
@c Authors: Tomasz Obrębski
|
||||
|
||||
|
||||
|
||||
@ -1312,7 +1313,7 @@ output:
|
||||
@section ser - pattern search tool
|
||||
|
||||
@multitable {aaaaaaaaaaaaaaaaaaaaaaaaa} {aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa}
|
||||
@item @strong{Authors:} @tab Tomasz Obrębski
|
||||
@item @strong{Authors:} @tab Tomasz Obrębski
|
||||
@item @strong{Component category:} @tab filter
|
||||
@end multitable
|
||||
|
||||
@ -1540,7 +1541,7 @@ installed in the system:
|
||||
@section grp - pattern search tool
|
||||
|
||||
@multitable {aaaaaaaaaaaaaaaaaaaaaaaaa} {aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa}
|
||||
@item @strong{Authors:} @tab Tomasz Obrębski
|
||||
@item @strong{Authors:} @tab Tomasz Obrębski
|
||||
@item @strong{Component category:} @tab filter
|
||||
@end multitable
|
||||
|
||||
@ -1634,7 +1635,7 @@ lzop -cd corpus.grp.lzo | grp -a gP -e @var{EXPR} | ser -e @var{EXPR}
|
||||
@node kot
|
||||
@section kot - untokenizer
|
||||
|
||||
Authors: Tomasz Obrębski
|
||||
Authors: Tomasz Obrębski
|
||||
|
||||
@command{kot} is the opposite of @command{tok}. It changes UTT-formatted text into plain text.
|
||||
|
||||
@ -1849,7 +1850,7 @@ termination of the program.
|
||||
@section fla - the UTT file flattener
|
||||
|
||||
@multitable {aaaaaaaaaaaaaaaaaaaaaaaaa} {aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa}
|
||||
@item @strong{Authors:} @tab Tomasz Obrębski
|
||||
@item @strong{Authors:} @tab Tomasz Obrębski
|
||||
@item @strong{Component category:} @tab filter
|
||||
@end multitable
|
||||
@c
|
||||
@ -1888,7 +1889,7 @@ default, segments containing a field @code{BOS} are sought.
|
||||
@section unfla - the UTT file unflattener
|
||||
|
||||
@multitable {aaaaaaaaaaaaaaaaaaaaaaaaa} {aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa}
|
||||
@item @strong{Authors:} @tab Tomasz Obrębski
|
||||
@item @strong{Authors:} @tab Tomasz Obrębski
|
||||
@item @strong{Component category:} @tab filter
|
||||
@end multitable
|
||||
|
||||
|
@ -1,6 +1,10 @@
|
||||
#!/usr/bin/ruby -I /usr/local/lib/utt -I $HOME/.local/lib/utt
|
||||
|
||||
$: << "#{ENV['HOME']}/.local/lib/utt"
|
||||
$: << "/usr/local/lib/utt"
|
||||
|
||||
require 'getoptlong'
|
||||
require 'seg.rb'
|
||||
|
||||
opts = GetoptLong.new(
|
||||
[ '--help', '-h', GetoptLong::NO_ARGUMENT ],
|
||||
@ -60,9 +64,6 @@ if $INFO=='DEFAULT'
|
||||
end
|
||||
end
|
||||
|
||||
#require File.expand_path(File.dirname(__FILE__) + "../lib/utt/seg.rb")
|
||||
require 'seg.rb'
|
||||
|
||||
$dgpsep=';'
|
||||
|
||||
def tre(input)
|
||||
|
@ -7,6 +7,6 @@ option "cut-off" - "Do not display answers with less weight than cut-off" int d
|
||||
option "dictionary-home" - "dh" string typestr="FILENAME" no hidden
|
||||
option "dictionary" d "File with dictionary information" string typestr="filename" default="gue.bin" no
|
||||
option "per-info" v "Display performance information" flag off
|
||||
option "weights" w "Print weights" flag off hidden
|
||||
option "weights" w "Print weights" flag off
|
||||
option "no-uppercase" - "Do not process form containing uppercase letters" flag off
|
||||
|
||||
|
@ -7,7 +7,7 @@ double delta=0.1;
|
||||
int cut_off=100;
|
||||
char dictionary[255];
|
||||
bool per_info=false;
|
||||
bool weights=true;
|
||||
bool weights=false;
|
||||
|
||||
void process_guess_options(gengetopt_args_info* args)
|
||||
{
|
||||
@ -55,6 +55,6 @@ void process_guess_options(gengetopt_args_info* args)
|
||||
per_info=args->per_info_flag;
|
||||
|
||||
if(args->weights_given)
|
||||
weights=false;
|
||||
weights=true;
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user