|
|
|
@ -1,5 +1,7 @@
|
|
|
|
|
|
|
|
|
|
\input texinfo @c -*-texinfo-*-
|
|
|
|
|
@documentencoding ISO-8859-2
|
|
|
|
|
@c @documentencoding ISO-8859-2
|
|
|
|
|
@documentencoding UTF-8
|
|
|
|
|
@c @documentlanguage pl
|
|
|
|
|
|
|
|
|
|
@c %**start of header
|
|
|
|
@ -10,7 +12,7 @@
|
|
|
|
|
@copying
|
|
|
|
|
This manual is for UAM Text Tools (version 0.90, October, 2008)
|
|
|
|
|
|
|
|
|
|
Copyright @copyright{} 2005, 2007 Tomasz Obrêbski, Micha³ Stolarski, Justyna Walkowska, Pawe³ Konieczka.
|
|
|
|
|
Copyright @copyright{} 2005, 2007 Tomasz Obrębski, Michał Stolarski, Justyna Walkowska, Paweł Konieczka.
|
|
|
|
|
|
|
|
|
|
Permission is granted to copy, distribute and/or modify this document
|
|
|
|
|
under the terms of the GNU Free Documentation License, Version 1.2 or
|
|
|
|
@ -30,7 +32,7 @@ Documentation License,,GNU Free Documentation License.
|
|
|
|
|
@title UAM Text Tools 0.90 - User Manual
|
|
|
|
|
@subtitle edition 0.01, @today{}
|
|
|
|
|
@subtitle status: prescript
|
|
|
|
|
@author by Justyna Walkowska, Tomasz Obr@,{}ebski and Micha@l{} Stolarski
|
|
|
|
|
@author by Justyna Walkowska, Tomasz Obrębski and Michał Stolarski
|
|
|
|
|
@page
|
|
|
|
|
@vskip 0pt plus 1filll
|
|
|
|
|
@insertcopying
|
|
|
|
@ -41,9 +43,14 @@ Documentation License,,GNU Free Documentation License.
|
|
|
|
|
@c @paragraphindent none
|
|
|
|
|
|
|
|
|
|
@iftex
|
|
|
|
|
@tex
|
|
|
|
|
% \usepackage[T1]{fontenc}
|
|
|
|
|
% \usepackage[utf8]{inputenc}
|
|
|
|
|
% \usepackage{times}
|
|
|
|
|
@end tex
|
|
|
|
|
|
|
|
|
|
@parskip = 0.5@normalbaselineskip plus 3pt minus 1pt
|
|
|
|
|
@end iftex
|
|
|
|
|
|
|
|
|
|
@c @headings off
|
|
|
|
|
@c @everyheading LEM(1) @| @| LEM(1)
|
|
|
|
|
@everyfooting @today{} @c @| @thispage @|
|
|
|
|
@ -83,13 +90,13 @@ developed at Adam Mickiewicz University. Its functionality includes:
|
|
|
|
|
@itemize @bullet
|
|
|
|
|
|
|
|
|
|
@item
|
|
|
|
|
tokenization
|
|
|
|
|
tokenization ółąż
|
|
|
|
|
@item
|
|
|
|
|
dictionary-based morphological analysis
|
|
|
|
|
@item
|
|
|
|
|
heuristic morphological analysis of unknown words
|
|
|
|
|
@item
|
|
|
|
|
spelling correction
|
|
|
|
|
spelling correction ółąśćż
|
|
|
|
|
@item
|
|
|
|
|
pattern search
|
|
|
|
|
@item
|
|
|
|
@ -124,11 +131,11 @@ List of contributors:
|
|
|
|
|
|
|
|
|
|
@itemize
|
|
|
|
|
@item Pawel Konieczka
|
|
|
|
|
@item Tomasz Obrebski
|
|
|
|
|
@item Michal Stolarski
|
|
|
|
|
@item Tomasz Obrębski
|
|
|
|
|
@item Michał Stolarski
|
|
|
|
|
@item Marcin Walas
|
|
|
|
|
@item Justyna Walkowska
|
|
|
|
|
@item Pawel Werenski
|
|
|
|
|
@item Paweł Wereński
|
|
|
|
|
@end itemize
|
|
|
|
|
|
|
|
|
|
@c ----------------------------------------------------------------------
|
|
|
|
@ -250,7 +257,7 @@ sentence: @samp{Piszemy dobre progrumy.}
|
|
|
|
|
|
|
|
|
|
@example
|
|
|
|
|
0000 00 BOS *
|
|
|
|
|
0000 07 W Piszemy lem:pisaæ,V
|
|
|
|
|
0000 07 W Piszemy lem:pisać,V
|
|
|
|
|
0007 01 S _
|
|
|
|
|
0008 05 W dobre lem:dobry,ADJ
|
|
|
|
|
0013 01 S _
|
|
|
|
@ -261,7 +268,7 @@ sentence: @samp{Piszemy dobre progrumy.}
|
|
|
|
|
0024 00 BOS *
|
|
|
|
|
0024 11 W Warszawiacy lem:Warszawiak,N
|
|
|
|
|
0035 01 S _
|
|
|
|
|
0036 03 W te¿
|
|
|
|
|
0036 03 W też
|
|
|
|
|
0039 01 P .
|
|
|
|
|
0040 00 EOS *
|
|
|
|
|
|
|
|
|
@ -269,7 +276,7 @@ sentence: @samp{Piszemy dobre progrumy.}
|
|
|
|
|
|
|
|
|
|
@example
|
|
|
|
|
0000 BOS *
|
|
|
|
|
0000 W Piszemy lem:pisaæ,V
|
|
|
|
|
0000 W Piszemy lem:pisać,V
|
|
|
|
|
0007 S _
|
|
|
|
|
0008 W dobre lem:dobry,ADJ
|
|
|
|
|
0013 S _
|
|
|
|
@ -282,7 +289,7 @@ Posion information may be provided only for some types of segments:
|
|
|
|
|
|
|
|
|
|
@example
|
|
|
|
|
0000 BOS *
|
|
|
|
|
W Piszemy lem:pisaæ,V
|
|
|
|
|
W Piszemy lem:pisać,V
|
|
|
|
|
S _
|
|
|
|
|
W dobre lem:dobry,ADJ
|
|
|
|
|
S _
|
|
|
|
@ -293,7 +300,7 @@ S _
|
|
|
|
|
0024 BOS *
|
|
|
|
|
W Warszawiacy lem:Warszawiak,N
|
|
|
|
|
S _
|
|
|
|
|
W te¿
|
|
|
|
|
W też
|
|
|
|
|
P .
|
|
|
|
|
EOS *
|
|
|
|
|
@end example
|
|
|
|
@ -428,7 +435,7 @@ as ISO, ANSI, DOS.
|
|
|
|
|
@c @end table
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@c [JAK UZYSKAÆ POLSKIE CZCIONKI W DVI???]
|
|
|
|
|
@c [JAK UZYSKAĆ POLSKIE CZCIONKI W DVI???]
|
|
|
|
|
|
|
|
|
|
@macro parhelp
|
|
|
|
|
@item @b{@minus{}@minus{}help}, @b{@minus{}h}
|
|
|
|
@ -650,7 +657,7 @@ Sinks: programs which read UTT data and produce output in another format
|
|
|
|
|
@c ----------------------------------------
|
|
|
|
|
|
|
|
|
|
@multitable {aaaaaaaaaaaaaaaaaaaaaaaaa} {aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa}
|
|
|
|
|
@item @strong{Authors:} @tab Tomasz Obrêbski
|
|
|
|
|
@item @strong{Authors:} @tab Tomasz Obrębski
|
|
|
|
|
@item @strong{Component category:} @tab source
|
|
|
|
|
@item @strong{Input format:} @tab raw text file
|
|
|
|
|
@item @strong{Output format:} @tab UTT regular
|
|
|
|
@ -755,7 +762,7 @@ Output:
|
|
|
|
|
@c @node sen - sentencizer
|
|
|
|
|
@c @chapter sen - sentencizer
|
|
|
|
|
|
|
|
|
|
@c Authors: Tomasz Obrêbski
|
|
|
|
|
@c Authors: Tomasz Obrębski
|
|
|
|
|
|
|
|
|
|
@c ---------------------------------------------------------------------
|
|
|
|
|
@c LEM
|
|
|
|
@ -766,7 +773,7 @@ Output:
|
|
|
|
|
@section lem - morphological analyzer
|
|
|
|
|
|
|
|
|
|
@multitable {aaaaaaaaaaaaaaaaaaaaaaaaa} {aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa}
|
|
|
|
|
@item @strong{Authors:} @tab Tomasz Obrêbski, Micha³ Stolarski
|
|
|
|
|
@item @strong{Authors:} @tab Tomasz Obrębski, Michał Stolarski
|
|
|
|
|
@item @strong{Component category:} @tab filter
|
|
|
|
|
@item @strong{Input format:} @tab UTT regular
|
|
|
|
|
@item @strong{Output format:} @tab UTT regular
|
|
|
|
@ -870,7 +877,7 @@ Input:
|
|
|
|
|
Output (default):
|
|
|
|
|
|
|
|
|
|
@example
|
|
|
|
|
0000 07 W Piszemy lem:pisaæ,V/AiVpMdTrfNpP1
|
|
|
|
|
0000 07 W Piszemy lem:pisać,V/AiVpMdTrfNpP1
|
|
|
|
|
0007 01 B _
|
|
|
|
|
0008 05 W dobre lem:dobry,ADJ/DpNpCnavGaifn
|
|
|
|
|
0008 05 W dobre lem:dobry,ADJ/DpNsCnavGn
|
|
|
|
@ -885,7 +892,7 @@ Output (default):
|
|
|
|
|
Output (@option{--one-line} option):
|
|
|
|
|
|
|
|
|
|
@example
|
|
|
|
|
0000 07 W Piszemy lem:pisaæ,V/AiVpMdTrfNpP1
|
|
|
|
|
0000 07 W Piszemy lem:pisać,V/AiVpMdTrfNpP1
|
|
|
|
|
0007 01 S _
|
|
|
|
|
0008 05 W dobre lem:dobry,ADJ/DpNpCnavGaifn lem:dobry,ADJ/DpNsCnavGn
|
|
|
|
|
0013 01 S _
|
|
|
|
@ -897,7 +904,7 @@ Output (@option{--one-line} option):
|
|
|
|
|
Output (@option{--one-field} option):
|
|
|
|
|
|
|
|
|
|
@example
|
|
|
|
|
0000 07 W Piszemy lem:pisaæ,V/AiVpMdTrfNpP1
|
|
|
|
|
0000 07 W Piszemy lem:pisać,V/AiVpMdTrfNpP1
|
|
|
|
|
0007 01 S _
|
|
|
|
|
0008 05 W dobre lem:dobry,ADJ/DpNpCnavGaifn,ADJ/DpNsCnavGn
|
|
|
|
|
0013 01 S _
|
|
|
|
@ -931,7 +938,7 @@ Dictionary entries have the following structure:
|
|
|
|
|
meaning: replace prefix of length @code{<cut1>} with
|
|
|
|
|
string @code{<add1>}, replace suffix of length @code{<cut2>} with string
|
|
|
|
|
@code{<add2>}. For example @code{3t} transforms @samp{kocie} into
|
|
|
|
|
@samp{kot}, @code{3-4a³y} transforms @samp{najbielsi} into @samp{bia³y}
|
|
|
|
|
@samp{kot}, @code{3-4ały} transforms @samp{najbielsi} into @samp{biały}
|
|
|
|
|
|
|
|
|
|
Each dictionary entry must be written in one line and must not contain blank characters.
|
|
|
|
|
|
|
|
|
@ -942,8 +949,8 @@ kota;1,N/GaNsCg;1,N/GaNsCa
|
|
|
|
|
kotu;1,N/GaNsCd
|
|
|
|
|
kotem;2,N/GaNsCi
|
|
|
|
|
kocie;3t,N/GaNsCl;3t,N/GaNsCv
|
|
|
|
|
najbielsi;3-4a³y,ADJ/DsNpCnGp
|
|
|
|
|
najbielsze;3-5a³y,ADJ/DsNpCnGaifn
|
|
|
|
|
najbielsi;3-4ały,ADJ/DsNpCnGp
|
|
|
|
|
najbielsze;3-5ały,ADJ/DsNpCnGaifn
|
|
|
|
|
najlepsi;dobry,ADJ/DsNpCnGp
|
|
|
|
|
najlepsze;dobry,ADJ/DsNpCnGaifn
|
|
|
|
|
@end example
|
|
|
|
@ -1008,7 +1015,7 @@ lem -c -d <dict1> | lem -S lem -d <dict2>
|
|
|
|
|
|
|
|
|
|
@multitable {aaaaaaaaaaaaaaaaaaaaaaaaa} {aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa}
|
|
|
|
|
|
|
|
|
|
@item @strong{Authors:} @tab Micha³ Stolarski, Tomasz Obrêbski
|
|
|
|
|
@item @strong{Authors:} @tab Michał Stolarski, Tomasz Obrębski
|
|
|
|
|
@item @strong{Component category:} @tab filter
|
|
|
|
|
|
|
|
|
|
@end multitable
|
|
|
|
@ -1105,7 +1112,7 @@ string @var{add1}, replace suffix of length @var{cat2} with string
|
|
|
|
|
@var{add2}.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Example: @code{3-4a³y} transforms @i{najbielsi} into @i{bia³y}
|
|
|
|
|
Example: @code{3-4ały} transforms @i{najbielsi} into @i{biały}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@var{description} contains the part of speech and morphosyntactic information (@pxref{PMDBF dictionary}).
|
|
|
|
@ -1113,10 +1120,10 @@ Example: @code{3-4a³y} transforms @i{najbielsi} into @i{bia³y}
|
|
|
|
|
@var{weight} is an integer value between 1 and 999 indicating the
|
|
|
|
|
likelihood of the guess.
|
|
|
|
|
|
|
|
|
|
@example
|
|
|
|
|
*³kê;1a,N/GfNsCa
|
|
|
|
|
naj*elszy;3-4a³y,ADJ/...:...
|
|
|
|
|
@end example
|
|
|
|
|
@c @example
|
|
|
|
|
@c *łkę;1a,N/GfNsCa
|
|
|
|
|
@c naj*elszy;3-4ały,ADJ/...:...
|
|
|
|
|
@c @end example
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@c ---------------------------------------------------------------------
|
|
|
|
@ -1128,7 +1135,7 @@ naj*elszy;3-4a³y,ADJ/...:...
|
|
|
|
|
@section cor - spelling corrector
|
|
|
|
|
|
|
|
|
|
@multitable {aaaaaaaaaaaaaaaaaaaaaaaaa} {aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa}
|
|
|
|
|
@item @strong{Authors:} @tab Tomasz Obrêbski, Micha³ Stolarski
|
|
|
|
|
@item @strong{Authors:} @tab Tomasz Obrębski, Michał Stolarski
|
|
|
|
|
@item @strong{Component category:} @tab filter
|
|
|
|
|
@item @strong{Input format:} @tab UTT regular
|
|
|
|
|
@item @strong{Output format:} @tab UTT regular
|
|
|
|
@ -1215,7 +1222,116 @@ compiledic <dictionaryname>.dic
|
|
|
|
|
@node kor
|
|
|
|
|
@section kor - configurable spelling corrector
|
|
|
|
|
|
|
|
|
|
[TODO]
|
|
|
|
|
@multitable {aaaaaaaaaaaaaaaaaaaaaaaaa} {aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa}
|
|
|
|
|
@item @strong{Authors:} @tab Paweł Wereński, Tomasz Obrębski, Michał Stolarski
|
|
|
|
|
@item @strong{Component category:} @tab filter
|
|
|
|
|
@item @strong{Input format:} @tab UTT regular
|
|
|
|
|
@item @strong{Output format:} @tab UTT regular
|
|
|
|
|
@item @strong{Required annotation:} @tab tok
|
|
|
|
|
@end multitable
|
|
|
|
|
|
|
|
|
|
@menu
|
|
|
|
|
* kor description::
|
|
|
|
|
* kor command line options::
|
|
|
|
|
* kor weights definition file::
|
|
|
|
|
* kor dictionaries::
|
|
|
|
|
@end menu
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@node kor description
|
|
|
|
|
@subsection Description
|
|
|
|
|
|
|
|
|
|
The spelling corrector applies Paweł Wereński's dynamic programming
|
|
|
|
|
algorithm to the FSA representation of the set of word forms of the
|
|
|
|
|
Polex/PMDBF dictionary. The algorithm is an extension of K. Oflazer's
|
|
|
|
|
algorithm used by @command{cor}. In the extended version it is
|
|
|
|
|
possible to assign weights to individual edit operations.
|
|
|
|
|
|
|
|
|
|
Given an incorrect word form, it returns all word forms
|
|
|
|
|
present in the dictionary whose edit distance is smaller than the
|
|
|
|
|
threshold given as the parameter.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@node kor command line options
|
|
|
|
|
@subsection Command line options
|
|
|
|
|
|
|
|
|
|
@table @code
|
|
|
|
|
|
|
|
|
|
@parhelp
|
|
|
|
|
@parversion
|
|
|
|
|
@parinteractive
|
|
|
|
|
@c @parfile
|
|
|
|
|
@c @paroutput
|
|
|
|
|
@c @parfail
|
|
|
|
|
@c @parcopy
|
|
|
|
|
@parinputfield
|
|
|
|
|
@paroutputfield
|
|
|
|
|
@pardictionary
|
|
|
|
|
@parprocess
|
|
|
|
|
@parselect
|
|
|
|
|
@parunselect
|
|
|
|
|
@paroneline
|
|
|
|
|
@paronefield
|
|
|
|
|
|
|
|
|
|
@item @b{@minus{}@minus{}distance=@var{int}, @minus{}n @var{int}}
|
|
|
|
|
Maximum edit distance (default='1').
|
|
|
|
|
|
|
|
|
|
@item @b{@minus{}@minus{}weights=@var{filename}, @minus{}w @var{filename}}
|
|
|
|
|
Edit operations' weights file.
|
|
|
|
|
|
|
|
|
|
@c @item @b{@minus{}@minus{}replace, @minus{}r}
|
|
|
|
|
@c Replace original form with corrected form, place original form in the
|
|
|
|
|
@c cor field. This option has no effect in @option{--one-*} modes (default=off)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@end table
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@node kor weights definition file
|
|
|
|
|
@subsection Weights definition file
|
|
|
|
|
|
|
|
|
|
Example:
|
|
|
|
|
|
|
|
|
|
@example
|
|
|
|
|
|
|
|
|
|
%stdcor 1
|
|
|
|
|
%xchg 1
|
|
|
|
|
ż rz 0.5
|
|
|
|
|
ch h 0.5
|
|
|
|
|
u ó 0.5
|
|
|
|
|
|
|
|
|
|
@end example
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
The default weight is set to 1 (@code{%stdcor 1}), the weight of the exchange
|
|
|
|
|
operation is set to 1 (@code{%xchg 1}), and the three principal orthographic
|
|
|
|
|
errors are assigned the weight 0.5.
|
|
|
|
|
|
|
|
|
|
The edit operation weight declaration, such as
|
|
|
|
|
|
|
|
|
|
@example
|
|
|
|
|
ż rz 0.5
|
|
|
|
|
@end example
|
|
|
|
|
|
|
|
|
|
works in both directions: ż->rz and rz->ż.
|
|
|
|
|
|
|
|
|
|
The default weights definition file for @code{kor} is:
|
|
|
|
|
|
|
|
|
|
@example
|
|
|
|
|
$HOME/.local/share/utt/weights.kor
|
|
|
|
|
@end example
|
|
|
|
|
|
|
|
|
|
or, if the above mentioned file is absent:
|
|
|
|
|
|
|
|
|
|
@example
|
|
|
|
|
/usr/local/share/utt/weights.kor
|
|
|
|
|
@end example
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@node kor dictionaries
|
|
|
|
|
@subsection Dictionaries
|
|
|
|
|
|
|
|
|
|
see @command{cor}
|
|
|
|
|
|
|
|
|
|
@c ---------------------------------------------------------------------
|
|
|
|
|
@c SEN
|
|
|
|
@ -1227,7 +1343,7 @@ compiledic <dictionaryname>.dic
|
|
|
|
|
|
|
|
|
|
@multitable {aaaaaaaaaaaaaaaaaaaaaaaaa} {aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa}
|
|
|
|
|
|
|
|
|
|
@item @strong{Authors:} @tab Tomasz Obrêbski
|
|
|
|
|
@item @strong{Authors:} @tab Tomasz Obrębski
|
|
|
|
|
@item @strong{Component category:} @tab filter
|
|
|
|
|
@item @strong{Input format:} @tab UTT regular
|
|
|
|
|
@item @strong{Output format:} @tab UTT regular
|
|
|
|
@ -1255,7 +1371,7 @@ compiledic <dictionaryname>.dic
|
|
|
|
|
command: sen
|
|
|
|
|
|
|
|
|
|
input:
|
|
|
|
|
0000 05 W Cze¶æ
|
|
|
|
|
0000 05 W Cześć
|
|
|
|
|
0005 01 P !
|
|
|
|
|
0006 01 S _
|
|
|
|
|
0007 02 W To
|
|
|
|
@ -1266,7 +1382,7 @@ input:
|
|
|
|
|
|
|
|
|
|
output:
|
|
|
|
|
0000 00 BOS *
|
|
|
|
|
0000 05 W Cze¶æ
|
|
|
|
|
0000 05 W Cześć
|
|
|
|
|
0005 01 P !
|
|
|
|
|
0006 00 EOS *
|
|
|
|
|
0006 00 BOS *
|
|
|
|
@ -1287,7 +1403,7 @@ output:
|
|
|
|
|
@c @node gph - graphizer
|
|
|
|
|
@c @chapter gph - graphizer
|
|
|
|
|
|
|
|
|
|
@c Authors: Tomasz Obrêbski
|
|
|
|
|
@c Authors: Tomasz Obrębski
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -1300,7 +1416,7 @@ output:
|
|
|
|
|
@section ser - pattern search tool
|
|
|
|
|
|
|
|
|
|
@multitable {aaaaaaaaaaaaaaaaaaaaaaaaa} {aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa}
|
|
|
|
|
@item @strong{Authors:} @tab Tomasz Obrêbski
|
|
|
|
|
@item @strong{Authors:} @tab Tomasz Obrębski
|
|
|
|
|
@item @strong{Component category:} @tab filter
|
|
|
|
|
@item @strong{Input format:} @tab UTT regular
|
|
|
|
|
@item @strong{Output format:} @tab UTT regular
|
|
|
|
@ -1536,7 +1652,7 @@ installed in the system:
|
|
|
|
|
@section grp - pattern search tool
|
|
|
|
|
|
|
|
|
|
@multitable {aaaaaaaaaaaaaaaaaaaaaaaaa} {aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa}
|
|
|
|
|
@item @strong{Authors:} @tab Tomasz Obrêbski
|
|
|
|
|
@item @strong{Authors:} @tab Tomasz Obrębski
|
|
|
|
|
@item @strong{Component category:} @tab filter
|
|
|
|
|
@item @strong{Input format:} @tab UTT flattened
|
|
|
|
|
@item @strong{Output format:} @tab UTT flattened
|
|
|
|
@ -1625,7 +1741,7 @@ lzop -cd corpus.grp.lzo | grp -e @var{EXPR} | unfla | ser -e @var{EXPR}
|
|
|
|
|
@section mar
|
|
|
|
|
|
|
|
|
|
@multitable {aaaaaaaaaaaaaaaaaaaaaaaaa} {aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa}
|
|
|
|
|
@item @strong{Authors:} @tab Marcin Walas, Tomasz Obrêbski
|
|
|
|
|
@item @strong{Authors:} @tab Marcin Walas, Tomasz Obrębski
|
|
|
|
|
@item @strong{Input format:} @tab UTT flattened
|
|
|
|
|
@item @strong{Output format:} @tab UTT flattened
|
|
|
|
|
@item @strong{Required annotation:} @tab tok, sen, lem -1
|
|
|
|
@ -1645,7 +1761,7 @@ lzop -cd corpus.grp.lzo | grp -e @var{EXPR} | unfla | ser -e @var{EXPR}
|
|
|
|
|
@section kot - untokenizer
|
|
|
|
|
|
|
|
|
|
@multitable {aaaaaaaaaaaaaaaaaaaaaaaaa} {aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa}
|
|
|
|
|
@item @strong{Authors:} @tab Tomasz Obrêbski
|
|
|
|
|
@item @strong{Authors:} @tab Tomasz Obrębski
|
|
|
|
|
@item @strong{Component category:} @tab filter
|
|
|
|
|
@item @strong{Input format:} @tab UTT regular
|
|
|
|
|
@item @strong{Output format:} @tab text
|
|
|
|
@ -1838,7 +1954,7 @@ sequence:
|
|
|
|
|
@section compiledic - the dictionary compiler
|
|
|
|
|
|
|
|
|
|
@multitable {aaaaaaaaaaaaaaaaaaaaaaaaa} {aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa}
|
|
|
|
|
@item @strong{Authors:} @tab Michal Stolarski, Tomasz Obrebski
|
|
|
|
|
@item @strong{Authors:} @tab Michał Stolarski, Tomasz Obrębski
|
|
|
|
|
@item @strong{Component category:} @tab additional tool
|
|
|
|
|
@end multitable
|
|
|
|
|
@c
|
|
|
|
@ -1883,7 +1999,7 @@ termination of the program.
|
|
|
|
|
@section fla - the UTT file flattener
|
|
|
|
|
|
|
|
|
|
@multitable {aaaaaaaaaaaaaaaaaaaaaaaaa} {aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa}
|
|
|
|
|
@item @strong{Authors:} @tab Tomasz Obrêbski
|
|
|
|
|
@item @strong{Authors:} @tab Tomasz Obrębski
|
|
|
|
|
@item @strong{Input format:} @tab UTT regular
|
|
|
|
|
@item @strong{Output format:} @tab UTT flattened
|
|
|
|
|
@item @strong{Required annotation:} @tab sen
|
|
|
|
@ -1931,7 +2047,7 @@ default, segments containing a field @code{BOS} are seeked.
|
|
|
|
|
@section unfla - the UTT file unflattener
|
|
|
|
|
|
|
|
|
|
@multitable {aaaaaaaaaaaaaaaaaaaaaaaaa} {aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa}
|
|
|
|
|
@item @strong{Authors:} @tab Tomasz Obrêbski
|
|
|
|
|
@item @strong{Authors:} @tab Tomasz Obrębski
|
|
|
|
|
@item @strong{Input format:} @tab UTT flattened
|
|
|
|
|
@item @strong{Output format:} @tab UTT regular
|
|
|
|
|
@item @strong{Required annotation:} @tab -
|
|
|
|
@ -2235,7 +2351,6 @@ descr = pos ( / ( attr val + ) + ) ?
|
|
|
|
|
@item
|
|
|
|
|
@tab @code{v} @tab vocative.
|
|
|
|
|
@item
|
|
|
|
|
@item
|
|
|
|
|
@code{G} @tab @tab Gender
|
|
|
|
|
@item
|
|
|
|
|
@tab @code{p} @tab masculine-personal,
|
|
|
|
@ -2728,7 +2843,7 @@ Report bugs to <obrebski@@amu.edu.pl>.
|
|
|
|
|
@c @node Copyright
|
|
|
|
|
@c @chapter Copyright
|
|
|
|
|
@c
|
|
|
|
|
@c Copyright 2004 by Tomasz Obrebski
|
|
|
|
|
@c Copyright 2004 by Tomasz Obrębski
|
|
|
|
|
@c This software is free for research and educational use.
|
|
|
|
|
|
|
|
|
|
@c ---------------------------------------------------------------------
|
|
|
|
|