tok now accepts options and prints a help message
This commit is contained in:
parent
18e1952916
commit
243d027267
2
Makefile
2
Makefile
@ -99,7 +99,7 @@ install_documentation: documentation
|
||||
.PHONY: install_share
|
||||
install_share: share
|
||||
cd $(CUR_DIR)/share && make install; cd $(CUR_DIR)
|
||||
echo grammar compilation...
|
||||
#grammar compilation...
|
||||
dgc
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
|
@ -8,12 +8,12 @@ LDFLAGS +=
|
||||
CFLAGS += -O2
|
||||
|
||||
tok: lex.yy.c
|
||||
$(CC) $(CFLAGS) -lfl -o tok lex.yy.c $(LDFLAGS)
|
||||
$(CC) $(CFLAGS) -o tok lex.yy.c tok_cmdline.c -lfl $(LDFLAGS)
|
||||
|
||||
lex.yy.c: tok_cmdline.h tok_cmdline.c
|
||||
$(FLEX) tok.l
|
||||
|
||||
tok_cmdline.h tok_cmdline.c:
|
||||
tok_cmdline.h tok_cmdline.c: tok_cmdline.ggo
|
||||
$(GENGETOPT) -i tok_cmdline.ggo --conf-parser --file=tok_cmdline
|
||||
|
||||
.PHONY: install
|
||||
|
@ -55,14 +55,15 @@
|
||||
|
||||
%%
|
||||
|
||||
/*int main(int argc, char** argv)
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
if (cmdline_parser(argc, argv, &args) != 0) return 1;
|
||||
if (cmdline_parser(argc, argv, &args) != 0) exit(1);
|
||||
setlocale(LC_CTYPE,"");
|
||||
|
||||
setlocale(LC_COLLATE,"");
|
||||
yylex();
|
||||
return 0;
|
||||
}*/
|
||||
}
|
||||
|
||||
int yywrap()
|
||||
{
|
||||
|
@ -1,4 +1,29 @@
|
||||
package "tok"
|
||||
version "0.1"
|
||||
usage "tok [OPTIONS]"
|
||||
purpose "tok transforms raw text into UTT format."
|
||||
|
||||
option "interactive" i "Interactive mode." flag off
|
||||
description "OPTIONS"
|
||||
|
||||
option "interactive" i "Interactive mode (no output buffering)." flag off
|
||||
|
||||
text "
|
||||
DESCRIPTION
|
||||
|
||||
tok reads from standard input, identifies tokens on the basis of their orthographic form and writes a sequence of segments in UTT format to
|
||||
the standard output. The type of the token is printed as the type field.
|
||||
|
||||
OUTPUT FORMAT
|
||||
|
||||
UTT-file with four fields: start, length, type, and form. In the type field five types of tokens are distinguished:
|
||||
|
||||
W (word) - continuous sequence of letters
|
||||
N (number) - continuous sequence of digits
|
||||
S (space) - continuous sequence of space characters
|
||||
P (punctuation) - single printable character other than W, N, S
|
||||
B (unprintable character) - single unprintable character
|
||||
|
||||
USAGE EXAMPLE
|
||||
|
||||
tok
|
||||
"
|
||||
|
Loading…
Reference in New Issue
Block a user