tok now accepts options and prints a help message

This commit is contained in:
Tomasz Obrebski 2013-01-18 18:46:38 +01:00
parent 18e1952916
commit 243d027267
4 changed files with 33 additions and 7 deletions

View File

@ -99,7 +99,7 @@ install_documentation: documentation
.PHONY: install_share .PHONY: install_share
install_share: share install_share: share
cd $(CUR_DIR)/share && make install; cd $(CUR_DIR) cd $(CUR_DIR)/share && make install; cd $(CUR_DIR)
echo grammar compilation... #grammar compilation...
dgc dgc
# ------------------------------------------------------------------ # ------------------------------------------------------------------

View File

@ -8,12 +8,12 @@ LDFLAGS +=
CFLAGS += -O2 CFLAGS += -O2
tok: lex.yy.c tok: lex.yy.c
$(CC) $(CFLAGS) -lfl -o tok lex.yy.c $(LDFLAGS) $(CC) $(CFLAGS) -o tok lex.yy.c tok_cmdline.c -lfl $(LDFLAGS)
lex.yy.c: tok_cmdline.h tok_cmdline.c lex.yy.c: tok_cmdline.h tok_cmdline.c
$(FLEX) tok.l $(FLEX) tok.l
tok_cmdline.h tok_cmdline.c: tok_cmdline.h tok_cmdline.c: tok_cmdline.ggo
$(GENGETOPT) -i tok_cmdline.ggo --conf-parser --file=tok_cmdline $(GENGETOPT) -i tok_cmdline.ggo --conf-parser --file=tok_cmdline
.PHONY: install .PHONY: install

View File

@ -55,14 +55,15 @@
%% %%
/*int main(int argc, char** argv) int main(int argc, char** argv)
{ {
if (cmdline_parser(argc, argv, &args) != 0) return 1; if (cmdline_parser(argc, argv, &args) != 0) exit(1);
setlocale(LC_CTYPE,""); setlocale(LC_CTYPE,"");
setlocale(LC_COLLATE,""); setlocale(LC_COLLATE,"");
yylex(); yylex();
return 0; return 0;
}*/ }
int yywrap() int yywrap()
{ {

View File

@ -1,4 +1,29 @@
package "tok" package "tok"
version "0.1" version "0.1"
usage "tok [OPTIONS]"
purpose "tok transforms raw text into UTT format."
option "interactive" i "Interactive mode." flag off description "OPTIONS"
option "interactive" i "Interactive mode (no output buffering)." flag off
text "
DESCRIPTION
tok reads from standard input, identifies tokens on the basis of their orthographic form, and writes a sequence of segments in UTT format to
the standard output. The type of each token is printed in the type field.
OUTPUT FORMAT
UTT-file with four fields: start, length, type, and form. In the type field five types of tokens are distinguished:
W (word) - continuous sequence of letters
N (number) - continuous sequence of digits
S (space) - continuous sequence of space characters
P (punctuation) - single printable character that is not a letter, digit, or space character
B (unprintable character) - single unprintable character
USAGE EXAMPLE
tok
"