diff --git a/Makefile b/Makefile index 35bfa80..06190d3 100644 --- a/Makefile +++ b/Makefile @@ -99,7 +99,7 @@ install_documentation: documentation .PHONY: install_share install_share: share cd $(CUR_DIR)/share && make install; cd $(CUR_DIR) - echo grammar compilation... + #grammar compilation... dgc # ------------------------------------------------------------------ diff --git a/src/tok.l/Makefile b/src/tok.l/Makefile index 698bbdc..5584a66 100644 --- a/src/tok.l/Makefile +++ b/src/tok.l/Makefile @@ -8,12 +8,12 @@ LDFLAGS += CFLAGS += -O2 tok: lex.yy.c - $(CC) $(CFLAGS) -lfl -o tok lex.yy.c $(LDFLAGS) + $(CC) $(CFLAGS) -o tok lex.yy.c tok_cmdline.c -lfl $(LDFLAGS) lex.yy.c: tok_cmdline.h tok_cmdline.c $(FLEX) tok.l -tok_cmdline.h tok_cmdline.c: +tok_cmdline.h tok_cmdline.c: tok_cmdline.ggo $(GENGETOPT) -i tok_cmdline.ggo --conf-parser --file=tok_cmdline .PHONY: install diff --git a/src/tok.l/tok.l b/src/tok.l/tok.l index bc9606d..b130867 100644 --- a/src/tok.l/tok.l +++ b/src/tok.l/tok.l @@ -55,14 +55,15 @@ %% -/*int main(int argc, char** argv) +int main(int argc, char** argv) { - if (cmdline_parser(argc, argv, &args) != 0) return 1; + if (cmdline_parser(argc, argv, &args) != 0) exit(1); setlocale(LC_CTYPE,""); + setlocale(LC_COLLATE,""); yylex(); return 0; -}*/ +} int yywrap() { diff --git a/src/tok.l/tok_cmdline.ggo b/src/tok.l/tok_cmdline.ggo index 8b58931..560dd11 100644 --- a/src/tok.l/tok_cmdline.ggo +++ b/src/tok.l/tok_cmdline.ggo @@ -1,4 +1,29 @@ package "tok" version "0.1" +usage "tok [OPTIONS]" +purpose "tok transforms raw text into UTT format." -option "interactive" i "Interactive mode." flag off +description "OPTIONS" + +option "interactive" i "Interactive mode (no output buffering)." flag off + +text " +DESCRIPTION + +tok reads from standard input, identifies tokens on the basis of their orthographic form and writes a sequence of segments in UTT format to +the standard output. The type of the token is printed as the type field. + +OUTPUT FORMAT + +UTT-file with four fields: start, length, type, and form. In the type field five types of tokens are distinguished: + + W (word) - continuous sequence of letters + N (number) - continuous sequence of digits + S (space) - continuous sequence of space characters + P (punctuation) - single printable character other than W, N, S + B (unprintable character) - single unprintable character + +USAGE EXAMPLE + + tok +"