From ac25afd65b189d5b88120229128a777cf9edf93f Mon Sep 17 00:00:00 2001
From: Tomasz Obrebski
Date: Thu, 9 Feb 2012 22:49:01 +0100
Subject: [PATCH] sen component added

---
 configure        |   2 +-
 src/sen/Makefile |  34 ++++++++++++
 src/sen/sen.l    | 131 +++++++++++++++++++++++++++++++++++++++++++++++
 src/ser/ser      |   2 +-
 4 files changed, 167 insertions(+), 2 deletions(-)
 create mode 100644 src/sen/Makefile
 create mode 100644 src/sen/sen.l

diff --git a/configure b/configure
index 5846631..c5513b0 100755
--- a/configure
+++ b/configure
@@ -8,7 +8,7 @@ CUR_DIR=$(pwd)
 SRC_DIR="${CUR_DIR}/src"
 
 # list of components to compile
-CMPLIST="compiledic cor dgp fla gph grp gue kon kor kot lem mar rm12 rs12 sen-nl ser tags tok.c tok.l tok.pl unfla"
+CMPLIST="compiledic cor dgp fla gph grp gue kon kor kot lem mar rm12 rs12 sen sen-nl ser tags tok.c tok.l tok.pl unfla"
 
 COMP=
 
diff --git a/src/sen/Makefile b/src/sen/Makefile
new file mode 100644
index 0000000..96df701
--- /dev/null
+++ b/src/sen/Makefile
@@ -0,0 +1,34 @@
+# Build rules for sen: lex.yy.c is generated from sen.l with $(FLEX).
+include ../../config.mak
+
+ifeq ($(BUILD_STATIC), yes)
+  LDFLAGS += -static
+endif
+
+LDFLAGS +=
+CFLAGS += -O2
+
+sen: lex.yy.c
+	$(CC) $(CFLAGS) -o sen lex.yy.c -lfl $(LDFLAGS)
+
+lex.yy.c: sen.l
+	$(FLEX) sen.l
+
+.PHONY: install
+install:
+ifdef BIN_DIR
+	install -m 0755 sen $(BIN_DIR)
+endif
+
+.PHONY: uninstall
+uninstall:
+ifdef BIN_DIR
+	rm $(BIN_DIR)/sen
+endif
+
+.PHONY: clean clean.flex
+clean: clean.flex
+	rm -f sen
+
+clean.flex:
+	rm -f lex.yy.c
diff --git a/src/sen/sen.l b/src/sen/sen.l
new file mode 100644
index 0000000..7d77d74
--- /dev/null
+++ b/src/sen/sen.l
@@ -0,0 +1,131 @@
+%{
+/*
+ * sen - marks sentence boundaries in a stream of segments.
+ *
+ * Each input line is expected to start with two integers (stored in pos
+ * and len), followed by a one-letter tag (N, S, P or W) and the segment
+ * text.  All input is echoed unchanged; "BOS" and "EOS" marker lines are
+ * printed around the detected sentences.
+ * NOTE(review): the tags presumably stand for number, space, punctuation
+ * and word -- confirm against the tool that produces this format.
+ */
+
+  void print_EOS(void);
+  void set_position(void);
+
+  /* leading two integers of the last line handed to set_position() */
+  int pos=0;
+  int len=0;
+
+%}
+
+/* NOTE(review): the non-ASCII characters in ul, ll and ab1 look like
+   ISO-8859-2 Polish letters viewed in another charset -- keep the bytes
+   exactly as they are and confirm the file encoding. */
+ul      [A-Z¡ÆÊ£ÑÓ¦¯¬]
+ll      [a-z±æê³ñ󶿼]
+l       {ul}|{ll}
+n       [0-9]+
+s       [ \t]+
+
+
+ab1     (mgr|in¿|prof|hab|doc|dyr|kier|zast)
+ab2     (ul|pl|al)
+ab3     (str|ryc|rys)
+ab4     (np|tzw)
+
+/* NOTE(review): ab4 is defined but not part of abrv -- confirm whether
+   it was left out on purpose. */
+abrv    ({ab1}|{ab2}|{ab3})
+
+/* one pattern per kind of input line: two integers, tag, text */
+SEG     .*\n
+N       {n}{s}{n}{s}N{s}.*\n
+S       {n}{s}{n}{s}S{s}.*\n
+P       {n}{s}{n}{s}P{s}.*\n
+W       {n}{s}{n}{s}W{s}.*\n
+UL      {n}{s}{n}{s}W{s}{ul}.*\n
+Cap     {n}{s}{n}{s}W{s}{ul}{ll}*.*\n
+POINT   {n}{s}{n}{s}P{s}\.({s}.*)?\n
+QMARK   {n}{s}{n}{s}P{s}\?({s}.*)?\n
+EXCL    {n}{s}{n}{s}P{s}\!({s}.*)?\n
+DASH    {n}{s}{n}{s}P{s}\-({s}.*)?\n
+POINTS  {POINT}+
+
+ABRV    {n}{s}{n}{s}W{s}{abrv}({s}.*)?\n
+
+EOS     {POINT}|{POINTS}|{QMARK}|{EXCL}
+
+
+%%
+
+  /*
+   * Rules 1-3 echo a full stop that does not end a sentence: numbers
+   * joined by full stops, capitalised words each followed by a full stop
+   * and then a Cap word, and a listed abbreviation plus full stop.
+   * Rules 4-5 echo a punctuation line and print EOS/BOS after it.
+   * The remaining rules copy everything else and print the final EOS.
+   */
+
+{N}({POINT}{N})+                { ECHO; set_position(); }
+({UL}{POINT}{S}?)+{Cap}         { ECHO; set_position(); }
+{ABRV}{POINT}                   { ECHO; set_position(); }
+
+
+{P}/{S}{DASH}                   { ECHO; set_position(); print_EOS(); }
+{EOS}/{S}({Cap}|{P}|{N})        { ECHO; set_position(); print_EOS(); }
+
+.*                              { ECHO; set_position(); }
+
+<<EOF>>                         { printf("%04d 00 EOS *\n",pos+len); yyterminate(); }
+
+%%
+
+/* Print the opening BOS marker and scan standard input to its end. */
+int main(void)
+{
+  printf("0000 00 BOS *\n");
+  yylex();
+  return 0;
+}
+
+/* Report that no further input follows the first end of file. */
+int yywrap(void)
+{
+  return 1;
+}
+
+/*
+ * Read pos and len from the last line of the current match.
+ * yytext may hold several lines and may or may not end with a newline;
+ * it is left unmodified.  Fields that sscanf() cannot parse keep their
+ * previous values.
+ */
+void set_position(void)
+{
+  const char *lastseg = yytext;
+  int end = yyleng;
+  int i;
+
+  /* a newline that terminates the last line does not start a new one */
+  if( end > 0 && yytext[end-1] == '\n' )
+    end--;
+
+  /* find the beginning of the last line */
+  for( i = end-1; i >= 0; i-- )
+  {
+    if( yytext[i] == '\n' )
+    {
+      lastseg = yytext + i + 1;
+      break;
+    }
+  }
+
+  sscanf(lastseg, "%d %d", &pos, &len);
+}
+
+/* Print EOS for the sentence just ended and BOS for the next one. */
+void print_EOS(void)
+{
+  printf("%04d 00 EOS *\n%04d 00 BOS *\n", pos+len, pos+len);
+}
diff --git a/src/ser/ser b/src/ser/ser
index ab7bf0f..216d358 100755
--- a/src/ser/ser
+++ b/src/ser/ser
@@ -11,7 +11,7 @@ use File::Temp;
 use File::HomeDir;
 
 my $LIB_DIR="/usr/local/lib/utt";
-my $systemconfigfile='/usr/local/etc/utt/ser.conf';
+my $systemconfigfile='/etc/utt/ser.conf';
 my $userconfigfile=home()."/.utt/ser.conf";
 
 Getopt::Long::Configure('no_ignore_case_always');