sen component added

This commit is contained in:
Tomasz Obrebski 2012-02-09 22:49:01 +01:00
parent cfdf333a51
commit ac25afd65b
4 changed files with 131 additions and 2 deletions

2
configure vendored
View File

@ -8,7 +8,7 @@ CUR_DIR=$(pwd)
SRC_DIR="${CUR_DIR}/src"
# list of components to compile
CMPLIST="compiledic cor dgp fla gph grp gue kon kor kot lem mar rm12 rs12 sen-nl ser tags tok.c tok.l tok.pl unfla"
CMPLIST="compiledic cor dgp fla gph grp gue kon kor kot lem mar rm12 rs12 sen sen-nl ser tags tok.c tok.l tok.pl unfla"
COMP=

32
src/sen/Makefile Normal file
View File

@ -0,0 +1,32 @@
# Build rules for the `sen` component: a scanner generated by flex from sen.l.
# CC, FLEX, BIN_DIR and BUILD_STATIC are not set here; presumably they are
# provided by config.mak -- verify against the configure script.
include ../../config.mak

ifeq ($(BUILD_STATIC), yes)
LDFLAGS += -static
endif
# No-op append: no component-specific linker flags are added here.
LDFLAGS +=
CFLAGS += -O2

# Default target: compile the generated scanner and link it against libfl.
sen: lex.yy.c
	$(CC) $(CFLAGS) -o sen lex.yy.c -lfl $(LDFLAGS)

# Regenerate the C scanner whenever the flex source changes.
lex.yy.c: sen.l
	$(FLEX) sen.l

# Install/uninstall only act when BIN_DIR is defined.
.PHONY: install
install:
ifdef BIN_DIR
	install -m 0755 sen $(BIN_DIR)
endif

.PHONY: uninstall
uninstall:
ifdef BIN_DIR
	rm -f $(BIN_DIR)/sen
endif

# Declared phony so that a stray file named `clean` or `clean.flex`
# cannot make these targets look up to date.
.PHONY: clean clean.flex
clean: clean.flex
	rm -f sen

clean.flex:
	rm -f lex.yy.c

97
src/sen/sen.l Normal file
View File

@ -0,0 +1,97 @@
%{
/*
 * sen.l - flex scanner that inserts EOS/BOS marker lines into a stream of
 * segment lines.
 *
 * Every pattern below describes one or more whole input lines of the form
 *     <number> <number> <type letter> <rest of line>
 * where the type letter is one of N, S, P or W.
 * NOTE(review): the meaning of the type letters and of the two numbers is
 * not visible in this file; the names pos/len suggest "start position" and
 * "length" -- confirm against the component that produces the input.
 */
void print_EOS(void);
void set_position(void);
/* First and second numeric field of the most recently scanned line;
   both are updated by set_position(). */
int pos=0;
int len=0;
%}
    /* Upper-case and lower-case letters. The non-ASCII characters are
       single-byte encoded national letters.
       NOTE(review): looks like ISO-8859-2 Polish letters -- keep this file in
       its original encoding, re-encoding it would change what is matched. */
ul [A-Z¡ÆÊ£ÑÓ¦¯¬]
ll [a-z±æê³ñ󶿼]
    /* Any letter. Name references need braces; without them the pattern
       would match the literal strings "ul" and "ll". */
l {ul}|{ll}
    /* A run of digits / a run of blanks (field separator). */
n [0-9]+
s [ \t]+
    /* Abbreviation groups.
       NOTE(review): ab4 is defined but not included in abrv, so its entries
       are not covered by the {ABRV}{POINT} rule -- confirm whether that is
       intentional before adding it. */
ab1 (mgr|in¿|prof|hab|doc|dyr|kier|zast)
ab2 (ul|pl|al)
ab3 (str|ryc|rys)
ab4 (np|tzw)
abrv ({ab1}|{ab2}|{ab3})
    /* Any complete line. */
SEG .*\n
    /* A complete line with the given type letter in the third field. */
N {n}{s}{n}{s}N{s}.*\n
S {n}{s}{n}{s}S{s}.*\n
P {n}{s}{n}{s}P{s}.*\n
W {n}{s}{n}{s}W{s}.*\n
    /* W lines whose form starts with an upper-case letter. */
UL {n}{s}{n}{s}W{s}{ul}.*\n
Cap {n}{s}{n}{s}W{s}{ul}{ll}*.*\n
    /* P lines whose form is exactly one given punctuation character,
       optionally followed by further blank-separated fields. */
POINT {n}{s}{n}{s}P{s}\.({s}.*)?\n
QMARK {n}{s}{n}{s}P{s}\?({s}.*)?\n
EXCL {n}{s}{n}{s}P{s}\!({s}.*)?\n
DASH {n}{s}{n}{s}P{s}\-({s}.*)?\n
    /* One or more consecutive POINT lines. */
POINTS {POINT}+
    /* W line whose form is one of the listed abbreviations. */
ABRV {n}{s}{n}{s}W{s}{abrv}({s}.*)?\n
    /* Punctuation that may end a sentence. */
EOS {POINT}|{POINTS}|{QMARK}|{EXCL}
%%
    /*
     * Every rule echoes the matched lines unchanged and records the
     * position of the last matched line via set_position().
     * flex prefers the longest match, so the multi-line patterns take
     * precedence over the single-line catch-all.
     *
     *  1. N lines joined by POINT lines: echoed, no sentence break.
     *  2. One or more "UL line + POINT line (+ optional S line)" groups
     *     followed by a Cap line: echoed, no sentence break.
     *  3. ABRV line followed by a POINT line: echoed, no sentence break.
     *  4. P line followed by an S line and a DASH line: sentence break
     *     after the P line (the S and DASH lines are trailing context).
     *  5. EOS punctuation followed by an S line and a Cap, P or N line:
     *     sentence break after the punctuation.
     *  6. Anything else up to (not including) the newline: echoed; the
     *     newline itself is echoed by flex's default rule.
     *  7. End of input: print the closing EOS marker and stop scanning
     *     normally, so that main() returns and the exit status is 0.
     */
{N}({POINT}{N})+ { ECHO; set_position(); }
({UL}{POINT}{S}?)+{Cap} { ECHO; set_position(); }
{ABRV}{POINT} { ECHO; set_position(); }
{P}/{S}{DASH} { ECHO; set_position(); print_EOS(); }
{EOS}/{S}({Cap}|{P}|{N}) { ECHO; set_position(); print_EOS(); }
.* { ECHO; set_position(); }
<<EOF>> { printf("%04d 00 EOS *\n",pos+len); yyterminate(); }
%%
/*
 * Program entry point: writes the opening BOS marker line to stdout and
 * then runs the generated scanner over standard input.
 */
int main()
{
    fputs("0000 00 BOS *\n", stdout);
    yylex();
    return 0;
}
/*
 * End-of-input hook called by the scanner. Always reports 1, i.e. there
 * is no further input to continue with.
 */
int yywrap()
{
    enum { NO_MORE_INPUT = 1 };

    return NO_MORE_INPUT;
}
/*
 * Updates the globals pos and len from the first two integer fields of the
 * LAST line contained in the current match (yytext).
 *
 * A match may span several lines and may or may not end with a newline
 * (the catch-all rule matches a line without its newline). The text is only
 * read, never modified, so the scanner's buffer stays intact.
 *
 * If the last line does not start with two integers, pos and/or len keep
 * their previous values.
 */
void set_position(void)
{
    const char *lastseg = yytext;
    int end = yyleng;
    int i;

    /* Ignore the newline that terminates the match, when there is one;
       otherwise the search below would stop right at the end. */
    if (end > 0 && yytext[end - 1] == '\n')
    {
        end--;
    }

    /* The last line starts right after the last remaining newline;
       without any newline the whole match is a single line. */
    for (i = end - 1; i >= 0; i--)
    {
        if (yytext[i] == '\n')
        {
            lastseg = yytext + i + 1;
            break;
        }
    }

    sscanf(lastseg, "%d %d", &pos, &len);
}
/*
 * Writes an EOS marker line immediately followed by a BOS marker line,
 * both placed at pos+len as last recorded by set_position().
 */
void print_EOS()
{
    const int boundary = pos + len;

    printf("%04d 00 EOS *\n%04d 00 BOS *\n", boundary, boundary);
}

View File

@ -11,7 +11,7 @@ use File::Temp;
use File::HomeDir;
my $LIB_DIR="/usr/local/lib/utt";
my $systemconfigfile='/usr/local/etc/utt/ser.conf';
my $systemconfigfile='/etc/utt/ser.conf';
my $userconfigfile=home()."/.utt/ser.conf";
Getopt::Long::Configure('no_ignore_case_always');