From ac7d97018efb6419eb3d840b7767294d8be71ed8 Mon Sep 17 00:00:00 2001 From: pawelk Date: Tue, 8 Apr 2008 11:53:51 +0000 Subject: [PATCH] Uaktualnilismy kora. git-svn-id: svn://atos.wmid.amu.edu.pl/utt@23 e293616e-ec6a-49c2-aa92-f4a8b91c5d16 --- app/Makefile | 2 +- app/TODO | 2 - app/dist/common/utt_make_config.pl | 14 +++ app/dist/struktura.txt | 1 + app/lib/weights.cor | 21 ++++ app/src/common/common.h | 2 + app/src/kor/Makefile | 47 +++++++++ app/src/kor/cmdline_cor.ggo | 10 ++ app/src/kor/common_cor.cc | 27 +++++ app/src/kor/common_cor.h | 26 +++++ app/src/kor/corlist.cc | 70 +++++++++++++ app/src/kor/corlist.h | 20 ++++ app/src/kor/corr.cc | 163 +++++++++++++++++++++++++++++ app/src/kor/corr.hh | 39 +++++++ app/src/kor/main.cc | 155 +++++++++++++++++++++++++++ app/src/lib/word.cc | 38 ++++++- app/src/lib/word.h | 6 +- app/src/tok/tok | 38 +++++-- 18 files changed, 664 insertions(+), 17 deletions(-) create mode 100644 app/lib/weights.cor create mode 100755 app/src/kor/Makefile create mode 100755 app/src/kor/cmdline_cor.ggo create mode 100755 app/src/kor/common_cor.cc create mode 100755 app/src/kor/common_cor.h create mode 100755 app/src/kor/corlist.cc create mode 100755 app/src/kor/corlist.h create mode 100755 app/src/kor/corr.cc create mode 100755 app/src/kor/corr.hh create mode 100755 app/src/kor/main.cc diff --git a/app/Makefile b/app/Makefile index bc83743..df8a6c0 100644 --- a/app/Makefile +++ b/app/Makefile @@ -27,7 +27,7 @@ export UTT_SHARE_DIR=${UTT_DIR}/share ############################## # list of components to be included in the distribution -COMPONENTS = compiledic cor fla gph grp kon kot lem mar rm12 rs12 sen-l sen-nl ser tags tok.l unfla +COMPONENTS = compiledic cor fla gph grp kon kor kot lem mar rm12 rs12 sen-l sen-nl ser tags tok.l unfla # gue nie dziala! ############################## diff --git a/app/TODO b/app/TODO index bfcbc10..f455e04 100644 --- a/app/TODO +++ b/app/TODO @@ -1,8 +1,6 @@ BARDZO WAZNE: -* lem nie obsluguje dlugich wyrazow [TO] * gue SIE NIE KOMPILUJE !!! -* poprawic cora [PK] WAZNE: * zamienic kota na lepszego (Kubis) [TO] diff --git a/app/dist/common/utt_make_config.pl b/app/dist/common/utt_make_config.pl index 2c78e95..49657d2 100644 --- a/app/dist/common/utt_make_config.pl +++ b/app/dist/common/utt_make_config.pl @@ -14,6 +14,7 @@ prepareUttUsrHome($usr_home); conf_utt(catfile($usr_home, 'utt.conf'), $sys_home); conf_cor(catfile($usr_home, 'cor.conf'), $sys_home); +conf_kor(catfile($usr_home, 'kor.conf'), $sys_home); conf_compiledic(catfile($usr_home, 'compiledic.conf'), $sys_home); conf_grp(catfile($usr_home, 'grp.conf'), $sys_home); conf_gue(catfile($usr_home, 'gue.conf'), $sys_home); @@ -98,6 +99,19 @@ sub conf_cor() { close FILE; } +sub conf_kor() { + my $kor_file = shift; + my $utthome = shift; + open(FILE, ">$kor_file"); + + print FILE makeConfigHeader(); + print FILE "dictionary-home=", abs_path("$utthome/share/utt"), "\n"; + print FILE "weights=", abs_path("$utthome/lib/utt/weights.cor"), "\n"; + print FILE "threshold=1.0\n"; + + close FILE; +} + sub conf_grp() { my $grp_file = shift; my $utthome = shift; diff --git a/app/dist/struktura.txt b/app/dist/struktura.txt index d14cb07..b97689a 100644 --- a/app/dist/struktura.txt +++ b/app/dist/struktura.txt @@ -43,6 +43,7 @@ /usr/local/lib/utt/ser.l.template /usr/local/lib/utt/terms.m4 +/usr/local/lib/utt/weights.cor /usr/local/share/doc/utt/FAQ /usr/local/share/doc/utt/COPYRIGHT diff --git a/app/lib/weights.cor b/app/lib/weights.cor new file mode 100644 index 0000000..5e966fa --- /dev/null +++ b/app/lib/weights.cor @@ -0,0 +1,21 @@ +%stdcor 1 +%xchg 1 +ż rz 0.5 +ch h 0.5 +u ó 0.5 +u o 0.75 +om ą 0.5 +om a 0.75 +en ę 0.5 +en ę 0.75 +a ą 0.25 +c ć 0.25 +e ę 0.25 +l ł 0.25 +n ń 0.25 +o ó 0.25 +s ¶ 0.25 +z ż 0.25 +z Ľ 0.25 +x Ľ 0.30 + diff --git a/app/src/common/common.h b/app/src/common/common.h index ae08847..b19ded7 100644 --- a/app/src/common/common.h +++ b/app/src/common/common.h @@ -3,6 +3,8 @@ #include #include +#include +#include #include "../lib/const.h" diff --git a/app/src/kor/Makefile b/app/src/kor/Makefile new file mode 100755 index 0000000..01399f6 --- /dev/null +++ b/app/src/kor/Makefile @@ -0,0 +1,47 @@ +PAR=-Wno-deprecated -m32 -fpermissive +# -static +PAR2=-c -Wno-deprecated -m32 -fpermissive +LIB_PATH=../lib +COMMON_PATH=../common +CMDLINE_FILE='"../kor/cmdline.h"' + + +kor: main.cc corr.o corlist.o cmdline.o $(LIB_PATH)/word.o \ + $(LIB_PATH)/auttools.o cmdline.c common_cor.o common.o + g++ $(PAR) main.cc corlist.o corr.o common.o \ + $(LIB_PATH)/word.o $(LIB_PATH)/auttools.o cmdline.c common_cor.o \ + -o kor + +corr.o: corr.cc corr.hh cmdline.h + g++ $(PAR2) corr.cc + +corlist.o: corlist.cc corlist.h cmdline.h + g++ $(PAR2) corlist.cc + + + +common.o: cmdline.h $(COMMON_PATH)/cmdline_common.ggo $(COMMON_PATH)/common.cc \ + $(COMMON_PATH)/common.h + g++ $(PAR2) -D _CMDLINE_FILE=$(CMDLINE_FILE) $(COMMON_PATH)/common.cc + +common_cor.o: cmdline.h common_cor.cc common_cor.h + g++ $(PAR2) common_cor.cc + +cmdline.c cmdline.h: cmdline.ggo + gengetopt -i cmdline.ggo --conf-parser + +cmdline.ggo: cmdline_cor.ggo $(COMMON_PATH)/cmdline_common.ggo + cat cmdline_cor.ggo $(COMMON_PATH)/cmdline_common.ggo > cmdline.ggo + +copy: +ifdef UTT_BIN_DIR + cp kor ${UTT_BIN_DIR} +endif + +clean: clean.cmdline + rm *.o || true + rm kor || true + +clean.cmdline: + rm cmdline.* || true + diff --git a/app/src/kor/cmdline_cor.ggo b/app/src/kor/cmdline_cor.ggo new file mode 100755 index 0000000..8b1c93a --- /dev/null +++ b/app/src/kor/cmdline_cor.ggo @@ -0,0 +1,10 @@ +package "kor" +version "0.1" + +option "dictionary-home" - "Dictionary home dir." string typestr="FILENAME" no hidden +option "dictionary" d "Dictionary" string typestr="FILENAME" default="cor.bin" no +option "distance" n "Maximal edit distance." int default="1" no +option "replace" r "Replace original form with corrected form, place original form in the cor field. This option has no effect in single mode" flag off +#option "single" - "Place all alternatives in the same line" flag off +option "weights" w "File with translation rules." string typestr="FILENAME" default="weight.cor" no +option "threshold" t "Edit distance threshold" float default="1" no \ No newline at end of file diff --git a/app/src/kor/common_cor.cc b/app/src/kor/common_cor.cc new file mode 100755 index 0000000..98a586a --- /dev/null +++ b/app/src/kor/common_cor.cc @@ -0,0 +1,27 @@ +#include +#include +#include "common_cor.h" + +#define MAX_PATH_LENGTH 255 + +char dictionary[MAX_PATH_LENGTH]; +char file_weights[MAX_PATH_LENGTH]; +float threshold; + +void process_cor_options(gengetopt_args_info* args) +{ + if(args->dictionary_given) + { + expand_path(args->dictionary_arg,dictionary); + } + else if (args->dictionary_home_given && args->language_given) + { + char buf[MAX_PATH_LENGTH]; + expand_path(args->dictionary_home_arg, buf); + sprintf(dictionary,"%s/%s/cor.bin",buf,args->language_arg); + } + + expand_path(args->weights_arg, file_weights); + + threshold = args->threshold_arg; +} diff --git a/app/src/kor/common_cor.h b/app/src/kor/common_cor.h new file mode 100755 index 0000000..3ff675e --- /dev/null +++ b/app/src/kor/common_cor.h @@ -0,0 +1,26 @@ +#ifndef __COMMON_COR_H +#define __COMMON_COR_H + +// SEKCJA STALYCH +#ifndef _CMDLINE_FILE + #define _CMDLINE_FILE "../kor/cmdline.h" +#endif + +#define MAX_LEN 2 +#define PREC 1000 +#define Weight int + +// SEKCJA INCLUDOW +#include "../common/common.h" +#include _CMDLINE_FILE + + +// SEKCJA GENGETOPT +extern int change_count; +extern void process_cor_options(gengetopt_args_info* args); +extern char dictionary[]; +extern char file_weights[]; +extern float threshold; + +#endif + diff --git a/app/src/kor/corlist.cc b/app/src/kor/corlist.cc new file mode 100755 index 0000000..470534e --- /dev/null +++ b/app/src/kor/corlist.cc @@ -0,0 +1,70 @@ +#include +#include +#include "corlist.h" + +#define min(x,y) ((xMAX_LEN) { printf("ERROR in file %s: the string '%s' exceeds maximum length of %d characters.\n",Name,a,MAX_LEN); fclose(f); return -1; } + if (w.lb>MAX_LEN) { printf("ERROR in file %s: the string '%s' exceeds maximum length of %d characters.\n",Name,b,MAX_LEN); fclose(f); return -1; } + strcpy(w.a,a), strcpy(w.b,b); + total++; + List = (CorWeight*)realloc(List,total*sizeof(CorWeight)); + List[total-1]=w; + // printf("%s\t<->\t%s\t%1.2f\n",w.a,w.b,((float)w.w/PREC)); + } + } + fclose(f); + // printf("Total: %d\n\n",total); + return(total); +} diff --git a/app/src/kor/corlist.h b/app/src/kor/corlist.h new file mode 100755 index 0000000..5626d14 --- /dev/null +++ b/app/src/kor/corlist.h @@ -0,0 +1,20 @@ +#ifndef _CORLIST_H +#define _CORLIST_H + +//#include +#include "common_cor.h" + +typedef struct { char a[MAX_LEN+1],b[MAX_LEN+1]; Weight w; short la,lb; } CorWeight; + +class CorList +{ + private: + CorWeight *List; + int total; + public: + Weight cor_stdcor, cor_xchg; + int loadCWL(char *Name); + Weight GetValue(char X[100], char Y[100], Weight (*H2)[100], int i, int j); +}; + +#endif diff --git a/app/src/kor/corr.cc b/app/src/kor/corr.cc new file mode 100755 index 0000000..7a7afc2 --- /dev/null +++ b/app/src/kor/corr.cc @@ -0,0 +1,163 @@ +//--------------------------------------------------------------------------- +#include "common_cor.h" +#include "corr.hh" + +#define MAXPATH 256 + +#define min(x,y) ((xy)?(x):(y)) + + +Weight Corr::ed(int i,int j) +{ + if(i==-1) + return (j+1)*CL.cor_stdcor; // moje* Nie wiem czy tak będzie dobrze, ale uznałem, że poza tablicą powinny być wartosci przemnożone przez wagę standardowej zmiany litery + if(j==-1) + return (i+1)*CL.cor_stdcor; // moje* + if(i==-2 || j==-2) + return (n+1)*CL.cor_stdcor; // moje* + + if(X[i]==Y[j]) + return min(H2[i-1][j-1], min(CL.cor_stdcor+min(H2[i][j-1],H2[i-1][j]),CL.GetValue(X,Y,H2,i,j))); + if(X[i-1]==Y[j] && X[i]==Y[j-1]) + return min(min(CL.cor_xchg+H2[i-2][j-2],CL.cor_stdcor+min(H2[i][j-1],H2[i-1][j])), CL.GetValue(X,Y,H2,i,j)); + return min(CL.cor_stdcor+min(H2[i-1][j-1],min(H2[i][j-1],H2[i-1][j])), CL.GetValue(X,Y,H2,i,j)); + +/* // wersja z wagami ale dla floatów + if(X[i]==Y[j])//zielone-> <- niebieskie -> <- rózowe -> + return min(H2[i-1][j-1], min(1+min(H2[i][j-1],H2[i-1][j]),CL.GetValue(X,Y,H2,i,j))); + if(X[i-1]==Y[j] && X[i]==Y[j-1]) + return min(1+min(H2[i-2][j-2],min(H2[i][j-1],H2[i-1][j])), CL.GetValue(X,Y,H2,i,j)); + return min(1+min(H2[i-1][j-1],min(H2[i][j-1],H2[i-1][j])), CL.GetValue(X,Y,H2,i,j)); +*/ + +/* // normalna wersja + if(X[i]==Y[j]) + return H2[i-1][j-1]; + if(X[i-1]==Y[j] && X[i]==Y[j-1]) + return 1+min(H2[i-2][j-2],min(H2[i][j-1],H2[i-1][j])); + return 1+min(H2[i-1][j-1],min(H2[i][j-1],H2[i-1][j])); +*/ + +/* + if(X[i]==Y[j]) + return H[(i-1)+2][(j-1)+2]; + if(X[i-1]==Y[j] && X[i]==Y[j-1]) + return 1+min(H[(i-2)+2][(j-2)+2],min(H[(i)+2][(j-1)+2],H[(i-1)+2][(j)+2])); + return 1+min(H[(i-1)+2][(j-1)+2],min(H[(i)+2][(j-1)+2],H[(i-1)+2][(j)+2])); +*/ +} + +int Corr::load2(char *Name) // moje +{ + return CL.loadCWL(Name); +} + +Weight Corr::cuted(int j) +{ + int l=max(0,j-t); + int u=min(m,j+t); + Weight ce=(j+t)*PREC; // moje* + for(int k=l;k<=u;k++) + { + if(H2[k][j]0) + j--; + else + more=0; + while(more && !continued(path[j])); + state=path[j]+1; + } + return count; +} + + +//--------------------------------------------------------------------------- + diff --git a/app/src/kor/corr.hh b/app/src/kor/corr.hh new file mode 100755 index 0000000..fcbf669 --- /dev/null +++ b/app/src/kor/corr.hh @@ -0,0 +1,39 @@ +//--------------------------------------------------------------------------- +#ifndef _corr_hh +#define _corr_hh +//--------------------------------------------------------------------------- + +#include "../lib/tfti.h" +#include "../lib/word.h" +#include "corlist.h" +#include "../common/common.h" + +class Corr : public TFTiv +{ +private: + Weight H[100][100]; + char X[100]; // misspelled string + char Y[100]; // (possibly partial) candidate string + int m; // length of X + int n; // maximal length of Y + + Weight ed(int,int); + Weight cuted(int); + void recomputeH(int); + + +public: + Weight (*H2)[100]; // moje: zmiana z int na Weight (float) + int t; // threshold + CorList CL; // moje + + Corr() : H2((Weight(*)[100])&H[2][2]) {}; // moje (int->float) + Corr(const char* a) : TFTiv(a), H2((Weight(*)[100])&H[2][2]) { }; + + int correct(const char* w, Words& tab); + + int load2(char *Name); // moje +}; + +//--------------------------------------------------------------------------- +#endif diff --git a/app/src/kor/main.cc b/app/src/kor/main.cc new file mode 100755 index 0000000..4ff051f --- /dev/null +++ b/app/src/kor/main.cc @@ -0,0 +1,155 @@ +#include +#include +#include "../lib/iotools.h" +#include "common_cor.h" +#include "corr.hh" +#include + + +int main(int argc, char** argv) { + +// setlocale(LC_CTYPE,""); +// setlocale(LC_COLLATE,""); + + gengetopt_args_info args; + + if(cmdline_parser(argc, argv, &args) != 0) + exit(1); + + process_config_files(&args,argv[0]); + process_common_options(&args,argv[0]); + process_cor_options(&args); + + Corr cor; + + //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +// strcpy(dictionary,"cor.bin"); + + cor.load(dictionary); + cor.t=args.distance_arg; + + //>>>>>>>>>>>>>>> + cor.CL.cor_stdcor=1*PREC; + cor.CL.cor_xchg=1*PREC; + if (cor.load2(file_weights)==-1) return -1; // moje + cor.t=1*PREC; // ODLEGLOSC EDYCYJNA + //<<<<<<<<<<<<<< + + char line[MAX_LINE+1]; + long line_count = 0; + + Segment seg; + Words tab; + char form1[MAX_LINE]; + char* form; + int formcasing; + char corfield[MAX_LINE]=""; + + while (fgets(line, MAX_LINE, inputf)) + { + ++line_count; + char outline[128]; + + if (!process_seg(line, args)) + fputs(line, outputf); + else + { + char form[MAX_FORM]; + + tab.clear(); + getfield(line,input_field_prefix,form); + if (form==NULL) continue; + + formcasing=3; + cor.correct(form, tab); + + if( tab.count() == 0 ) + { + formcasing=casing(form); + if( formcasing == 1 || formcasing == 2) + tolowers(form, form1), cor.correct(form1, tab); + } + + if ( tab.count() == 0) + fputs(line, failedf); + else + { + tab.sort(); + + if(args.replace_flag) + { + char corfield[128]; + strcpy(corfield, input_field_prefix); + strcat(corfield, form); + seg.aux[seg.auxn]=corfield; + ++seg.auxn; + for(int i=0; i= 0; --i) + { + if(tab[i].w_suf() > threshold) continue; + restorecasing(tab[i].form(),tab[i].form(),formcasing); + p += sprintf(p," %s%s,%1.2f",output_field_prefix,tab[i].form(),tab[i].w_suf()); + } + sprintf(p,"\n"); + + strcpy(outline,line); + outline[strlen(outline)-1]='\0'; + strcat(outline,corfield); + fputs(outline, outputf); + } + else if(one_field) + { + char* p=corfield; + p += sprintf(p," %s",output_field_prefix); + for(int i=tab.count()-1; i >= 0; --i) + { + if(tab[i].w_suf() > threshold) continue; + restorecasing(tab[i].form(),tab[i].form(),formcasing); + p += sprintf(p,(i==0)?"%s,%1.2f":";%s,%1.2f",tab[i].form(),tab[i].w_suf()); + } + + sprintf(p,"\n"); + + strcpy(outline,line); + outline[strlen(outline)-1]='\0'; + strcat(outline,corfield); + fputs(outline, outputf); + } + else + { + for(int i=tab.count()-1; i >= 0; --i) + { + if(tab[i].w_suf() > threshold) continue; + restorecasing(tab[i].form(),tab[i].form(),formcasing); + sprintf(corfield," %s%s,%1.2f\n",output_field_prefix,tab[i].form(),tab[i].w_suf()); + strcpy(outline,line); + outline[strlen(outline)-1]='\0'; + strcat(outline,corfield); + fputs(outline, outputf); + } + } + } + } + } + + if(args.interactive_flag) + { + fflush(outputf); + fflush(failedf); + } + } + cmdline_parser_free(&args); +} diff --git a/app/src/lib/word.cc b/app/src/lib/word.cc index 0616cd7..f669327 100644 --- a/app/src/lib/word.cc +++ b/app/src/lib/word.cc @@ -22,7 +22,7 @@ void Word::autodescr(const char* fo, const char* de) } //--------------------------------------------------------------------------- -int Word::cmp_w(Word a, Word b) { +bool Word::cmp_w(Word a, Word b) { return (a.w_suf() > b.w_suf()); } //--------------------------------------------------------------------------- @@ -107,7 +107,7 @@ int Words::next() { } //--------------------------------------------------------------------------- -void Words::sort() { +void Words::sort() { std::sort(tab.begin(), tab.end(), Word::cmp_w); } @@ -123,9 +123,15 @@ int Words::add(const char* fo) if (cnt>=tab.capacity()-1) tab.resize(tab.size()*2); - tab[cnt].form(fo); - tab[cnt].w_suf(0.0); + + Word o; + o.form(fo); + o.w_suf(0.0); + tab.push_back(o); +// tab[cnt].form(fo); +// tab[cnt].w_suf(0.0); + // if(cntform(fo); @@ -136,6 +142,30 @@ int Words::add(const char* fo) //return -1; } +//--------------------------------------------------------------------------- + //TYMCZASOWO TAK(DLA CORA) +int Words::add(const char* fo, float weight) +{ + int i = find(fo); + if(i!=-1) { + return i; + } + + if (cnt>=tab.capacity()-1) + tab.resize(tab.size()*2); + + Word o; + o.form(fo); + o.w_suf(weight); + tab.push_back(o); +// tab[cnt].form(fo); +// tab[cnt].w_suf(weight); + + return cnt++; + // } + //return -1; +} + //--------------------------------------------------------------------------- int Words::add(const char* fo, const char* des) diff --git a/app/src/lib/word.h b/app/src/lib/word.h index 4859f3f..0156d6d 100644 --- a/app/src/lib/word.h +++ b/app/src/lib/word.h @@ -12,6 +12,8 @@ using namespace std; + + class Word { public: @@ -36,7 +38,7 @@ private: float _w_suf; // float _w_pref; public: - static int cmp_w(Word a, Word b); + static bool cmp_w(Word a, Word b); Word() : _len_suf(-1) { *f='\0'; returned=0; }; Word(const char* fo, const char* des) : _len_suf(-1) { autodescr(fo,des); _w_suf=1.0; returned=0; }; @@ -76,6 +78,7 @@ public: }; + inline Word::Word(const Word& word) { strcpy(f,word.f); strcpy(l,word.l); strcpy(d,word.d); _len_suf=word._len_suf; _w_suf=word._w_suf; returned = 0; } @@ -118,6 +121,7 @@ class Words int count() const { return cnt; } void clear() { cnt=0; tab.clear(); } int add(const char* fo); + int add(const char* fo, float weight); int add(const char* fo, const char* des); /* zwraca index nastepnego wyniku, podczas pierwszego wywolania diff --git a/app/src/tok/tok b/app/src/tok/tok index 87a8496..991787c 100755 --- a/app/src/tok/tok +++ b/app/src/tok/tok @@ -10,6 +10,8 @@ use locale; use Getopt::Long; use File::HomeDir; +my $max_form_length = 50; + my $interactive=0; my $help; @@ -20,7 +22,7 @@ my $userconfigfile=home()."/.utt/tok.conf"; #read configuration files########################### my $file; foreach $file ($systemconfigfile, $userconfigfile){ - if(open(CONFIG, $configfile1)){ + if(open(CONFIG, $file)){ while () { chomp; s/#.*//; @@ -74,17 +76,32 @@ while(<>) # | [^[:print:]] (?{seg("B",$&)}) +sub min { + my ($val1, $val2) = @_; + if($val1 < $val2) { + return $val1; + } + else { + return $val2; + } +} + sub seg { my ($tag,$match) = @_; - my $len=length $match; - printf "%04d %02d %s ", $offset, $len, $tag; + my $length = length $match; + my $idx = 0; + while($idx < $length) { + my $l = min $max_form_length, $length - $idx; + my $m = substr $match, $idx, $l; + + printf "%04d %02d %s ", $offset + $idx, $l, $tag; if($tag eq 'S') { - for(my $i=0; $i<$len; ++$i) + for(my $i=0; $i<$l; ++$i) { - my $c = substr $match, $i, 1; + my $c = substr $m, $i, 1; print '_' if $c eq ' '; print '\n' if $c eq "\n"; print '\t' if $c eq "\t"; @@ -94,12 +111,15 @@ sub seg } elsif($tag eq 'B') { - printf "\\x%02X", ord($match); + printf "\\x%02X", ord($m); } else { - print $match; + print $m; } print "\n"; - $offset += $len; -} + $idx += $l; + } # while($idx < $length) + $offset += $length; +} #sub seg +