Uaktualnilismy kora.

git-svn-id: svn://atos.wmid.amu.edu.pl/utt@23 e293616e-ec6a-49c2-aa92-f4a8b91c5d16
This commit is contained in:
pawelk 2008-04-08 11:53:51 +00:00
parent 317d53bd6b
commit ac7d97018e
18 changed files with 664 additions and 17 deletions

View File

@ -27,7 +27,7 @@ export UTT_SHARE_DIR=${UTT_DIR}/share
##############################
# list of components to be included in the distribution
COMPONENTS = compiledic cor fla gph grp kon kot lem mar rm12 rs12 sen-l sen-nl ser tags tok.l unfla
COMPONENTS = compiledic cor fla gph grp kon kor kot lem mar rm12 rs12 sen-l sen-nl ser tags tok.l unfla
# gue nie dziala!
##############################

View File

@ -1,8 +1,6 @@
BARDZO WAZNE:
* lem nie obsluguje dlugich wyrazow [TO]
* gue SIE NIE KOMPILUJE !!!
* poprawic cora [PK]
WAZNE:
* zamienic kota na lepszego (Kubis) [TO]

View File

@ -14,6 +14,7 @@ prepareUttUsrHome($usr_home);
conf_utt(catfile($usr_home, 'utt.conf'), $sys_home);
conf_cor(catfile($usr_home, 'cor.conf'), $sys_home);
conf_kor(catfile($usr_home, 'kor.conf'), $sys_home);
conf_compiledic(catfile($usr_home, 'compiledic.conf'), $sys_home);
conf_grp(catfile($usr_home, 'grp.conf'), $sys_home);
conf_gue(catfile($usr_home, 'gue.conf'), $sys_home);
@ -98,6 +99,19 @@ sub conf_cor() {
close FILE;
}
# Write the per-user configuration file for the kor component.
#
# Arguments:
#   $kor_file - path of the configuration file to create (truncated if it exists)
#   $utthome  - root directory of the UTT installation; the dictionary home
#               and the weights file are resolved relative to it
#
# Dies if the file cannot be opened or closed, instead of silently
# printing to an unopened handle.
sub conf_kor {
    my ($kor_file, $utthome) = @_;

    open(my $fh, '>', $kor_file)
        or die "Cannot write $kor_file: $!\n";

    print {$fh} makeConfigHeader();
    print {$fh} "dictionary-home=", abs_path("$utthome/share/utt"), "\n";
    print {$fh} "weights=", abs_path("$utthome/lib/utt/weights.cor"), "\n";
    print {$fh} "threshold=1.0\n";

    # Buffered write errors only surface when the handle is closed.
    close($fh)
        or die "Cannot close $kor_file: $!\n";
}
sub conf_grp() {
my $grp_file = shift;
my $utthome = shift;

View File

@ -43,6 +43,7 @@
/usr/local/lib/utt/ser.l.template
/usr/local/lib/utt/terms.m4
/usr/local/lib/utt/weights.cor
/usr/local/share/doc/utt/FAQ
/usr/local/share/doc/utt/COPYRIGHT

21
app/lib/weights.cor Normal file
View File

@ -0,0 +1,21 @@
%stdcor 1
%xchg 1
¿ rz 0.5
ch h 0.5
u ó 0.5
u o 0.75
om ¹ 0.5
om a 0.75
en ê 0.5
en ê 0.75
a ¹ 0.25
c æ 0.25
e ê 0.25
l ³ 0.25
n ñ 0.25
o ó 0.25
s ¶ 0.25
z ¿ 0.25
z ¼ 0.25
x ¼ 0.30

View File

@ -3,6 +3,8 @@
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#include "../lib/const.h"

47
app/src/kor/Makefile Executable file
View File

@ -0,0 +1,47 @@
# Build rules for the kor tool.
#
# PAR  - flags for the final compile-and-link step
# PAR2 - the same flags plus -c, for building object files
PAR=-Wno-deprecated -m32 -fpermissive
# -static
PAR2=-c -Wno-deprecated -m32 -fpermissive
LIB_PATH=../lib
COMMON_PATH=../common
# Passed to common.cc so that it includes this tool's generated cmdline.h.
CMDLINE_FILE='"../kor/cmdline.h"'

# These targets name actions, not files; without .PHONY a stray file called
# "clean" or "copy" would silently disable them.
.PHONY: copy clean clean.cmdline

# cmdline.c is compiled and linked directly in this step, so no separate
# cmdline.o is built.
kor: main.cc corr.o corlist.o $(LIB_PATH)/word.o \
		$(LIB_PATH)/auttools.o cmdline.c common_cor.o common.o
	g++ $(PAR) main.cc corlist.o corr.o common.o \
	$(LIB_PATH)/word.o $(LIB_PATH)/auttools.o cmdline.c common_cor.o \
	-o kor

corr.o: corr.cc corr.hh cmdline.h
	g++ $(PAR2) corr.cc

corlist.o: corlist.cc corlist.h cmdline.h
	g++ $(PAR2) corlist.cc

common.o: cmdline.h $(COMMON_PATH)/cmdline_common.ggo $(COMMON_PATH)/common.cc \
		$(COMMON_PATH)/common.h
	g++ $(PAR2) -D _CMDLINE_FILE=$(CMDLINE_FILE) $(COMMON_PATH)/common.cc

common_cor.o: cmdline.h common_cor.cc common_cor.h
	g++ $(PAR2) common_cor.cc

# The option parser is generated by gengetopt from the merged .ggo file.
cmdline.c cmdline.h: cmdline.ggo
	gengetopt -i cmdline.ggo --conf-parser

# Tool-specific options followed by the options shared by all tools.
cmdline.ggo: cmdline_cor.ggo $(COMMON_PATH)/cmdline_common.ggo
	cat cmdline_cor.ggo $(COMMON_PATH)/cmdline_common.ggo > cmdline.ggo

# Install the binary only when the caller exported UTT_BIN_DIR.
copy:
ifdef UTT_BIN_DIR
	cp kor ${UTT_BIN_DIR}
endif

# rm -f succeeds quietly when there is nothing to remove.
clean: clean.cmdline
	rm -f *.o
	rm -f kor

# Removes the generated cmdline.c, cmdline.h and cmdline.ggo.
clean.cmdline:
	rm -f cmdline.*

10
app/src/kor/cmdline_cor.ggo Executable file
View File

@ -0,0 +1,10 @@
# gengetopt option definitions specific to kor; the build concatenates this
# file with the options shared by all tools before running gengetopt.
package "kor"
version "0.1"
option "dictionary-home" - "Dictionary home dir." string typestr="FILENAME" no hidden
option "dictionary" d "Dictionary" string typestr="FILENAME" default="cor.bin" no
option "distance" n "Maximal edit distance." int default="1" no
option "replace" r "Replace original form with corrected form, place original form in the cor field. This option has no effect in single mode" flag off
#option "single" - "Place all alternatives in the same line" flag off
# The default matches the name of the weights file shipped with the
# distribution (weights.cor).
option "weights" w "File with translation rules." string typestr="FILENAME" default="weights.cor" no
option "threshold" t "Edit distance threshold" float default="1" no

27
app/src/kor/common_cor.cc Executable file
View File

@ -0,0 +1,27 @@
#include <stdlib.h>
#include <string.h>
#include "common_cor.h"
#define MAX_PATH_LENGTH 255
char dictionary[MAX_PATH_LENGTH];
char file_weights[MAX_PATH_LENGTH];
float threshold;
// Copies the kor-specific options from the parsed command line / config
// files into the module-level globals.
//
//   dictionary   - the expanded --dictionary path when that option was given;
//                  otherwise <dictionary-home>/<language>/cor.bin when both
//                  of those options were given; otherwise left untouched.
//   file_weights - the expanded --weights path.
//   threshold    - the --threshold value.
void process_cor_options(gengetopt_args_info* args)
{
    if (args->dictionary_given)
    {
        expand_path(args->dictionary_arg, dictionary);
    }
    else if (args->dictionary_home_given && args->language_given)
    {
        char buf[MAX_PATH_LENGTH];
        expand_path(args->dictionary_home_arg, buf);
        // Bounded formatting: a long home directory or language name must
        // not write past the end of the fixed-size global buffer.
        snprintf(dictionary, MAX_PATH_LENGTH, "%s/%s/cor.bin", buf, args->language_arg);
    }

    expand_path(args->weights_arg, file_weights);
    threshold = args->threshold_arg;
}

26
app/src/kor/common_cor.h Executable file
View File

@ -0,0 +1,26 @@
// Shared declarations for the kor tool: build-time constants, the generated
// command-line header, and the globals filled in by process_cor_options().
#ifndef __COMMON_COR_H
#define __COMMON_COR_H
// CONSTANTS SECTION
// Header produced by gengetopt; the build may override it with
// -D _CMDLINE_FILE=... (the Makefile does so when compiling common.cc).
#ifndef _CMDLINE_FILE
#define _CMDLINE_FILE "../kor/cmdline.h"
#endif
// Maximum length of either string in a correction rule; loadCWL() rejects
// longer strings and CorWeight reserves MAX_LEN+1 bytes for each.
#define MAX_LEN 2
// Fixed-point scale: weights read as floats are multiplied by PREC and
// stored as Weight.
#define PREC 1000
// Type used for edit-distance costs.
#define Weight int
// INCLUDES SECTION
#include "../common/common.h"
#include _CMDLINE_FILE
// GENGETOPT SECTION
// NOTE(review): change_count is declared here but not defined in the kor
// sources visible in this change; confirm it is provided elsewhere.
extern int change_count;
extern void process_cor_options(gengetopt_args_info* args);
extern char dictionary[];
extern char file_weights[];
extern float threshold;
#endif

70
app/src/kor/corlist.cc Executable file
View File

@ -0,0 +1,70 @@
#include <stdio.h>
#include <alloc.h>
#include "corlist.h"
#define min(x,y) ((x<y)?(x):(y))
// Cheapest way to reach cell (i, j) of the edit-distance table by applying
// one of the loaded correction rules as the last step.
//
// X, Y - the two strings being compared; X[0..i] and Y[0..j] are considered
// H2   - the table, indexable with small negative offsets
// Each rule is tried in both directions (a in X / b in Y, and b in X / a in Y).
// Returns 9999*PREC when no rule matches.
Weight CorList::GetValue(char X[100], char Y[100], Weight (*H2)[100], int i, int j)
{
    Weight best = 9999*PREC; // stands in for +infinity

    for (int k = 0; k < total; ++k)
    {
        const CorWeight& rule = List[k];
        const int la = rule.la;
        const int lb = rule.lb;

        // Forward: X[0..i] ends with a, Y[0..j] ends with b.
        if (la <= i+1 && lb <= j+1
            && strncmp(rule.a, X+i+1-la, la) == 0
            && strncmp(rule.b, Y+j+1-lb, lb) == 0)
        {
            const Weight candidate = H2[i-la][j-lb] + rule.w;
            if (candidate < best)
                best = candidate;
        }

        // Reverse: X[0..i] ends with b, Y[0..j] ends with a.
        if (la <= j+1 && lb <= i+1
            && strncmp(rule.b, X+i+1-lb, lb) == 0
            && strncmp(rule.a, Y+j+1-la, la) == 0)
        {
            const Weight candidate = H2[i-lb][j-la] + rule.w;
            if (candidate < best)
                best = candidate;
        }
    }

    return best;
}
int CorList::loadCWL(char *Name)
{
FILE *f = fopen(Name,"r");
int len=MAX_LEN*2+100;
char a[100],b[100], buf[len+1];
float wtmp;
CorWeight w;
cor_stdcor = 1 * PREC;
cor_xchg = 1 * PREC;
List = (CorWeight*)malloc(sizeof(CorWeight)); // 100 BO NIE DZIALA REALLOC
total=0;
if (!f) { fprintf(stderr,"\nCan't open correction weight list file!\n"); return -1; }
while (!feof(f) && fgets(buf,len,f))
{
if (buf[0]=='%')
{
sscanf(buf+1,"%s %f",&a,&wtmp);
int ok=0;
if (strcmp(a,"stdcor")==0) { ok=1; cor_stdcor=Weight(wtmp*PREC); /*printf("Standard letter correction set to: %1.2f\n",wtmp);*/ }
if (strcmp(a,"xchg")==0) { ok=1; cor_xchg=Weight(wtmp*PREC); /*printf("Inverted letters correction set to: %1.2f\n",wtmp);*/ }
if (!ok) { fprintf(stderr,"Error in file %s: Unknown keyword: '%s'.\n",Name,a); return -1; }
}
else
{
sscanf(buf,"%s %s %f",&a,&b,&wtmp);
w.w=(Weight)(wtmp*PREC);
w.la=strlen(a); w.lb=strlen(b);
if (w.la>MAX_LEN) { printf("ERROR in file %s: the string '%s' exceeds maximum length of %d characters.\n",Name,a,MAX_LEN); fclose(f); return -1; }
if (w.lb>MAX_LEN) { printf("ERROR in file %s: the string '%s' exceeds maximum length of %d characters.\n",Name,b,MAX_LEN); fclose(f); return -1; }
strcpy(w.a,a), strcpy(w.b,b);
total++;
List = (CorWeight*)realloc(List,total*sizeof(CorWeight));
List[total-1]=w;
// printf("%s\t<->\t%s\t%1.2f\n",w.a,w.b,((float)w.w/PREC));
}
}
fclose(f);
// printf("Total: %d\n\n",total);
return(total);
}

20
app/src/kor/corlist.h Executable file
View File

@ -0,0 +1,20 @@
#ifndef _CORLIST_H
#define _CORLIST_H
//#include <stdio.h>
#include "common_cor.h"
// One correction rule: string a may be exchanged with string b at cost w.
// la and lb cache the lengths of a and b.
struct CorWeight
{
    char a[MAX_LEN+1];
    char b[MAX_LEN+1];
    Weight w;
    short la;
    short lb;
};

// The list of correction rules read from a weights file, together with the
// two global weights that file can set.
class CorList
{
    CorWeight *List;   // rules, in file order
    int total;         // number of entries in List

public:
    Weight cor_stdcor; // cost of a standard single-letter correction
    Weight cor_xchg;   // cost of swapping two adjacent letters

    int loadCWL(char *Name);
    Weight GetValue(char X[100], char Y[100], Weight (*H2)[100], int i, int j);
};
#endif

163
app/src/kor/corr.cc Executable file
View File

@ -0,0 +1,163 @@
//---------------------------------------------------------------------------
#include "common_cor.h"
#include "corr.hh"
#define MAXPATH 256
#define min(x,y) ((x<y)?(x):(y))
#define max(x,y) ((x>y)?(x):(y))
// Weighted edit distance between X[0..i] and Y[0..j], computed from the
// neighbouring cells of H2 that have already been filled in.
//
// Cases, in order:
//   - i or j outside the table (-1 / -2): a multiple of the standard
//     letter-correction weight. (Original author's note, translated: "not
//     sure this is right, but I decided that values outside the table should
//     be multiplied by the weight of a standard letter change".)
//   - X[i] == Y[j]: the diagonal cell at no extra cost, or an
//     insertion/deletion, or a rule from the correction list.
//   - the last two letters are swapped: the cell two steps back plus the
//     swap weight, or an insertion/deletion, or a rule.
//   - otherwise: substitution/insertion/deletion at the standard weight, or
//     a rule.
//
// The unweighted recurrence this generalises:
//   X[i]==Y[j]            -> H2[i-1][j-1]
//   last two letters swap -> 1 + min(H2[i-2][j-2], H2[i][j-1], H2[i-1][j])
//   otherwise             -> 1 + min(H2[i-1][j-1], H2[i][j-1], H2[i-1][j])
Weight Corr::ed(int i,int j)
{
    if (i == -1)
        return (j+1)*CL.cor_stdcor;
    if (j == -1)
        return (i+1)*CL.cor_stdcor;
    if (i == -2 || j == -2)
        return (n+1)*CL.cor_stdcor;

    // Both values are needed by every branch below; compute them once.
    const Weight rule  = CL.GetValue(X,Y,H2,i,j);
    const Weight indel = CL.cor_stdcor + min(H2[i][j-1], H2[i-1][j]);

    if (X[i] == Y[j])
        return min(H2[i-1][j-1], min(indel, rule));

    // A swap needs two letters on each side; without the index guard the
    // test would read X[-1] or Y[-1].
    if (i > 0 && j > 0 && X[i-1] == Y[j] && X[i] == Y[j-1])
        return min(min(CL.cor_xchg + H2[i-2][j-2], indel), rule);

    return min(CL.cor_stdcor + min(H2[i-1][j-1], min(H2[i][j-1], H2[i-1][j])), rule);
}
// Loads the correction weight list from the file Name into CL and passes
// through the result of CorList::loadCWL.
int Corr::load2(char *Name)
{
    const int result = CL.loadCWL(Name);
    return result;
}
// Cut-off distance for column j: the smallest value of H2[k][j] over the
// rows k in [max(0, j-t), min(m, j+t)], or (j+t)*PREC when no row in that
// range holds a smaller value.
Weight Corr::cuted(int j)
{
    const int first = (j-t > 0) ? j-t : 0;
    const int last  = (m < j+t) ? m : j+t;

    Weight smallest = (j+t)*PREC;
    int k = first;
    while (k <= last)
    {
        const Weight cell = H2[k][j];
        if (cell < smallest)
            smallest = cell;
        ++k;
    }
    return smallest;
}
/*
void Corr::recomputeH(int j)
{
for(int i=0;i<=m;i++)
H[(i)+2][(j)+2]=ed(i,j);
}
*/
// Recomputes column j of the table for the rows in
// [max(0, j-t-2), min(m, j+t+2)], in ascending row order.
void Corr::recomputeH(int j)
{
    int row = j-t-2;
    if (row < 0)
        row = 0;

    int last = j+t+2;
    if (m < last)
        last = m;

    for (; row <= last; ++row)
        H2[row][j] = ed(row, j);
}
int Corr::correct(const char* w, Words& tab)
{
long int path[MAXPATH]={0};
int i; // row index (X)
int j; // column index (Y)
long state=0;
strcpy(X,w);
m=strlen(X)-1;
n=m+t;
for(i=(-2);i<=m;i++)
H[(i)+2][(-2)+2]=n*PREC; // moje *PREC
for(i=(-1);i<=m;i++)
H[(i)+2][(-1)+2]=((i)+1)*PREC; // moje*
for(j=(-2);j<=n;j++)
H[(-2)+2][(j)+2]=n*1000; // moje*
for(j=(-1);j<=n;j++)
H[(-1)+2][(j)+2]=((j)+1)*PREC; // moje*
for(j=0; j<=n; ++j)
for(i=0; i<=m; ++i)
H[i+2][j+2]=(t+1)*PREC;
int more=1;
bool cont=false;
strcpy(Y,"");
j=0;
state=0;
int count=0;
while(more)
{
if(!empty(state))
{
Y[j]=input(state);
recomputeH(j);
if(cuted(j)<=t)
{
Weight edd; // moje
if(final(next(state)) && (edd=H[(m)+2][(j)+2])<=t)
{
char* out=new char[j+2];
strncpy(out,Y,j+1);
out[j+1]='\0';
// if(cont) putchar(' ');
cont=true;
// printf("%1.2f %s\n", (float)edd/PREC,out); // moje
// cout << out << "(" << edd << ")" << endl;
tab.add(out,(float)edd/PREC);
count++;
}
path[j++]=state;
state=next(state);
continue;
}
else
if(continued(state))
{
state++;
continue;
}
}
//backtracking
do
if(j>0)
j--;
else
more=0;
while(more && !continued(path[j]));
state=path[j]+1;
}
return count;
}
//---------------------------------------------------------------------------

39
app/src/kor/corr.hh Executable file
View File

@ -0,0 +1,39 @@
//---------------------------------------------------------------------------
#ifndef _corr_hh
#define _corr_hh
//---------------------------------------------------------------------------
#include "../lib/tfti.h"
#include "../lib/word.h"
#include "corlist.h"
#include "../common/common.h"
// Spelling corrector: walks the dictionary automaton inherited from TFTiv
// and collects the words within a weighted edit distance of the input.
class Corr : public TFTiv<char,char>
{
private:
Weight H[100][100]; // edit-distance table; accessed through H2 with a shift of two rows/columns
char X[100]; // misspelled string
char Y[100]; // (possibly partial) candidate string
int m; // index of the last character of X (correct() sets it to strlen(X)-1)
int n; // correct() sets it to m+t
Weight ed(int,int);
Weight cuted(int);
void recomputeH(int);
public:
// Points at H[2][2], so indices -2 and -1 address the border rows/columns.
// (Author's marker "moje" = "mine": element type changed from int to Weight.
// NOTE(review): the original comment says Weight is float, but common_cor.h
// defines Weight as int.)
Weight (*H2)[100];
int t; // threshold; compared against table values, which are scaled by PREC
CorList CL; // correction rules and weights (author's addition)
Corr() : H2((Weight(*)[100])&H[2][2]) {}; // author's addition
Corr(const char* a) : TFTiv<char,char>(a), H2((Weight(*)[100])&H[2][2]) { };
int correct(const char* w, Words& tab);
int load2(char *Name); // author's addition: loads the weights file into CL
};
//---------------------------------------------------------------------------
#endif

155
app/src/kor/main.cc Executable file
View File

@ -0,0 +1,155 @@
#include <stdlib.h>
#include <ctype.h>
#include "../lib/iotools.h"
#include "common_cor.h"
#include "corr.hh"
#include <locale.h>
int main(int argc, char** argv) {
// setlocale(LC_CTYPE,"");
// setlocale(LC_COLLATE,"");
gengetopt_args_info args;
if(cmdline_parser(argc, argv, &args) != 0)
exit(1);
process_config_files(&args,argv[0]);
process_common_options(&args,argv[0]);
process_cor_options(&args);
Corr cor;
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// strcpy(dictionary,"cor.bin");
cor.load(dictionary);
cor.t=args.distance_arg;
//>>>>>>>>>>>>>>>
cor.CL.cor_stdcor=1*PREC;
cor.CL.cor_xchg=1*PREC;
if (cor.load2(file_weights)==-1) return -1; // moje
cor.t=1*PREC; // ODLEGLOSC EDYCYJNA
//<<<<<<<<<<<<<<
char line[MAX_LINE+1];
long line_count = 0;
Segment seg;
Words tab;
char form1[MAX_LINE];
char* form;
int formcasing;
char corfield[MAX_LINE]="";
while (fgets(line, MAX_LINE, inputf))
{
++line_count;
char outline[128];
if (!process_seg(line, args))
fputs(line, outputf);
else
{
char form[MAX_FORM];
tab.clear();
getfield(line,input_field_prefix,form);
if (form==NULL) continue;
formcasing=3;
cor.correct(form, tab);
if( tab.count() == 0 )
{
formcasing=casing(form);
if( formcasing == 1 || formcasing == 2)
tolowers(form, form1), cor.correct(form1, tab);
}
if ( tab.count() == 0)
fputs(line, failedf);
else
{
tab.sort();
if(args.replace_flag)
{
char corfield[128];
strcpy(corfield, input_field_prefix);
strcat(corfield, form);
seg.aux[seg.auxn]=corfield;
++seg.auxn;
for(int i=0; i<tab.count(); ++i)
{
seg.form=tab[i].form();
restorecasing(seg.form,seg.form,formcasing);
seg.print(outline);
fputs(outline, outputf);
}
--seg.auxn;
}
else
{
if(one_line)
{
char* p=corfield;
for(int i=tab.count()-1; i >= 0; --i)
{
if(tab[i].w_suf() > threshold) continue;
restorecasing(tab[i].form(),tab[i].form(),formcasing);
p += sprintf(p," %s%s,%1.2f",output_field_prefix,tab[i].form(),tab[i].w_suf());
}
sprintf(p,"\n");
strcpy(outline,line);
outline[strlen(outline)-1]='\0';
strcat(outline,corfield);
fputs(outline, outputf);
}
else if(one_field)
{
char* p=corfield;
p += sprintf(p," %s",output_field_prefix);
for(int i=tab.count()-1; i >= 0; --i)
{
if(tab[i].w_suf() > threshold) continue;
restorecasing(tab[i].form(),tab[i].form(),formcasing);
p += sprintf(p,(i==0)?"%s,%1.2f":";%s,%1.2f",tab[i].form(),tab[i].w_suf());
}
sprintf(p,"\n");
strcpy(outline,line);
outline[strlen(outline)-1]='\0';
strcat(outline,corfield);
fputs(outline, outputf);
}
else
{
for(int i=tab.count()-1; i >= 0; --i)
{
if(tab[i].w_suf() > threshold) continue;
restorecasing(tab[i].form(),tab[i].form(),formcasing);
sprintf(corfield," %s%s,%1.2f\n",output_field_prefix,tab[i].form(),tab[i].w_suf());
strcpy(outline,line);
outline[strlen(outline)-1]='\0';
strcat(outline,corfield);
fputs(outline, outputf);
}
}
}
}
}
if(args.interactive_flag)
{
fflush(outputf);
fflush(failedf);
}
}
cmdline_parser_free(&args);
}

View File

@ -22,7 +22,7 @@ void Word::autodescr(const char* fo, const char* de)
}
//---------------------------------------------------------------------------
int Word::cmp_w(Word a, Word b) {
bool Word::cmp_w(Word a, Word b) {
return (a.w_suf() > b.w_suf());
}
//---------------------------------------------------------------------------
@ -107,7 +107,7 @@ int Words::next() {
}
//---------------------------------------------------------------------------
void Words::sort() {
void Words::sort() {
std::sort(tab.begin(), tab.end(), Word::cmp_w);
}
@ -123,9 +123,15 @@ int Words::add(const char* fo)
if (cnt>=tab.capacity()-1)
tab.resize(tab.size()*2);
tab[cnt].form(fo);
tab[cnt].w_suf(0.0);
Word o;
o.form(fo);
o.w_suf(0.0);
tab.push_back(o);
// tab[cnt].form(fo);
// tab[cnt].w_suf(0.0);
// if(cnt<MAX-1) {
/* tab.push_back(new Word());
tab[cnt]->form(fo);
@ -136,6 +142,30 @@ int Words::add(const char* fo)
//return -1;
}
//---------------------------------------------------------------------------
// Temporary overload for the spelling correctors: adds the form fo with the
// given weight.
//
// Returns the index of the entry for fo: the existing one when fo is already
// present (its weight is left unchanged), otherwise the index of the newly
// appended entry.
int Words::add(const char* fo, float weight)
{
    int i = find(fo);
    if (i != -1) {
        return i;
    }

    Word o;
    o.form(fo);
    o.w_suf(weight);
    // push_back grows the vector by itself. Resizing first would insert
    // default-constructed entries ahead of the new word, so that index cnt
    // would no longer refer to it.
    tab.push_back(o);
    return cnt++;
}
//---------------------------------------------------------------------------
int Words::add(const char* fo, const char* des)

View File

@ -12,6 +12,8 @@
using namespace std;
class Word
{
public:
@ -36,7 +38,7 @@ private:
float _w_suf;
// float _w_pref;
public:
static int cmp_w(Word a, Word b);
static bool cmp_w(Word a, Word b);
Word() : _len_suf(-1) { *f='\0'; returned=0; };
Word(const char* fo, const char* des) : _len_suf(-1) { autodescr(fo,des); _w_suf=1.0; returned=0; };
@ -76,6 +78,7 @@ public:
};
inline Word::Word(const Word& word)
{ strcpy(f,word.f); strcpy(l,word.l); strcpy(d,word.d); _len_suf=word._len_suf; _w_suf=word._w_suf; returned = 0; }
@ -118,6 +121,7 @@ class Words
int count() const { return cnt; }
void clear() { cnt=0; tab.clear(); }
int add(const char* fo);
int add(const char* fo, float weight);
int add(const char* fo, const char* des);
/* zwraca index nastepnego wyniku, podczas pierwszego wywolania

View File

@ -10,6 +10,8 @@ use locale;
use Getopt::Long;
use File::HomeDir;
my $max_form_length = 50;
my $interactive=0;
my $help;
@ -20,7 +22,7 @@ my $userconfigfile=home()."/.utt/tok.conf";
#read configuration files###########################
my $file;
foreach $file ($systemconfigfile, $userconfigfile){
if(open(CONFIG, $configfile1)){
if(open(CONFIG, $file)){
while (<CONFIG>) {
chomp;
s/#.*//;
@ -74,17 +76,32 @@ while(<>)
# | [^[:print:]] (?{seg("B",$&)})
# Return the smaller of two numbers; when they compare equal the second
# argument is returned.
sub min {
    my ($first, $second) = @_;
    return $first < $second ? $first : $second;
}
sub seg
{
my ($tag,$match) = @_;
my $len=length $match;
printf "%04d %02d %s ", $offset, $len, $tag;
my $length = length $match;
my $idx = 0;
while($idx < $length) {
my $l = min $max_form_length, $length - $idx;
my $m = substr $match, $idx, $l;
printf "%04d %02d %s ", $offset + $idx, $l, $tag;
if($tag eq 'S')
{
for(my $i=0; $i<$len; ++$i)
for(my $i=0; $i<$l; ++$i)
{
my $c = substr $match, $i, 1;
my $c = substr $m, $i, 1;
print '_' if $c eq ' ';
print '\n' if $c eq "\n";
print '\t' if $c eq "\t";
@ -94,12 +111,15 @@ sub seg
}
elsif($tag eq 'B')
{
printf "\\x%02X", ord($match);
printf "\\x%02X", ord($m);
}
else
{
print $match;
print $m;
}
print "\n";
$offset += $len;
}
$idx += $l;
} # while($idx < $length)
$offset += $length;
} #sub seg