zacząłem poprawiać main.cc w guesie - nadal nie działa

retusze w lem.cc - działa jak działał
w konie był błędnie wpisany autor



git-svn-id: svn://atos.wmid.amu.edu.pl/utt@21 e293616e-ec6a-49c2-aa92-f4a8b91c5d16
This commit is contained in:
obrebski 2008-04-01 19:42:34 +00:00
parent e2bde98bb1
commit 8abee07844
3 changed files with 109 additions and 117 deletions

View File

@ -14,7 +14,7 @@
int main(int argc, char** argv) {
int non_standard_config=0;
// int non_standard_config=0;
gengetopt_args_info args;
@ -22,7 +22,6 @@ int main(int argc, char** argv) {
exit(1);
process_config_files(&args,argv[0]);
process_common_options(&args,argv[0]);
process_guess_options(&args);
@ -72,121 +71,116 @@ int main(int argc, char** argv) {
char outline[MAX_LINE];
char parms[MAX_LINE], desc[MAX_LINE], lemma[MAX_LINE];
long line_count = 0;
// printf("d_f=%s\n", dict_file);
Guess guess(dictionary);
int words_count=0;
time_t start_time = time(NULL);
Segment seg;
// Segment seg;
Words tab;
char* form; //[MAX_FORM];
while (fgets(line, MAX_LINE, inputf)==line) {
line_count++;
int start, len;
while (fgets(line, MAX_LINE, inputf))
{
line_count++;
int start, len;
line[strlen(line)-1] = '\0';
line[strlen(line)-1] = '\0';
if (!seg.parse(line)) {
fprintf(stderr, "Błąd w wejściu (linia: %d)\n", line_count);
return -1;
if (!process_seg(line, args))
fputs(line,outputf);
else
{
char form[MAX_FORM];
words_count++;
tab.clear();
getfield(line,input_field_prefix,form);
if (form==NULL) continue;//BZDURA
guess.ana(form, tab);
if ((tab.count()==0) && (!args.no_fail_flag)) // no guesses - analysis was unsuccessful
fputs(line, failedf);
else
{
// if (copy_processed)
// fputs(line, stdout);
// continue;
// }
// we've got some guesses. Do we want to print it?
// if (args.only_fail_flag)
// continue;
float last_weight=0;
int i=0;
int count=0;
unsigned first=1;
char* parms_end = parms;
char last_lemma[MAX_LINE];
while ((i=tab.next()) != -1 && count++<guess_count) {
/* if we have "one-line" flag then everything goes in one segment as many fields,
* if we have "one-field" flag everything goes in one segment as ONE field:
* - diferent lemmas are separated with ';', sequent descriptions to one lemma
* are separated with ','
*/
if ((!first) && (tab[i].w_suf() < cut_off) || (tab[i].w_suf() < delta * last_weight)) {
break;
}
if (first) {
parms_end += sprintf(parms_end, "%s", field_prefix);
} else if (!args.one_field_flag)
parms_end += sprintf(parms_end, "%s", field_prefix);
if (!args.one_field_flag || strcmp(last_lemma, tab[i].lemma()) != 0) {
if (args.one_field_flag && !first)
parms_end += sprintf(parms_end, ";");
parms_end += sprintf(parms_end, "%s", tab[i].lemma());
strcpy(last_lemma, tab[i].lemma());
}
first=0;
last_weight = tab[i].w_suf();
if (!weights)
parms_end += sprintf(parms_end, ",%s:%d", tab[i].descr(), (int)tab[i].w_suf());
else
parms_end += sprintf(parms_end, ",%s", tab[i].descr());
if (!args.one_field_flag) {
seg.addfield(parms);
parms_end = parms;
}
if (!(args.one_field_flag || args.one_line_flag)) {
seg.print(outline);
fputs(outline, outputf);
--seg.auxn;
}
//if (copy_processed)
// fputs(outline, stdout);
} //while
if (args.one_field_flag)
seg.addfield(parms);
if (args.one_field_flag || args.one_line_flag){
seg.print(outline);
fputs(outline, outputf);
}
} else { // if (process_segment)
// jak to nie jest wyraz - to przepisz token na wyjscie.
// printtok(line, start, len, cat, form);
seg.print(outline);
fputs(outline, outputf);
if (copy_processed)
fputs(outline, stdout);
}
}
time_t end_time = time(NULL);
if (per_info) {
printf("Liczba słów: %d\n", words_count);
printf("Czas analizy: %d sekund\n", end_time-start_time);
}
cmdline_parser_free(&args);
}
if (process_seg(seg, args)) {
words_count++;
tab.clear();
if (args.input_field_given>0) {
form = getInput(args.input_field_arg, args.input_field_given, seg);
} else
form = seg.form;
if (NULL == form) {
continue;
}
guess.ana(form, tab);
if ((tab.count()==0) && (!args.no_fail_flag)) {
// no guesses - analysis was unsuccessful
seg.print(outline); //this is necessary - seg.parse destroys line...
fputs(outline, failedf);
if (copy_processed)
fputs(line, stdout);
continue;
}
// we've got some guesses. Do we want to print it?
if (args.only_fail_flag)
continue;
float last_weight=0;
int i=0;
int count=0;
unsigned first=1;
char* parms_end = parms;
char last_lemma[MAX_LINE];
while ((i=tab.next()) != -1 && count++<guess_count) {
/* if we have "one-line" flag then everything goes in one segment as many fields,
* if we have "one-field" flag everything goes in one segment as ONE field:
* - diferent lemmas are separated with ';', sequent descriptions to one lemma
* are separated with ','
*/
if ((!first) && (tab[i].w_suf() < cut_off) || (tab[i].w_suf() < delta * last_weight)) {
break;
}
if (first) {
parms_end += sprintf(parms_end, "%s", field_prefix);
} else if (!args.one_field_flag)
parms_end += sprintf(parms_end, "%s", field_prefix);
if (!args.one_field_flag || strcmp(last_lemma, tab[i].lemma()) != 0) {
if (args.one_field_flag && !first)
parms_end += sprintf(parms_end, ";");
parms_end += sprintf(parms_end, "%s", tab[i].lemma());
strcpy(last_lemma, tab[i].lemma());
}
first=0;
last_weight = tab[i].w_suf();
if (!weights)
parms_end += sprintf(parms_end, ",%s:%d", tab[i].descr(), (int)tab[i].w_suf());
else
parms_end += sprintf(parms_end, ",%s", tab[i].descr());
if (!args.one_field_flag) {
seg.addfield(parms);
parms_end = parms;
}
if (!(args.one_field_flag || args.one_line_flag)) {
seg.print(outline);
fputs(outline, outputf);
--seg.auxn;
}
//if (copy_processed)
// fputs(outline, stdout);
} //while
if (args.one_field_flag)
seg.addfield(parms);
if (args.one_field_flag || args.one_line_flag){
seg.print(outline);
fputs(outline, outputf);
}
} else { // if (process_segment)
// jak to nie jest wyraz - to przepisz token na wyjscie.
// printtok(line, start, len, cat, form);
seg.print(outline);
fputs(outline, outputf);
if (copy_processed)
fputs(outline, stdout);
}
}
time_t end_time = time(NULL);
if (per_info) {
printf("Liczba słów: %d\n", words_count);
printf("Czas analizy: %d sekund\n", end_time-start_time);
}
cmdline_parser_free(&args);
}

View File

@ -3,7 +3,7 @@
#package: UAM Text Tools
#component: kon (search context)
#version: 1.0
#author: Tomasz Obrebski
#author: Justyna Walkowska
use strict;
use Getopt::Long;

View File

@ -39,9 +39,7 @@ int main(int argc, char** argv) {
while (fgets(line, MAX_LINE, inputf))
{
// strcpy(outline,line);
++line_count;
int start, len;
if (!process_seg(line, args)) // TO POWINNO BYC WCZESNIEJ ZABEZPIECZONE
@ -52,7 +50,7 @@ int main(int argc, char** argv) {
tab.clear();
getfield(line,input_field_prefix,form);
if (form==NULL) continue;
if (form==NULL) continue;//BZDURA
lem->ana(form, tab);
if(tab.count()==0)