zaczalem poprawiac main.cc w guesie - nadal nie dziala
retusze w lem.cc - dziala jak dzialal
w konie byl blednie wpisany autor

git-svn-id: svn://atos.wmid.amu.edu.pl/utt@21 e293616e-ec6a-49c2-aa92-f4a8b91c5d16
This commit is contained in:
parent
e2bde98bb1
commit
8abee07844
@ -14,7 +14,7 @@
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
||||
int non_standard_config=0;
|
||||
// int non_standard_config=0;
|
||||
|
||||
gengetopt_args_info args;
|
||||
|
||||
@ -22,7 +22,6 @@ int main(int argc, char** argv) {
|
||||
exit(1);
|
||||
|
||||
process_config_files(&args,argv[0]);
|
||||
|
||||
process_common_options(&args,argv[0]);
|
||||
process_guess_options(&args);
|
||||
|
||||
@ -72,121 +71,116 @@ int main(int argc, char** argv) {
|
||||
char outline[MAX_LINE];
|
||||
char parms[MAX_LINE], desc[MAX_LINE], lemma[MAX_LINE];
|
||||
long line_count = 0;
|
||||
// printf("d_f=%s\n", dict_file);
|
||||
|
||||
Guess guess(dictionary);
|
||||
int words_count=0;
|
||||
time_t start_time = time(NULL);
|
||||
|
||||
Segment seg;
|
||||
// Segment seg;
|
||||
Words tab;
|
||||
char* form; //[MAX_FORM];
|
||||
while (fgets(line, MAX_LINE, inputf)==line) {
|
||||
line_count++;
|
||||
int start, len;
|
||||
while (fgets(line, MAX_LINE, inputf))
|
||||
{
|
||||
line_count++;
|
||||
int start, len;
|
||||
|
||||
line[strlen(line)-1] = '\0';
|
||||
line[strlen(line)-1] = '\0';
|
||||
|
||||
if (!seg.parse(line)) {
|
||||
fprintf(stderr, "B³±d w wej¶ciu (linia: %d)\n", line_count);
|
||||
return -1;
|
||||
if (!process_seg(line, args))
|
||||
fputs(line,outputf);
|
||||
else
|
||||
{
|
||||
char form[MAX_FORM];
|
||||
words_count++;
|
||||
tab.clear();
|
||||
getfield(line,input_field_prefix,form);
|
||||
if (form==NULL) continue;//BZDURA
|
||||
|
||||
guess.ana(form, tab);
|
||||
|
||||
if ((tab.count()==0) && (!args.no_fail_flag)) // no guesses - analysis was unsuccessful
|
||||
fputs(line, failedf);
|
||||
else
|
||||
{
|
||||
|
||||
// if (copy_processed)
|
||||
// fputs(line, stdout);
|
||||
// continue;
|
||||
// }
|
||||
// we've got some guesses. Do we want to print it?
|
||||
// if (args.only_fail_flag)
|
||||
// continue;
|
||||
|
||||
float last_weight=0;
|
||||
int i=0;
|
||||
int count=0;
|
||||
unsigned first=1;
|
||||
char* parms_end = parms;
|
||||
char last_lemma[MAX_LINE];
|
||||
|
||||
while ((i=tab.next()) != -1 && count++<guess_count) {
|
||||
/* if we have "one-line" flag then everything goes in one segment as many fields,
|
||||
* if we have "one-field" flag everything goes in one segment as ONE field:
|
||||
* - diferent lemmas are separated with ';', sequent descriptions to one lemma
|
||||
* are separated with ','
|
||||
*/
|
||||
if ((!first) && (tab[i].w_suf() < cut_off) || (tab[i].w_suf() < delta * last_weight)) {
|
||||
break;
|
||||
}
|
||||
if (first) {
|
||||
parms_end += sprintf(parms_end, "%s", field_prefix);
|
||||
} else if (!args.one_field_flag)
|
||||
parms_end += sprintf(parms_end, "%s", field_prefix);
|
||||
|
||||
if (!args.one_field_flag || strcmp(last_lemma, tab[i].lemma()) != 0) {
|
||||
if (args.one_field_flag && !first)
|
||||
parms_end += sprintf(parms_end, ";");
|
||||
parms_end += sprintf(parms_end, "%s", tab[i].lemma());
|
||||
strcpy(last_lemma, tab[i].lemma());
|
||||
}
|
||||
|
||||
first=0;
|
||||
|
||||
last_weight = tab[i].w_suf();
|
||||
if (!weights)
|
||||
parms_end += sprintf(parms_end, ",%s:%d", tab[i].descr(), (int)tab[i].w_suf());
|
||||
else
|
||||
parms_end += sprintf(parms_end, ",%s", tab[i].descr());
|
||||
|
||||
if (!args.one_field_flag) {
|
||||
seg.addfield(parms);
|
||||
parms_end = parms;
|
||||
}
|
||||
|
||||
if (!(args.one_field_flag || args.one_line_flag)) {
|
||||
seg.print(outline);
|
||||
fputs(outline, outputf);
|
||||
--seg.auxn;
|
||||
}
|
||||
//if (copy_processed)
|
||||
// fputs(outline, stdout);
|
||||
} //while
|
||||
|
||||
if (args.one_field_flag)
|
||||
seg.addfield(parms);
|
||||
|
||||
if (args.one_field_flag || args.one_line_flag){
|
||||
seg.print(outline);
|
||||
fputs(outline, outputf);
|
||||
}
|
||||
} else { // if (process_segment)
|
||||
// jak to nie jest wyraz - to przepisz token na wyjscie.
|
||||
// printtok(line, start, len, cat, form);
|
||||
seg.print(outline);
|
||||
fputs(outline, outputf);
|
||||
if (copy_processed)
|
||||
fputs(outline, stdout);
|
||||
}
|
||||
}
|
||||
time_t end_time = time(NULL);
|
||||
if (per_info) {
|
||||
printf("Liczba s³ów: %d\n", words_count);
|
||||
printf("Czas analizy: %d sekund\n", end_time-start_time);
|
||||
}
|
||||
cmdline_parser_free(&args);
|
||||
}
|
||||
|
||||
if (process_seg(seg, args)) {
|
||||
words_count++;
|
||||
tab.clear();
|
||||
if (args.input_field_given>0) {
|
||||
form = getInput(args.input_field_arg, args.input_field_given, seg);
|
||||
} else
|
||||
form = seg.form;
|
||||
|
||||
if (NULL == form) {
|
||||
continue;
|
||||
}
|
||||
|
||||
guess.ana(form, tab);
|
||||
|
||||
if ((tab.count()==0) && (!args.no_fail_flag)) {
|
||||
// no guesses - analysis was unsuccessful
|
||||
seg.print(outline); //this is necessary - seg.parse destroys line...
|
||||
fputs(outline, failedf);
|
||||
if (copy_processed)
|
||||
fputs(line, stdout);
|
||||
continue;
|
||||
}
|
||||
// we've got some guesses. Do we want to print it?
|
||||
if (args.only_fail_flag)
|
||||
continue;
|
||||
|
||||
float last_weight=0;
|
||||
int i=0;
|
||||
int count=0;
|
||||
unsigned first=1;
|
||||
char* parms_end = parms;
|
||||
char last_lemma[MAX_LINE];
|
||||
|
||||
while ((i=tab.next()) != -1 && count++<guess_count) {
|
||||
/* if we have "one-line" flag then everything goes in one segment as many fields,
|
||||
* if we have "one-field" flag everything goes in one segment as ONE field:
|
||||
* - diferent lemmas are separated with ';', sequent descriptions to one lemma
|
||||
* are separated with ','
|
||||
*/
|
||||
if ((!first) && (tab[i].w_suf() < cut_off) || (tab[i].w_suf() < delta * last_weight)) {
|
||||
break;
|
||||
}
|
||||
if (first) {
|
||||
parms_end += sprintf(parms_end, "%s", field_prefix);
|
||||
} else if (!args.one_field_flag)
|
||||
parms_end += sprintf(parms_end, "%s", field_prefix);
|
||||
|
||||
if (!args.one_field_flag || strcmp(last_lemma, tab[i].lemma()) != 0) {
|
||||
if (args.one_field_flag && !first)
|
||||
parms_end += sprintf(parms_end, ";");
|
||||
parms_end += sprintf(parms_end, "%s", tab[i].lemma());
|
||||
strcpy(last_lemma, tab[i].lemma());
|
||||
}
|
||||
|
||||
first=0;
|
||||
|
||||
last_weight = tab[i].w_suf();
|
||||
if (!weights)
|
||||
parms_end += sprintf(parms_end, ",%s:%d", tab[i].descr(), (int)tab[i].w_suf());
|
||||
else
|
||||
parms_end += sprintf(parms_end, ",%s", tab[i].descr());
|
||||
|
||||
if (!args.one_field_flag) {
|
||||
seg.addfield(parms);
|
||||
parms_end = parms;
|
||||
}
|
||||
|
||||
if (!(args.one_field_flag || args.one_line_flag)) {
|
||||
seg.print(outline);
|
||||
fputs(outline, outputf);
|
||||
--seg.auxn;
|
||||
}
|
||||
//if (copy_processed)
|
||||
// fputs(outline, stdout);
|
||||
} //while
|
||||
|
||||
if (args.one_field_flag)
|
||||
seg.addfield(parms);
|
||||
|
||||
if (args.one_field_flag || args.one_line_flag){
|
||||
seg.print(outline);
|
||||
fputs(outline, outputf);
|
||||
}
|
||||
} else { // if (process_segment)
|
||||
// jak to nie jest wyraz - to przepisz token na wyjscie.
|
||||
// printtok(line, start, len, cat, form);
|
||||
seg.print(outline);
|
||||
fputs(outline, outputf);
|
||||
if (copy_processed)
|
||||
fputs(outline, stdout);
|
||||
}
|
||||
}
|
||||
time_t end_time = time(NULL);
|
||||
if (per_info) {
|
||||
printf("Liczba s³ów: %d\n", words_count);
|
||||
printf("Czas analizy: %d sekund\n", end_time-start_time);
|
||||
}
|
||||
cmdline_parser_free(&args);
|
||||
}
|
||||
|
@ -3,7 +3,7 @@
|
||||
#package: UAM Text Tools
|
||||
#component: kon (search context)
|
||||
#version: 1.0
|
||||
#author: Tomasz Obrebski
|
||||
#author: Justyna Walkowska
|
||||
|
||||
use strict;
|
||||
use Getopt::Long;
|
||||
|
@ -39,9 +39,7 @@ int main(int argc, char** argv) {
|
||||
|
||||
while (fgets(line, MAX_LINE, inputf))
|
||||
{
|
||||
// strcpy(outline,line);
|
||||
++line_count;
|
||||
|
||||
int start, len;
|
||||
|
||||
if (!process_seg(line, args)) // TO POWINNO BYC WCZESNIEJ ZABEZPIECZONE
|
||||
@ -52,7 +50,7 @@ int main(int argc, char** argv) {
|
||||
|
||||
tab.clear();
|
||||
getfield(line,input_field_prefix,form);
|
||||
if (form==NULL) continue;
|
||||
if (form==NULL) continue;//BZDURA
|
||||
|
||||
lem->ana(form, tab);
|
||||
if(tab.count()==0)
|
||||
|
Loading…
Reference in New Issue
Block a user