zaczalem poprawiac main.cc w guesie - nadal nie dziala
retusze w lem.cc - dziala jak dzialal
w konie byl blednie wpisany autor

git-svn-id: svn://atos.wmid.amu.edu.pl/utt@21 e293616e-ec6a-49c2-aa92-f4a8b91c5d16
This commit is contained in:
parent
e2bde98bb1
commit
8abee07844
@ -14,7 +14,7 @@
|
|||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
|
|
||||||
int non_standard_config=0;
|
// int non_standard_config=0;
|
||||||
|
|
||||||
gengetopt_args_info args;
|
gengetopt_args_info args;
|
||||||
|
|
||||||
@ -22,7 +22,6 @@ int main(int argc, char** argv) {
|
|||||||
exit(1);
|
exit(1);
|
||||||
|
|
||||||
process_config_files(&args,argv[0]);
|
process_config_files(&args,argv[0]);
|
||||||
|
|
||||||
process_common_options(&args,argv[0]);
|
process_common_options(&args,argv[0]);
|
||||||
process_guess_options(&args);
|
process_guess_options(&args);
|
||||||
|
|
||||||
@ -72,121 +71,116 @@ int main(int argc, char** argv) {
|
|||||||
char outline[MAX_LINE];
|
char outline[MAX_LINE];
|
||||||
char parms[MAX_LINE], desc[MAX_LINE], lemma[MAX_LINE];
|
char parms[MAX_LINE], desc[MAX_LINE], lemma[MAX_LINE];
|
||||||
long line_count = 0;
|
long line_count = 0;
|
||||||
// printf("d_f=%s\n", dict_file);
|
|
||||||
Guess guess(dictionary);
|
Guess guess(dictionary);
|
||||||
int words_count=0;
|
int words_count=0;
|
||||||
time_t start_time = time(NULL);
|
time_t start_time = time(NULL);
|
||||||
|
|
||||||
Segment seg;
|
// Segment seg;
|
||||||
Words tab;
|
Words tab;
|
||||||
char* form; //[MAX_FORM];
|
while (fgets(line, MAX_LINE, inputf))
|
||||||
while (fgets(line, MAX_LINE, inputf)==line) {
|
{
|
||||||
line_count++;
|
line_count++;
|
||||||
int start, len;
|
int start, len;
|
||||||
|
|
||||||
|
line[strlen(line)-1] = '\0';
|
||||||
|
|
||||||
|
if (!process_seg(line, args))
|
||||||
|
fputs(line,outputf);
|
||||||
|
else
|
||||||
|
{
|
||||||
|
char form[MAX_FORM];
|
||||||
|
words_count++;
|
||||||
|
tab.clear();
|
||||||
|
getfield(line,input_field_prefix,form);
|
||||||
|
if (form==NULL) continue;//BZDURA
|
||||||
|
|
||||||
|
guess.ana(form, tab);
|
||||||
|
|
||||||
|
if ((tab.count()==0) && (!args.no_fail_flag)) // no guesses - analysis was unsuccessful
|
||||||
|
fputs(line, failedf);
|
||||||
|
else
|
||||||
|
{
|
||||||
|
|
||||||
line[strlen(line)-1] = '\0';
|
// if (copy_processed)
|
||||||
|
// fputs(line, stdout);
|
||||||
|
// continue;
|
||||||
|
// }
|
||||||
|
// we've got some guesses. Do we want to print it?
|
||||||
|
// if (args.only_fail_flag)
|
||||||
|
// continue;
|
||||||
|
|
||||||
if (!seg.parse(line)) {
|
float last_weight=0;
|
||||||
fprintf(stderr, "B³±d w wej¶ciu (linia: %d)\n", line_count);
|
int i=0;
|
||||||
return -1;
|
int count=0;
|
||||||
|
unsigned first=1;
|
||||||
|
char* parms_end = parms;
|
||||||
|
char last_lemma[MAX_LINE];
|
||||||
|
|
||||||
|
while ((i=tab.next()) != -1 && count++<guess_count) {
|
||||||
|
/* if we have "one-line" flag then everything goes in one segment as many fields,
|
||||||
|
* if we have "one-field" flag everything goes in one segment as ONE field:
|
||||||
|
* - diferent lemmas are separated with ';', sequent descriptions to one lemma
|
||||||
|
* are separated with ','
|
||||||
|
*/
|
||||||
|
if ((!first) && (tab[i].w_suf() < cut_off) || (tab[i].w_suf() < delta * last_weight)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (first) {
|
||||||
|
parms_end += sprintf(parms_end, "%s", field_prefix);
|
||||||
|
} else if (!args.one_field_flag)
|
||||||
|
parms_end += sprintf(parms_end, "%s", field_prefix);
|
||||||
|
|
||||||
|
if (!args.one_field_flag || strcmp(last_lemma, tab[i].lemma()) != 0) {
|
||||||
|
if (args.one_field_flag && !first)
|
||||||
|
parms_end += sprintf(parms_end, ";");
|
||||||
|
parms_end += sprintf(parms_end, "%s", tab[i].lemma());
|
||||||
|
strcpy(last_lemma, tab[i].lemma());
|
||||||
|
}
|
||||||
|
|
||||||
|
first=0;
|
||||||
|
|
||||||
|
last_weight = tab[i].w_suf();
|
||||||
|
if (!weights)
|
||||||
|
parms_end += sprintf(parms_end, ",%s:%d", tab[i].descr(), (int)tab[i].w_suf());
|
||||||
|
else
|
||||||
|
parms_end += sprintf(parms_end, ",%s", tab[i].descr());
|
||||||
|
|
||||||
|
if (!args.one_field_flag) {
|
||||||
|
seg.addfield(parms);
|
||||||
|
parms_end = parms;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!(args.one_field_flag || args.one_line_flag)) {
|
||||||
|
seg.print(outline);
|
||||||
|
fputs(outline, outputf);
|
||||||
|
--seg.auxn;
|
||||||
|
}
|
||||||
|
//if (copy_processed)
|
||||||
|
// fputs(outline, stdout);
|
||||||
|
} //while
|
||||||
|
|
||||||
|
if (args.one_field_flag)
|
||||||
|
seg.addfield(parms);
|
||||||
|
|
||||||
|
if (args.one_field_flag || args.one_line_flag){
|
||||||
|
seg.print(outline);
|
||||||
|
fputs(outline, outputf);
|
||||||
|
}
|
||||||
|
} else { // if (process_segment)
|
||||||
|
// jak to nie jest wyraz - to przepisz token na wyjscie.
|
||||||
|
// printtok(line, start, len, cat, form);
|
||||||
|
seg.print(outline);
|
||||||
|
fputs(outline, outputf);
|
||||||
|
if (copy_processed)
|
||||||
|
fputs(outline, stdout);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
time_t end_time = time(NULL);
|
||||||
|
if (per_info) {
|
||||||
|
printf("Liczba s³ów: %d\n", words_count);
|
||||||
|
printf("Czas analizy: %d sekund\n", end_time-start_time);
|
||||||
|
}
|
||||||
|
cmdline_parser_free(&args);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (process_seg(seg, args)) {
|
|
||||||
words_count++;
|
|
||||||
tab.clear();
|
|
||||||
if (args.input_field_given>0) {
|
|
||||||
form = getInput(args.input_field_arg, args.input_field_given, seg);
|
|
||||||
} else
|
|
||||||
form = seg.form;
|
|
||||||
|
|
||||||
if (NULL == form) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
guess.ana(form, tab);
|
|
||||||
|
|
||||||
if ((tab.count()==0) && (!args.no_fail_flag)) {
|
|
||||||
// no guesses - analysis was unsuccessful
|
|
||||||
seg.print(outline); //this is necessary - seg.parse destroys line...
|
|
||||||
fputs(outline, failedf);
|
|
||||||
if (copy_processed)
|
|
||||||
fputs(line, stdout);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
// we've got some guesses. Do we want to print it?
|
|
||||||
if (args.only_fail_flag)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
float last_weight=0;
|
|
||||||
int i=0;
|
|
||||||
int count=0;
|
|
||||||
unsigned first=1;
|
|
||||||
char* parms_end = parms;
|
|
||||||
char last_lemma[MAX_LINE];
|
|
||||||
|
|
||||||
while ((i=tab.next()) != -1 && count++<guess_count) {
|
|
||||||
/* if we have "one-line" flag then everything goes in one segment as many fields,
|
|
||||||
* if we have "one-field" flag everything goes in one segment as ONE field:
|
|
||||||
* - diferent lemmas are separated with ';', sequent descriptions to one lemma
|
|
||||||
* are separated with ','
|
|
||||||
*/
|
|
||||||
if ((!first) && (tab[i].w_suf() < cut_off) || (tab[i].w_suf() < delta * last_weight)) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (first) {
|
|
||||||
parms_end += sprintf(parms_end, "%s", field_prefix);
|
|
||||||
} else if (!args.one_field_flag)
|
|
||||||
parms_end += sprintf(parms_end, "%s", field_prefix);
|
|
||||||
|
|
||||||
if (!args.one_field_flag || strcmp(last_lemma, tab[i].lemma()) != 0) {
|
|
||||||
if (args.one_field_flag && !first)
|
|
||||||
parms_end += sprintf(parms_end, ";");
|
|
||||||
parms_end += sprintf(parms_end, "%s", tab[i].lemma());
|
|
||||||
strcpy(last_lemma, tab[i].lemma());
|
|
||||||
}
|
|
||||||
|
|
||||||
first=0;
|
|
||||||
|
|
||||||
last_weight = tab[i].w_suf();
|
|
||||||
if (!weights)
|
|
||||||
parms_end += sprintf(parms_end, ",%s:%d", tab[i].descr(), (int)tab[i].w_suf());
|
|
||||||
else
|
|
||||||
parms_end += sprintf(parms_end, ",%s", tab[i].descr());
|
|
||||||
|
|
||||||
if (!args.one_field_flag) {
|
|
||||||
seg.addfield(parms);
|
|
||||||
parms_end = parms;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!(args.one_field_flag || args.one_line_flag)) {
|
|
||||||
seg.print(outline);
|
|
||||||
fputs(outline, outputf);
|
|
||||||
--seg.auxn;
|
|
||||||
}
|
|
||||||
//if (copy_processed)
|
|
||||||
// fputs(outline, stdout);
|
|
||||||
} //while
|
|
||||||
|
|
||||||
if (args.one_field_flag)
|
|
||||||
seg.addfield(parms);
|
|
||||||
|
|
||||||
if (args.one_field_flag || args.one_line_flag){
|
|
||||||
seg.print(outline);
|
|
||||||
fputs(outline, outputf);
|
|
||||||
}
|
|
||||||
} else { // if (process_segment)
|
|
||||||
// jak to nie jest wyraz - to przepisz token na wyjscie.
|
|
||||||
// printtok(line, start, len, cat, form);
|
|
||||||
seg.print(outline);
|
|
||||||
fputs(outline, outputf);
|
|
||||||
if (copy_processed)
|
|
||||||
fputs(outline, stdout);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
time_t end_time = time(NULL);
|
|
||||||
if (per_info) {
|
|
||||||
printf("Liczba s³ów: %d\n", words_count);
|
|
||||||
printf("Czas analizy: %d sekund\n", end_time-start_time);
|
|
||||||
}
|
|
||||||
cmdline_parser_free(&args);
|
|
||||||
}
|
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
#package: UAM Text Tools
|
#package: UAM Text Tools
|
||||||
#component: kon (search context)
|
#component: kon (search context)
|
||||||
#version: 1.0
|
#version: 1.0
|
||||||
#author: Tomasz Obrebski
|
#author: Justyna Walkowska
|
||||||
|
|
||||||
use strict;
|
use strict;
|
||||||
use Getopt::Long;
|
use Getopt::Long;
|
||||||
|
@ -39,9 +39,7 @@ int main(int argc, char** argv) {
|
|||||||
|
|
||||||
while (fgets(line, MAX_LINE, inputf))
|
while (fgets(line, MAX_LINE, inputf))
|
||||||
{
|
{
|
||||||
// strcpy(outline,line);
|
|
||||||
++line_count;
|
++line_count;
|
||||||
|
|
||||||
int start, len;
|
int start, len;
|
||||||
|
|
||||||
if (!process_seg(line, args)) // TO POWINNO BYC WCZESNIEJ ZABEZPIECZONE
|
if (!process_seg(line, args)) // TO POWINNO BYC WCZESNIEJ ZABEZPIECZONE
|
||||||
@ -52,7 +50,7 @@ int main(int argc, char** argv) {
|
|||||||
|
|
||||||
tab.clear();
|
tab.clear();
|
||||||
getfield(line,input_field_prefix,form);
|
getfield(line,input_field_prefix,form);
|
||||||
if (form==NULL) continue;
|
if (form==NULL) continue;//BZDURA
|
||||||
|
|
||||||
lem->ana(form, tab);
|
lem->ana(form, tab);
|
||||||
if(tab.count()==0)
|
if(tab.count()==0)
|
||||||
|
Loading…
Reference in New Issue
Block a user