cw5 tetragram
This commit is contained in:
parent
b7e27a1f1d
commit
37a762f54e
|
@ -116,10 +116,13 @@ def candidates(left_context, right_context):
|
||||||
cand = sorted(list(cand.items()), key=lambda x: x[1], reverse=True)[:5]
|
cand = sorted(list(cand.items()), key=lambda x: x[1], reverse=True)[:5]
|
||||||
norm = [(x[0], float(x[1]) / sum([y[1] for y in cand])) for x in cand]
|
norm = [(x[0], float(x[1]) / sum([y[1] for y in cand])) for x in cand]
|
||||||
for index, elem in enumerate(norm):
|
for index, elem in enumerate(norm):
|
||||||
|
unk = None
|
||||||
if 'UNK' in elem:
|
if 'UNK' in elem:
|
||||||
unk = norm.pop(index)
|
unk = norm.pop(index)
|
||||||
norm.append(('', unk[1]))
|
norm.append(('', unk[1]))
|
||||||
break
|
break
|
||||||
|
if unk is None:
|
||||||
|
norm[-1] = ('', norm[-1][1])
|
||||||
return ' '.join([f'{x[0]}:{x[1]}' for x in norm])
|
return ' '.join([f'{x[0]}:{x[1]}' for x in norm])
|
||||||
|
|
||||||
|
|
||||||
|
|
784
dev-0/out.tsv
784
dev-0/out.tsv
File diff suppressed because it is too large
Load Diff
504
test-A/out.tsv
504
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue