wmt-2020-pl-en/postprocess.py

29 lines
888 B
Python

import re
# Leading "D-<id><TAB><number><TAB>" marker of a hypothesis line, together with
# any run of "(" characters, then ")" characters, then spaces that directly
# follows it.  Compiled once because it is applied to every input line.
_HYPOTHESIS_PREFIX = re.compile(
    r'D-[0-9]+\t[-]*[0-9]+[\.]*[0-9]*\t\(*\)* *'
)

# (temporary input, final output) pairs, processed in this order.
_FILE_PAIRS = (
    ("dev-0/out_temp.tsv", "dev-0/out.tsv"),
    ("test-A/out_temp.tsv", "test-A/out.tsv"),
)


def strip_hypothesis_prefix(line):
    """Return *line* without its leading ``D-<id>\\t<number>\\t`` marker.

    Only the marker at the very start of the line is removed (plus any
    ``(``/``)``/space run right after it); an identical-looking sequence
    later in the text is left alone.  The trailing newline, if any, is kept.

    Returns ``None`` when the line does not start with such a marker.
    """
    found = _HYPOTHESIS_PREFIX.match(line)
    if found is None:
        return None
    return line[found.end():]


def postprocess_file(source_path, target_path):
    """Write the cleaned hypothesis lines of *source_path* to *target_path*.

    Both files are UTF-8.  The input is read completely before the output is
    opened, so a missing input never creates or truncates the output.

    NOTE(review): lines without the ``D-`` marker are dropped.  The original
    script's indentation was lost, so it is assumed that its ``write`` call
    belonged to the ``if pattern.match(line)`` branch -- confirm.
    """
    with open(source_path, encoding="utf-8") as source:
        raw_lines = source.readlines()

    # The context manager guarantees the output is flushed and closed even if
    # processing fails part-way through.
    with open(target_path, "w", encoding="utf-8") as target:
        for raw in raw_lines:
            cleaned = strip_hypothesis_prefix(raw)
            if cleaned is not None:
                target.write(cleaned)


def main():
    """Post-process the dev-0 and test-A temporary outputs."""
    for source_path, target_path in _FILE_PAIRS:
        postprocess_file(source_path, target_path)


if __name__ == "__main__":
    main()