20 lines
857 B
Python
20 lines
857 B
Python
import regex as re
|
|
|
|
# Input files read from ./plewic/ and, position by position, the output file
# each one is written to. Several inputs share the same output on purpose:
# all ".co" inputs end up in plewi_co.txt and all ".er" inputs in plewi_er.txt.
filenames = ['test.co', 'test.er', 'train.co', 'train.er', 'tune.co', 'tune.er']
output_filenames = ['./plewi_co.txt',
                    './plewi_er.txt',
                    './plewi_co.txt',
                    './plewi_er.txt',
                    './plewi_co.txt',
                    './plewi_er.txt']

# Matches a whole line of the form  ! 'text'  or  ! "text"  (a "!", one
# whitespace character, then text wrapped in a matching pair of quotes).
# Compiled once here instead of being rebuilt for every line.
QUOTED_LINE = re.compile(r"^!\s(['\"]).*\1$")


def clean_line(line):
    """Return one input line normalised for the output file.

    Newlines are removed and tabs become single spaces. If the result has the
    shape ``! 'text'`` or ``! "text"``, the three-character prefix and the
    closing quote are dropped so only ``text`` remains. Surrounding whitespace
    is stripped last. The returned string carries no trailing newline.
    """
    text = line.replace("\n", "").replace("\t", " ")
    if QUOTED_LINE.match(text):
        # Drop the leading "! " + opening quote and the closing quote.
        text = text[3:-1]
    return text.strip()


def main():
    """Convert every file in ``filenames`` into its paired output file.

    Inputs that share an output are concatenated in list order. An output is
    truncated the first time it is opened during a run and appended to after
    that; opening it with "w" every time (as before) discarded everything but
    the last input written to it.
    """
    started = set()
    for filename, output_filename in zip(filenames, output_filenames):
        mode = 'a' if output_filename in started else 'w'
        started.add(output_filename)
        with open('./plewic/' + filename, encoding="utf-8", mode='r') as src:
            with open(output_filename, encoding="utf-8", mode=mode) as dst:
                dst.writelines(clean_line(line) + "\n" for line in src)


if __name__ == "__main__":
    main()