"""Clean the PLEWiC split files and merge them into two plain-text outputs.

Every input under ``./plewic/`` is read line by line. Tabs become spaces, a
wrapping ``! '...'`` or ``! "..."`` marker is removed, surrounding whitespace
is stripped, and the result is written as one line to the mapped output file.
"""
import regex as re

filenames = ['test.co', 'test.er', 'train.co', 'train.er', 'tune.co', 'tune.er']
output_filenames = ['./plewi_co.txt', './plewi_er.txt',
                    './plewi_co.txt', './plewi_er.txt',
                    './plewi_co.txt', './plewi_er.txt']

# Matches a line of the form `! '<text>'` or `! "<text>"`; the backreference
# requires the closing quote to be the same character as the opening one.
_QUOTED_LINE = re.compile(r"^!\s(['\"]).*\1$")


def _clean_line(line):
    """Return *line* without newline/tabs and without a `! '...'` wrapper."""
    flat = line.replace("\n", "").replace("\t", " ")
    if _QUOTED_LINE.match(flat):
        # Drop the three-character prefix (`!`, whitespace, quote) and the
        # trailing quote.
        flat = flat[3:-1]
    return flat.strip()


# Several inputs share one output path. Opening the output in 'w' mode once
# per input (as the script previously did) truncated it each time, so only the
# last input for each output survived. Group the inputs so that each output is
# opened exactly once and receives all of its inputs, in list order.
# NOTE(review): merging the splits is presumed to be the intent -- confirm.
inputs_by_output = {}
for filename, output_filename in zip(filenames, output_filenames):
    inputs_by_output.setdefault(output_filename, []).append(filename)

for output_filename, input_names in inputs_by_output.items():
    with open(output_filename, encoding="utf-8", mode='w') as f2:
        for filename in input_names:
            with open('./plewic/' + filename, encoding="utf-8", mode='r') as f:
                for line in f:
                    f2.write(_clean_line(line) + "\n")