emphatic_chatbot/deduplicator.py
2023-06-18 17:32:13 +02:00

7 lines
238 B
Python

# Copy prompts.tsv to final.csv, keeping only the first occurrence of each
# line and preserving the original line order.
#
# NOTE(review): lines are copied verbatim, so the output keeps whatever
# delimiter prompts.tsv uses even though it is named ".csv" -- confirm that
# downstream readers expect this.
# NOTE(review): both files use the platform default text encoding; consider
# pinning encoding= if the prompts may contain non-ASCII text.
lines_seen = set()  # line contents (minus trailing newline) already written

# The input is opened first so that a missing prompts.tsv does not truncate an
# existing final.csv; the context managers close both files even if reading
# or writing raises part-way through.
with open("prompts.tsv", "r") as infile, open("final.csv", "w") as outfile:
    for line in infile:
        # Compare without the trailing newline so that a final line lacking
        # one is still recognised as a duplicate of an earlier identical line.
        key = line.rstrip("\n")
        if key not in lines_seen:  # not a duplicate
            outfile.write(line)
            lines_seen.add(key)