54 lines
2.0 KiB
Python
54 lines
2.0 KiB
Python
import csv
|
|
|
|
# Dialog participants, keyed by operator.
# Every participant is an (id, full name) tuple: the id is interpolated into
# the dialog file names, the full name is searched for in the raw transcript
# lines to detect who is speaking.
operators_and_users = {
    # operator -> users who talked to that operator
    ("06", "Krzysztof Bojakowski"): [
        ("01", "Mikołaj Gawron"),
        ("05", "Patryk Osiński"),
        ("09", "Bartosz Wieczorek"),
        ("07", "Sergiusz Kański"),
        ("03", "Dawid Korzępa"),
    ],
}
|
|
|
|
# Labels written to the "kto" column in place of the real participant names.
OP_ANON = "system"  # text attributed to the operator
USER_ANON = "user"  # text attributed to the user
|
|
|
|
def _iter_turns(lines, op_name, user_name):
    """Yield ``(speaker_label, text)`` rows from raw transcript lines.

    A line containing ``user_name`` or ``op_name`` is treated as a speaker
    marker: it switches the current speaker and is not itself added to the
    text. Every other non-blank line is appended to the current speaker's
    text. Consecutive markers of the same speaker do not start a new row, so
    their text is merged into one turn.

    Args:
        lines: iterable of raw text lines (e.g. an open text file).
        op_name: full name of the operator as it appears in the transcript.
        user_name: full name of the user as it appears in the transcript.

    Yields:
        Tuples ``(label, text)`` where ``label`` is ``OP_ANON``, ``USER_ANON``
        or ``None`` (text seen before any speaker marker).
    """
    speaker = None
    text = ""
    for raw_line in lines:
        # Git preview can't display data with " in line, so swap them for '.
        line = raw_line.strip().replace('"', "'")
        if not line:
            # A blank line is not end-of-file: skip it instead of stopping,
            # otherwise everything after the first empty line would be lost.
            continue

        is_user = user_name in line
        is_op = op_name in line

        if not is_user and not is_op:
            # Lines of one turn are joined with a literal backslash-n (two
            # characters), keeping the whole turn on a single TSV row.
            text = f"{text}\\n{line}" if text else line

        # The two checks are deliberately independent (not elif): a line that
        # mentions both names is processed as a user marker first and then as
        # an operator marker.
        if is_user:
            if text and speaker != USER_ANON:
                yield speaker, text
                text = ""
            speaker = USER_ANON

        if is_op:
            if text and speaker != OP_ANON:
                yield speaker, text
                text = ""
            speaker = OP_ANON

    # Flush whatever was collected for the last speaker.
    if text:
        yield speaker, text


# Convert every raw operator/user transcript into a two-column TSV file
# ("kto" = who, "treść" = content) with names replaced by anonymous labels.
for (op_id, op_name), users in operators_and_users.items():
    for user_id, user_name in users:
        with open(f"./data/dialog-{op_id}-{user_id}-01.tsv", "w", newline='\n', encoding="utf-8") as csvfile:
            spamwriter = csv.writer(csvfile, delimiter='\t', quotechar='|', quoting=csv.QUOTE_MINIMAL)
            spamwriter.writerow(["kto", "treść"])
            with open(f"./data/raw/dialog-{op_id}-{user_id}-01.txt", "r", encoding="utf-8") as f:
                spamwriter.writerows(_iter_turns(f, op_name, user_name))
|