Skrypt do generowania CSV z surowych danych
This commit is contained in:
parent
b97f99d3ef
commit
d00f603f17
51
data.py
Normal file
51
data.py
Normal file
@ -0,0 +1,51 @@
|
||||
import csv
|
||||
|
||||
# Mapping of each operator to the users they talked with.
# Key:   (operator id, operator full name)
# Value: list of (user id, user full name)
# The ids are interpolated into the dialog file names; the names are
# searched for in the raw transcript lines to detect who is speaking.
operators_and_users = {
    # operator
    ("06", "Krzysztof Bojakowski"): [
        # users
        ("01", "Mikołaj Gawron"),
        ("05", "Patryk Osiński"),
        ("09", "Bartosz Wieczorek"),
        ("07", "Sergiusz Kański"),
        ("03", "Dawid Korzępa"),
    ]
}
|
||||
|
||||
# Anonymised speaker labels: these replace the real operator / user names
# in the first ("kto") column of the generated TSV files.
OP_ANON = "system"
USER_ANON = "user"
|
||||
|
||||
def _iter_turns(lines, user_name, op_name, user_label, op_label):
    """Group raw transcript lines into ``(speaker_label, text)`` turns.

    A line containing ``user_name`` or ``op_name`` is treated as a speaker
    header: it is not part of the message text, it only switches the current
    speaker. Every other non-blank line is message text and is appended to
    the pending turn, joined with a literal backslash + "n" two-character
    sequence so that each turn stays on a single TSV row.

    A pending turn is emitted when a header for a *different* speaker is
    seen, and once more at the end of the input. Consecutive headers of the
    same speaker keep accumulating into one turn.

    Args:
        lines: iterable of raw text lines (e.g. an open text file).
        user_name: substring identifying the user's header lines.
        op_name: substring identifying the operator's header lines.
        user_label: label emitted for the user's turns.
        op_label: label emitted for the operator's turns.

    Yields:
        ``(label, text)`` tuples; ``label`` is ``None`` for text that
        appears before the first speaker header.
    """
    speaker = None
    text = ""

    for raw_line in lines:
        line = raw_line.strip()
        if not line:
            # A blank line is NOT end-of-file (iteration ends by itself at
            # EOF); skip it instead of truncating the rest of the dialog.
            continue

        is_user_header = user_name in line
        is_op_header = op_name in line

        if not is_user_header and not is_op_header:
            # Literal "\n" marker (two characters), not a real newline.
            text = f"{text}\\n{line}" if text else line

        # The two header checks are deliberately independent: a line that
        # mentions both names runs both branches, in this order.
        if is_user_header:
            if text and speaker != user_label:
                yield speaker, text
                text = ""
            speaker = user_label

        if is_op_header:
            if text and speaker != op_label:
                yield speaker, text
                text = ""
            speaker = op_label

    if text:
        # Flush the last pending turn.
        yield speaker, text


# Convert every raw dialog transcript into a two-column TSV file
# (columns: "kto" = who is speaking, "treść" = message text).
for (op_id, op_name), users in operators_and_users.items():
    for user_id, user_name in users:
        tsv_path = f"./data/dialog-{op_id}-{user_id}-01.tsv"
        raw_path = f"./data/raw/dialog-{op_id}-{user_id}-01.txt"

        with open(tsv_path, "w", newline='\n', encoding="utf-8") as csvfile:
            spamwriter = csv.writer(csvfile, delimiter='\t', quotechar='|', quoting=csv.QUOTE_MINIMAL)
            spamwriter.writerow(["kto", "treść"])

            with open(raw_path, "r", encoding="utf-8") as f:
                for speaker, text in _iter_turns(f, user_name, op_name, USER_ANON, OP_ANON):
                    # Message text is wrapped in single quotes in the output.
                    spamwriter.writerow([speaker, f"'{text}'"])
|
Loading…
Reference in New Issue
Block a user