Skrypt do generowania CSV z surowych danych
This commit is contained in:
parent
b97f99d3ef
commit
d00f603f17
51
data.py
Normal file
51
data.py
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
import csv
|
||||||
|
|
||||||
|
# Maps each operator to the users whose dialogs with that operator are
# converted. Every participant is an ``(id, full name)`` pair: the id is used
# in the dialog file names, the name is searched for in the raw transcript.
operators_and_users = {
    # operator
    ("06", "Krzysztof Bojakowski"): [
        # users
        ("01", "Mikołaj Gawron"),
        ("05", "Patryk Osiński"),
        ("09", "Bartosz Wieczorek"),
        ("07", "Sergiusz Kański"),
        ("03", "Dawid Korzępa"),
    ],
}
|
||||||
|
|
||||||
|
# Anonymised speaker labels written to the output in place of real names.
OP_ANON = "system"  # label for the operator's turns
USER_ANON = "user"  # label for the user's turns
|
||||||
|
|
||||||
|
def _merge_turns(lines, user_name, op_name):
    """Yield ``(speaker, text)`` pairs from the lines of a raw dialog.

    A line containing ``user_name`` or ``op_name`` is treated as a speaker
    marker and is not part of the message text. Every other non-blank line is
    message text for the current speaker. Consecutive messages by the same
    speaker are merged into one turn, with the pieces joined by a literal
    backslash-n sequence (two characters, not a real newline).

    ``speaker`` is ``USER_ANON``, ``OP_ANON``, or ``None`` when text appears
    before any speaker marker.
    """
    whos_typing = None
    text = ""
    for raw_line in lines:
        line = raw_line.strip()
        if not line:
            # A blank line is NOT end-of-file; skip it instead of stopping,
            # otherwise the rest of the dialog would be silently dropped.
            continue

        has_user = user_name in line
        has_op = op_name in line

        if not has_user and not has_op:
            text = f"{text}\\n{line}" if text else line

        if has_user:
            # Flush only when the speaker actually changes, so that several
            # messages in a row from the same person stay in one turn.
            if text and whos_typing != USER_ANON:
                yield whos_typing, text
                text = ""
            whos_typing = USER_ANON

        if has_op:
            if text and whos_typing != OP_ANON:
                yield whos_typing, text
                text = ""
            whos_typing = OP_ANON

    # Emit whatever is still buffered for the last speaker.
    if text:
        yield whos_typing, text


# Convert every raw operator/user transcript into a two-column TSV file.
for operator, users in operators_and_users.items():
    op_id, op_name = operator
    for user_id, user_name in users:
        with open(f"./data/dialog-{op_id}-{user_id}-01.tsv", "w", newline='\n', encoding="utf-8") as csvfile:
            writer = csv.writer(csvfile, delimiter='\t', quotechar='|', quoting=csv.QUOTE_MINIMAL)
            writer.writerow(["kto", "treść"])
            with open(f"./data/raw/dialog-{op_id}-{user_id}-01.txt", "r", encoding="utf-8") as f:
                for speaker, text in _merge_turns(f, user_name, op_name):
                    # The message text is wrapped in single quotes in the output.
                    writer.writerow([speaker, f"'{text}'"])
|
Loading…
Reference in New Issue
Block a user