102 lines
3.0 KiB
Python
102 lines
3.0 KiB
Python
def remove_empty_lines(text: str) -> str:
    """Return *text* without its blank lines.

    The text is split on ``"\n"``; a line is dropped when it is empty or
    consists only of whitespace. Surviving lines are kept unchanged (their
    own leading/trailing whitespace is preserved) and re-joined with
    ``"\n"``.
    """
    return '\n'.join(line for line in text.split('\n') if line.strip())
|
|
|
|
|
|
def join_lines(text: str) -> str:
    """Merge continuation lines into the entry line that precedes them.

    A line starting with ``"["`` opens a new entry. Every other line is
    appended to the current entry, separated by a single space. The very
    first line always opens an entry, whatever it starts with.

    Returns the entries joined with ``"\n"``.
    """
    first, *rest = text.split("\n")
    # Collect the pieces of each entry and join once at the end, instead of
    # growing a string with += for every continuation line.
    entries = [[first]]
    for line in rest:
        if line.startswith("["):
            entries.append([line])
        else:
            entries[-1].append(line)
    return "\n".join(" ".join(parts) for parts in entries)
|
|
|
|
|
|
def process_text(text: str, nazwisko_user: str, nazwisko_system: str) -> str:
    """Replace each line's speaker prefix with a ``user`` / ``system`` label.

    For every line of *text* (split on ``"\n"``):

    * if it contains *nazwisko_system*, the line becomes ``"system "`` plus
      whatever follows the first occurrence of that name, stripped;
    * otherwise, if it contains *nazwisko_user*, it becomes ``"user "`` plus
      whatever follows the first occurrence of that name, stripped;
    * otherwise it is kept unchanged.

    Everything up to and including the name is discarded. The system name
    is checked first, so it wins when a line contains both names.

    An empty name is ignored: ``"" in line`` is always true and splitting on
    an empty separator raises ``ValueError``, so an empty name never matches.
    """
    speakers = (("system", nazwisko_system), ("user", nazwisko_user))
    result = []
    for line in text.split('\n'):
        for label, name in speakers:
            if name and name in line:
                result.append(label + " " + line.split(name, 1)[1].strip())
                break
        else:
            # Neither name occurs in this line: keep it as it is.
            result.append(line)
    return '\n'.join(result)
|
|
|
|
|
|
def _next_break(content, start):
    """Return the index of the first space or newline at or after *start*, or -1."""
    hits = [i for i in (content.find(" ", start), content.find("\n", start)) if i != -1]
    return min(hits, default=-1)


def read_tsv(file_path, name_start=14):
    """Read a conversation file and extract the two speaker names from it.

    The user's name is taken to start at index *name_start* of the file
    content and to end at the second space/newline found from
    ``name_start + 1`` onwards.
    NOTE(review): this presumably corresponds to a 14-character
    "[timestamp] " prefix followed by a two-word name -- confirm against
    real input files.

    The operator's name starts two characters after the first ``"]"`` that
    follows the user's name and runs to the end of that line (or to the end
    of the content when there is no further newline).

    Known limitation: because the *first* ``"]"`` after the user's name is
    used, the "operator" comes out as the user again when the user has two
    consecutive entries at the start of the file.

    Args:
        file_path: Path of the file to read (decoded as UTF-8).
        name_start: Index in the content where the user's name begins.

    Returns:
        A ``(content, user, operator)`` tuple, where *content* is the whole
        file text.

    Raises:
        ValueError: If the content is too short or lacks the separators
            needed to locate both names.
    """
    with open(file_path, 'r', encoding='utf-8') as tsv_file:
        content = tsv_file.read()

    # The search deliberately starts one character into the name.
    first_break = _next_break(content, name_start + 1)
    user_end = _next_break(content, first_break + 1) if first_break != -1 else -1
    if user_end == -1:
        raise ValueError(
            "cannot locate the user name in %r: expected two spaces/newlines "
            "after index %d" % (file_path, name_start)
        )

    bracket = content.find("]", user_end)
    if bracket == -1:
        raise ValueError(
            "cannot locate the operator name in %r: no ']' after the user name"
            % (file_path,)
        )

    # Skip the "]" itself and the single character that follows it.
    operator_start = bracket + 2
    operator_end = content.find("\n", operator_start)
    if operator_end == -1:
        operator_end = len(content)

    user = content[name_start:user_end]
    operator = content[operator_start:operator_end]
    return content, user, operator
|
|
|
|
|
|
def write_tsv(file_path, content):
    """Write *content* to *file_path* as UTF-8 text, replacing any existing file."""
    with open(file_path, mode='w', encoding='utf-8') as output:
        output.write(content)
|
|
|
|
|
|
def replace_first_space_with_tab(text: str) -> str:
    """Turn the first space of every line into a tab.

    The text is split on ``"\n"``; in each line only the first ``" "`` is
    replaced by ``"\t"``. Lines without a space are left unchanged. The
    lines are re-joined with ``"\n"``.
    """
    # str.replace with count=1 touches only the first occurrence and is a
    # no-op when the line contains no space.
    return "\n".join(line.replace(" ", "\t", 1) for line in text.split("\n"))
|
|
|
|
def do_all(filename, path="C:/Users/macty/OneDrive/Pulpit/conversations"):
    """Run the whole clean-up pipeline on one file, overwriting it in place.

    Steps, in order: read the file and detect both speaker names, drop
    blank lines, merge continuation lines into their entry, replace the
    speaker prefixes with ``user`` / ``system`` labels, turn the first space
    of each line into a tab, and write the result back to the same file.

    Args:
        filename: Name of the file inside *path*.
        path: Directory that holds the file. Defaults to the directory the
            script was originally written for.
    """
    file_path = path + "/" + filename
    text, user, operator = read_tsv(file_path)
    text = remove_empty_lines(text)
    text = join_lines(text)
    text = process_text(text, user, operator)
    text = replace_first_space_with_tab(text)
    # The input file itself is replaced by the processed text.
    write_tsv(file_path, text)
|
|
|
|
|
|
if __name__ == '__main__':
    import os

    conversations_dir = "C:/Users/macty/OneDrive/Pulpit/conversations"
    for filename in os.listdir(conversations_dir):
        # os.listdir also returns sub-directories, which cannot be opened as
        # text files; only regular files are processed.
        if os.path.isfile(os.path.join(conversations_dir, filename)):
            do_all(filename)
    # Known limitation: this does not work for conversations in which someone
    # writes twice at the start before getting a reply; it was left unfixed
    # because it is a single rare case.
|
|
|
|
|
|
|
|
|