2023-04-15 17:30:39 +02:00
def remove_empty_lines(text):
    """Return *text* with every blank line removed.

    A line counts as blank when it is empty or contains only whitespace.
    The remaining lines keep their original content and order and are
    re-joined with newline characters.
    """
    kept = [row for row in text.split('\n') if row.strip()]
    return '\n'.join(kept)
def join_lines(text):
    """Fold continuation lines into the record that precedes them.

    The first line always opens a record. Every later line that begins with
    "[" opens a new record; any other line is appended to the current record,
    separated from it by a single space. The records are returned joined by
    newline characters.
    """
    first, *rest = text.split("\n")
    records = [[first]]
    for row in rest:
        if row.startswith("["):
            records.append([row])
        else:
            records[-1].append(row)
    return "\n".join(" ".join(parts) for parts in records)
def process_text(text, nazwisko_user, nazwisko_system):
    """Replace each line's speaker header with a "user"/"system" role label.

    For every line of *text*, everything up to and including the speaker's
    name is dropped and the rest (stripped of surrounding whitespace) is
    prefixed with ``"system "`` or ``"user "``. Lines containing neither name
    are passed through unchanged.

    When both names occur in one line, the one that occurs first decides the
    role: the header name precedes the message text, so a message that merely
    mentions the other participant is still attributed correctly. If both
    names start at the same position, the system name wins.

    Args:
        text: Newline-separated records, one message per line.
        nazwisko_user: Name of the participant to label as "user".
        nazwisko_system: Name of the participant to label as "system".

    Returns:
        The relabelled records joined by newline characters.
    """
    result = []
    for line in text.split('\n'):
        # An empty name would match everywhere (and cannot be split on), so
        # it is treated as "not present".
        system_at = line.find(nazwisko_system) if nazwisko_system else -1
        user_at = line.find(nazwisko_user) if nazwisko_user else -1

        if system_at != -1 and (user_at == -1 or system_at <= user_at):
            rest = line[system_at + len(nazwisko_system):]
            result.append("system " + rest.strip())
        elif user_at != -1:
            rest = line[user_at + len(nazwisko_user):]
            result.append("user " + rest.strip())
        else:
            result.append(line)
    return '\n'.join(result)
def read_tsv(file_path):
    """Read a conversation file and work out who the two participants are.

    The user is the first speaker: the text starting at character offset 14
    and ending at the second space/newline found from offset 15 onwards
    (i.e. two whitespace-delimited words).
    NOTE(review): the fixed offset presumably corresponds to a 13-character
    bracketed timestamp followed by one space -- confirm against real data.

    The operator is the name on the first later header line (a line that
    starts with "[" and contains "]") whose name differs from the user. The
    name is everything from two characters after the "]" to the end of that
    line. Skipping headers that repeat the user's name keeps the detection
    correct when the user sends several messages before the first reply.
    If no other name exists, the first later header's name is returned.

    Args:
        file_path: Path of the UTF-8 encoded file to read.

    Returns:
        A ``(content, user, operator)`` tuple, where ``content`` is the
        unmodified file text.

    Raises:
        ValueError: If the first speaker's name cannot be delimited or no
            later header line with a name is present.
    """
    with open(file_path, 'r', encoding='utf-8') as tsv_file:
        content = tsv_file.read()

    name_start = 14
    name_end = -1
    separators_seen = 0
    # Scanning starts one character into the name, as the name itself is
    # expected to be non-empty.
    for index in range(name_start + 1, len(content)):
        if content[index] in " \n":
            separators_seen += 1
            if separators_seen == 2:
                name_end = index
                break
    if name_end == -1:
        raise ValueError(
            f"cannot locate the first speaker's name in {file_path!r}"
        )
    user = content[name_start:name_end]

    operator = None
    first_later_name = None
    for line in content.split("\n")[1:]:
        closing = line.find("]")
        # Only header lines are considered, so a "]" inside message text is
        # never mistaken for the end of a timestamp.
        if not line.startswith("[") or closing == -1:
            continue
        name = line[closing + 2:]
        if not name:
            continue
        if first_later_name is None:
            first_later_name = name
        if name != user:
            operator = name
            break

    if operator is None:
        if first_later_name is None:
            raise ValueError(
                f"cannot locate a second speaker header in {file_path!r}"
            )
        # Only one distinct speaker exists; fall back to the first later
        # header's name.
        operator = first_later_name

    return content, user, operator
2023-04-15 17:30:39 +02:00
def write_tsv(file_path, content):
    """Write *content* to *file_path* as UTF-8 text.

    The file is opened in write mode, so an existing file at that path is
    replaced.
    """
    with open(file_path, mode='w', encoding='utf-8') as sink:
        sink.write(content)
def replace_first_space_with_tab(text):
    """Turn the first space of every line into a tab character.

    Lines without any space are left as they are. Only the first space of a
    line is affected; later spaces are kept.
    """
    converted = (row.replace(" ", "\t", 1) for row in text.split("\n"))
    return "\n".join(converted)
2023-04-15 22:01:33 +02:00
def do_all(filename, directory="C:/Users/macty/OneDrive/Pulpit/conversations"):
    """Convert one conversation file in place to role/text TSV lines.

    The file is read, blank lines are removed, continuation lines are folded
    into their record, speaker headers are replaced by "user"/"system"
    labels, and the first space of each line becomes a tab. The result is
    written back to the same path, overwriting the original file.

    Args:
        filename: Name of the file inside *directory*.
        directory: Folder that contains the file. Defaults to the folder the
            script was originally written for, so existing single-argument
            calls keep working.
    """
    file_path = directory + "/" + filename
    text, user, operator = read_tsv(file_path)

    text = remove_empty_lines(text)
    text = join_lines(text)
    text = process_text(text, user, operator)
    text = replace_first_space_with_tab(text)

    write_tsv(file_path, text)
2023-04-15 22:01:33 +02:00
if __name__ == '__main__':
    import os

    conversations_dir = "C:/Users/macty/OneDrive/Pulpit/conversations"
    for filename in os.listdir(conversations_dir):
        # os.listdir also returns sub-directories; those cannot be opened as
        # text files, so only regular files are processed.
        if os.path.isfile(os.path.join(conversations_dir, filename)):
            do_all(filename)
# Known limitation: does not work when someone writes twice at the start of the conversation before getting a reply; not worth the extra complexity to fix this single case.