2.3 KiB
2.3 KiB
import re
# Rewrite lines of the form "<text> {<data>}" from INPUT_PATH into
# three tab-separated columns, "<text>\t<text>\t{<data>}", in OUTPUT_PATH.
INPUT_PATH = 'test-A/out.tsv'
OUTPUT_PATH = 'test-A/parsed.tsv'

# Text written in the first two columns when nothing precedes the data segment.
DEFAULT_TEXT = "Airconditioner"

# Matches either a complete "{...}" segment, or a dangling "}" (opening brace
# missing) followed by no further "{" up to the end of the line.
# NOTE: the previous pattern used r'\\}', which in a raw string matches a
# literal backslash followed by "}", so the dangling-brace branch never fired
# for a plain "}".
_DATA_PATTERN = re.compile(r'(\{.*\}|\}[^{]*$)')


def format_line(line, default_text=DEFAULT_TEXT):
    """Return *line* rewritten as ``text<TAB>text<TAB>data<NEWLINE>``.

    Args:
        line: One raw input line, usually ending with a newline.
        default_text: Replacement used when no text precedes the data segment.

    Returns:
        The reformatted line when a "{...}" segment or a dangling "}" is
        found; otherwise *line* itself, unchanged.
    """
    match = _DATA_PATTERN.search(line)
    if match is None:
        # No data segment at all: pass the line through untouched.
        return line

    # strip() also removes the trailing newline that the dangling-"}" branch
    # can capture, because [^{]* matches newline characters.
    data = match.group(0).strip()
    text = line[:match.start()].strip()

    # A dangling "}" has no opening brace; add one so the data column always
    # starts with "{".
    if not data.startswith('{'):
        data = '{' + data

    if not text:
        text = default_text

    return f"{text}\t{text}\t{data}\n"


def main(in_path=INPUT_PATH, out_path=OUTPUT_PATH):
    """Reformat every line of *in_path* and write the result to *out_path*.

    Both files are opened as UTF-8 text. Any existing *out_path* is
    overwritten. A confirmation naming the actual output path is printed.
    """
    with open(in_path, 'r', encoding="utf-8") as infile, \
            open(out_path, 'w', encoding="utf-8") as outfile:
        outfile.writelines(format_line(line) for line in infile)
    # Report the real destination (the old message named "output.txt").
    print(f"Data has been formatted and saved to {out_path}")


if __name__ == "__main__":
    main()
Data has been formatted and saved to output.txt