jfz-2023-s473564/TaskC05/run.py

67 lines
2.2 KiB
Python
Raw Permalink Normal View History

2023-11-25 21:13:35 +01:00
import codecs
import sys
def process_input(fsa, input_string, final_state=24):
    """Look up ``input_string`` in the automaton and format its codes.

    The word is consumed symbol by symbol starting from state 0, then the
    ``";"`` separator transition is taken.  Every symbol path leading from
    there to ``final_state`` is treated as one code for the word.

    Args:
        fsa: Transition table mapping ``(state, symbol)`` to the next state.
        input_string: The word to look up.
        final_state: State that terminates a code path.  Defaults to 24, the
            value that used to be hard-coded here.

    Returns:
        One ``"<word>;<code>\\n"`` line per code, sorted by code, joined into
        a single string; or the single line ``"<word>;OOV\\n"`` when the word
        cannot be consumed, has no ``";"`` transition, or no code path
        reaches ``final_state``.  Every returned line is newline-terminated
        so the result can be written to a file as-is.
    """
    oov_line = f"{input_string};OOV\n"

    current_state = 0
    try:
        for symbol in input_string:
            current_state = fsa[(current_state, symbol)]
        # The separator must follow the word; a word that is only a prefix
        # of a known entry has no such transition and is out of vocabulary.
        current_state = fsa[(current_state, ";")]
    except KeyError:
        return oov_line

    code_paths = check_transition(fsa, current_state, final_state)
    if code_paths is None:
        return oov_line

    codes = ("".join(path) for path in sorted(code_paths))
    return "".join(f"{input_string};{code}\n" for code in codes)
def check_transition(fsa, start_state, target_state):
    """Collect every symbol path leading from ``start_state`` to ``target_state``.

    Args:
        fsa: Transition table mapping ``(state, symbol)`` to the next state.
        start_state: State the search starts from.
        target_state: State at which a path is considered complete; the
            search does not continue past it.

    Returns:
        A list of paths, each a list of symbols, in depth-first order
        following the iteration order of ``fsa``; ``None`` when no path
        exists.  If ``start_state`` equals ``target_state`` the result is a
        single empty path.
    """
    # Index the transitions by source state once, instead of scanning the
    # whole table at every step of the search.
    outgoing = {}
    for (source, symbol), destination in fsa.items():
        outgoing.setdefault(source, []).append((symbol, destination))

    all_paths = []
    current_path = []
    # States on the path currently being explored; re-entering one of them
    # would loop forever on a cyclic automaton.
    on_path = {start_state}

    def find_transitions(state):
        if state == target_state:
            all_paths.append(current_path.copy())
            return
        for symbol, next_state in outgoing.get(state, ()):
            if next_state in on_path:
                continue
            on_path.add(next_state)
            current_path.append(symbol)
            find_transitions(next_state)
            current_path.pop()
            on_path.discard(next_state)

    find_transitions(start_state)
    return all_paths or None
# NOTE: The file paths were originally passed in through the terminal, but
# because of Polish characters they had to be placed directly in the code.
# No way was found to pass the files so that UTF-8 would be used.

# Transition table: (source state, symbol) -> destination state.
fsa = {}
fsa_file = "./multi.arg"  # FSA FILE
with open(fsa_file, 'r', encoding='utf-8') as f:
    for line in f:
        fields = line.strip().split('\t')
        # Only three-column lines describe a transition; skip the rest.
        if len(fields) != 3:
            continue
        source, destination, label = fields
        fsa[(int(source), label)] = int(destination)

input_file_path = "./multi.in"  # INPUT STRING
output_file_path = "test.exp"  # OUTPUT FILE
with open(input_file_path, 'r', encoding='utf-8') as file:
    with open(output_file_path, 'w', encoding='utf-8') as output_file:
        # One lookup per input line; the result is written exactly as
        # process_input returns it.
        for raw_line in file:
            output_file.write(process_input(fsa, raw_line.strip()))