67 lines
2.2 KiB
Python
67 lines
2.2 KiB
Python
import codecs
|
|
import sys
|
|
|
|
def process_input(fsa, input_string, final_state=24):
    """Look up ``input_string`` in the automaton and format its analyses.

    The word is consumed symbol by symbol starting from state 0, then the
    ``";"`` separator transition is taken, and finally every symbol path from
    there to ``final_state`` is collected via ``check_transition``.

    Args:
        fsa: Mapping of ``(state, symbol)`` to the next state.
        input_string: The word to look up.
        final_state: State at which a code path ends. Defaults to 24, the
            value this function previously had hard-coded.

    Returns:
        One ``"<input_string>;<code>\\n"`` record per code path, in sorted
        order, concatenated into a single string. If the word, the ``";"``
        separator, or a path to ``final_state`` is missing, the single record
        ``"<input_string>;OOV\\n"`` is returned instead. Every record is
        newline-terminated so the result can be written to a file as-is.
    """
    oov_record = f"{input_string};OOV\n"

    current_state = 0
    for symbol in input_string:
        # Compare against None explicitly: state 0 is a valid (falsy) target.
        next_state = fsa.get((current_state, symbol))
        if next_state is None:
            return oov_record
        current_state = next_state

    # A word without the ";" separator arc has no codes attached to it.
    after_separator = fsa.get((current_state, ";"))
    if after_separator is None:
        return oov_record

    code_paths = check_transition(fsa, after_separator, final_state)
    if code_paths is None:
        return oov_record

    return "".join(
        f"{input_string};{''.join(code)}\n" for code in sorted(code_paths)
    )
|
|
|
|
def check_transition(fsa, start_state, target_state):
    """Collect every symbol path leading from ``start_state`` to ``target_state``.

    Args:
        fsa: Mapping of ``(state, symbol)`` to the next state.
        start_state: State at which the search begins.
        target_state: State at which a path is considered complete; the
            search does not continue past it.

    Returns:
        A list of paths, each a list of the symbols read along the way, in
        depth-first order following the iteration order of ``fsa``. Returns
        ``None`` when no path exists. If ``start_state`` equals
        ``target_state`` the result is ``[[]]`` (one empty path).

    Only paths that never revisit a state are reported, so an automaton that
    contains a cycle yields a finite result instead of unbounded recursion.
    """
    # Group arcs by source state once, so each visited state does not have to
    # rescan the whole transition table. Per-state arc order is preserved.
    outgoing = {}
    for (source, symbol), destination in fsa.items():
        outgoing.setdefault(source, []).append((symbol, destination))

    all_paths = []
    current_path = []
    on_path = {start_state}  # states on the path currently being explored

    def find_transitions(state):
        if state == target_state:
            all_paths.append(current_path.copy())
            return

        for symbol, destination in outgoing.get(state, ()):
            if destination in on_path:
                # Following this arc would close a cycle; skip it.
                continue
            on_path.add(destination)
            current_path.append(symbol)
            find_transitions(destination)
            current_path.pop()
            on_path.discard(destination)

    find_transitions(start_state)

    return all_paths or None
|
|
|
|
# File holding the automaton arcs, one per line.
fsa_file = "./multi.arg"
# File holding the strings to look up, one per line.
input_file_path = "./multi.in"
# File that receives the formatted results.
output_file_path = "test.exp"

# Transition table: (source state, symbol) -> destination state.
fsa = {}

with open(fsa_file, 'r', encoding='utf-8') as arc_file:
    for raw_arc in arc_file:
        fields = raw_arc.strip().split('\t')
        if len(fields) != 3:
            # Only lines with exactly three tab-separated fields are arcs.
            continue
        source, destination, label = fields
        fsa[(int(source), label)] = int(destination)

with open(input_file_path, 'r', encoding='utf-8') as words, \
        open(output_file_path, 'w', encoding='utf-8') as results:
    for raw_word in words:
        # Each input line is stripped of surrounding whitespace before lookup;
        # whatever process_input returns is written out unchanged.
        results.write(process_input(fsa, raw_word.strip()))
|
|
|
|
# NOTE: Originally all file paths were passed in via the terminal, but because
# of Polish characters the file paths had to be placed directly in the code.
# Unfortunately, I did not manage to find a way to pass the files so that
# UTF-8 is used.