76 lines
2.3 KiB
Python
76 lines
2.3 KiB
Python
import sys
|
|
|
|
# Raise the interpreter's recursion limit to 4500 frames.
# NOTE(review): presumably needed because the automaton traversal in this
# file recurses once per input symbol — confirm before changing the value.
sys.setrecursionlimit(4500)
|
|
|
|
def AddTransition(transitions, state_from, state_to, symbol):
    """Record the edge ``state_from --symbol--> state_to`` in *transitions*.

    *transitions* is a nested mapping ``{state: {symbol: {target, ...}}}``
    and is updated in place; missing levels are created on demand.  Several
    targets may share one ``(state_from, symbol)`` pair, so the automaton
    may be nondeterministic.  Nothing is returned.
    """
    outgoing = transitions.setdefault(state_from, {})
    targets = outgoing.setdefault(symbol, set())
    targets.add(state_to)
|
|
|
|
def AddFinalState(final_states, state):
    """Mark *state* as accepting by inserting it into *final_states* in place.

    Adding a state that is already present leaves the set unchanged.
    Nothing is returned.
    """
    final_states.add(state)
|
|
|
|
def GetFinalStates(transitions, final_states, string, current_state):
    """Run the automaton over *string* and return an accepting end state.

    The automaton may be nondeterministic, so the set of all states
    reachable after each symbol is tracked instead of recursing into every
    branch.  This keeps the work linear in the input length, avoids the
    interpreter recursion limit on long inputs, and avoids exponential
    backtracking on ambiguous automata.

    Args:
        transitions: Mapping ``{state: {symbol: {target, ...}}}``.
        final_states: Collection of accepting states.
        string: Sequence of symbols to consume; a falsy value (e.g. ``''``)
            means there is nothing to consume.
        current_state: State the run starts from.

    Returns:
        A state from *final_states* that can be reached by consuming all of
        *string*, or ``-1`` when no run ends in an accepting state.  If
        several accepting states are reachable, which one is returned is
        unspecified.
    """
    frontier = {current_state}

    for symbol in (string or ()):
        next_frontier = set()
        for state in frontier:
            # .get() never inserts keys, so the transition table is not mutated.
            next_frontier.update(transitions.get(state, {}).get(symbol, ()))
        if not next_frontier:
            # Every run is stuck; the rest of the input cannot be consumed.
            return -1
        frontier = next_frontier

    for state in frontier:
        if state in final_states:
            return state
    return -1
|
|
|
|
def IsAccepted(transitions, final_states, string, initial_state):
    """Return True when the automaton accepts *string* from *initial_state*.

    Delegates the traversal to ``GetFinalStates`` and reports whether the
    value it returned is a member of *final_states*.
    """
    reached = GetFinalStates(transitions, final_states, string, initial_state)
    if reached in final_states:
        return True
    return False
|
|
|
|
|
|
def ChangeInput(transitions, final_states, string, initial_state):
    """Return every tagged form of *string* that the automaton accepts.

    The suffixes ``;N``, ``;V`` and ``;ADJ`` are appended to *string* in
    that order and each candidate is checked with ``IsAccepted``.  The
    accepted candidates are returned in the same order; when none is
    accepted the result is the single-element list ``[string + ';OOV']``.
    """
    candidates = [string + suffix for suffix in (';N', ';V', ';ADJ')]
    accepted = [
        candidate
        for candidate in candidates
        if IsAccepted(transitions, final_states, candidate, initial_state)
    ]
    if accepted:
        return accepted
    return [string + ';OOV']
|
|
|
|
|
|
# Automaton shared by the whole script; filled in by _load_automaton().
transitions = {}
final_states = set()


def _load_automaton(description_path):
    """Fill the module-level automaton from the file at *description_path*.

    A line with exactly three tab-separated fields is read as
    ``state_from<TAB>state_to<TAB>symbol`` and added as a transition.  Any
    other non-blank line is taken whole as the name of a final state.
    Blank lines are skipped so that stray empty lines do not register an
    accepting state named ``''``.
    """
    with open(description_path, 'r', encoding="utf-8") as description:
        for line in description:
            line = line.rstrip()
            if not line:
                continue

            fields = line.split('\t')
            if len(fields) == 3:
                state_from, state_to, symbol = fields
                AddTransition(transitions, state_from, state_to, symbol)
            else:
                AddFinalState(final_states, line)


def main(argv=None):
    """Tag every line of the input file and write the results.

    Args:
        argv: ``[fsa_description, input_file, output_file]`` (for example
            ``['elem.arg', 'elem.in', 'elem.out']``).  Defaults to
            ``sys.argv[1:]``; arguments beyond the third are ignored.

    For each input line (trailing whitespace removed) the sorted results of
    ``ChangeInput`` starting from state ``'0'`` are written to the output
    file, one per line.  Exits with a usage message when fewer than three
    arguments are supplied.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 3:
        sys.exit('usage: FSA_DESCRIPTION INPUT_FILE OUTPUT_FILE')
    fsa_descr, input_file, output_file = args[:3]

    _load_automaton(fsa_descr)

    with open(input_file, 'r', encoding="utf-8") as in_file, \
            open(output_file, 'w', encoding="utf-8") as out_file:
        for line in in_file:
            results = ChangeInput(transitions, final_states, line.rstrip(), '0')
            for result in sorted(results):
                out_file.write(result + '\n')


if __name__ == '__main__':
    main()
|
|
|