jfz-2023-s473555/TaskC05/run.py

172 lines
5.1 KiB
Python
Raw Normal View History

2023-11-27 02:02:29 +01:00
import sys
import csv
from numpy import append
# When truthy, print_debug() forwards its arguments to print(); otherwise
# the diagnostic warnings in this script stay silent.
DEBUG = False
# Symbols the automaton description is checked against; a mismatch only
# produces a debug warning.
alfabet = "xyz"
# Symbol whose transition is looked up once a whole word has been consumed.
SEMICOLON = ";"
# sys.setrecursionlimit(4100)
def print_debug(*args, **kwargs):
    """Forward all arguments to ``print`` only while ``DEBUG`` is truthy.

    Accepts exactly what ``print`` accepts; does nothing when debugging is
    switched off.
    """
    if not DEBUG:
        return
    print(*args, **kwargs)
# Without command-line arguments, fall back to the "medium" file set:
# automaton description, input words and expected output, in that order.
if len(sys.argv) == 1:
    print("Default arguments parsed\n")
    sys.argv.extend(["medium.arg", "medium.in", "medium.exp"])
2023-11-27 02:02:29 +01:00
# Load the automaton description: a tab-separated file, one row per line.
with open(sys.argv[1], mode="r", newline="", encoding="utf8") as csvfile:
    filereader = csv.reader(csvfile, delimiter="\t", quotechar="|")
    fsa_description = list(filereader)

# Skip the first line when it is a comment. The guards keep an empty file,
# a blank first line or an empty first cell from raising IndexError.
if (
    fsa_description
    and fsa_description[0]
    and fsa_description[0][0].startswith("#")
):
    fsa_description = fsa_description[1:]
# Build the lookup tables from the description rows.
# A one-column row names an accepting state. Any other row is read as
# (source state, target state, symbols) and yields one transition per
# character of the symbols column.
accepting_states = []
# (source state, symbol) -> target states reachable on that symbol
fsa_description_map: dict[tuple[str, str], list[str]] = {}
# source state -> outgoing (target state, symbol) pairs
fsa_description_direction: dict[str, list[tuple[str, str]]] = {}
is_error = False
for num, item in enumerate(fsa_description):
    if not item:
        # csv.reader yields [] for a blank line; there is nothing to record.
        continue
    if len(item) == 1:
        accepting_states.append(item[0])
        continue
    if len(item) < 3:
        # A transition row without a symbols column cannot be interpreted;
        # report it and abort after the whole file has been checked.
        print(
            f"ERROR - malformed transition row: {item} | line: {num + 1}",
            file=sys.stderr,
        )
        is_error = True
        continue

    source, target, symbols = item[0], item[1], item[2]
    for letter in symbols:
        if letter not in alfabet:
            print_debug(
                "WARNING - letter not in alfabet: ", letter, "| line:", num + 1
            )
        tupleItems = (source, letter)
        if tupleItems in fsa_description_map:
            print_debug(
                f"WARNING - duplicate letter: key = ({item[0]} | {letter}) | line: {num + 1}"
            )
        # Append to the existing entry, creating it on first use.
        fsa_description_map.setdefault(tupleItems, []).append(target)
        fsa_description_direction.setdefault(source, []).append((target, letter))

if is_error:
    sys.exit(-1)
# For every source state, concatenate the symbols it has transitions on
# (in the insertion order of the transition map).
states = {}
for source_state, symbol in fsa_description_map:
    states[source_state] = states.get(source_state, "") + symbol
from collections import Counter
# check if all letters are used once
def is_permutation(str1, str2):
    """Return True when both strings hold the same characters the same number of times."""
    return sorted(str1) == sorted(str2)
def find_missing_letters(str1, str2) -> str:
    """Return the characters of ``str2`` that do not occur in ``str1``.

    Characters are kept in ``str2`` order, and a character repeated in
    ``str2`` is repeated in the result.
    """
    return "".join(char for char in str2 if char not in str1)
# Emit a debug warning for every state whose collected symbols are not a
# permutation of the alphabet. This is informational only: the script
# keeps running afterwards.
for state in states:
    if is_permutation(states[state], alfabet):
        continue
    print_debug(
        f"WARNING - state {state} doesn't match full alphabet: {states[state]} | {alfabet} | diff - {find_missing_letters(states[state], alfabet)}"
    )
# Input words: one per line of the second argument's file.
with open(sys.argv[2], mode="r", newline="", encoding="utf8") as input_file:
    test_in = input_file.read().splitlines()

# Expected output lines, read from the third argument's file.
with open(sys.argv[3], mode="r", newline="", encoding="utf8") as expected_file:
    test_out = expected_file.read().splitlines()
def is_correct(current_state: str) -> bool:
    """Return True when ``current_state`` is one of the accepting states."""
    return current_state in accepting_states
2023-11-28 00:45:11 +01:00
def traverse_to_description(current_state: str) -> list[str]:
    """Collect the symbol strings spelled by paths leaving ``current_state``.

    Returns ``[""]`` when the state has no outgoing transitions. Otherwise
    every outgoing symbol is prefixed to each string collected from its
    target state; a string is kept when the target is accepting or when
    the collected continuation is non-empty.

    NOTE(review): an accepting target that still has outgoing transitions
    is never emitted without a continuation -- confirm this is intended.
    NOTE(review): nothing guards against cycles, so a cyclic description
    graph would recurse without bound.
    """
    outgoing = fsa_description_direction.get(current_state)
    if outgoing is None:
        return [""]  # the end

    collected: list[str] = []
    for target, symbol in outgoing:
        target_accepts = is_correct(target)
        for tail in traverse_to_description(target):
            if target_accepts or tail != "":
                collected.append(symbol + tail)
    return collected
2023-11-27 02:02:29 +01:00
2023-11-28 00:45:11 +01:00
def find_descriptions(current_state: str, word: str) -> list[str]:
    """Return the ``word;description`` lines the automaton yields for ``word``.

    The word is consumed from ``current_state``; from the state reached,
    the ``SEMICOLON`` transition is followed and every description string
    collected behind it is appended to ``word + ";"``. The lines are sorted
    by the text between the first and second ``";"``.

    Returns ``[word + ";OOV"]`` when the word cannot be consumed, when the
    state reached has no ``SEMICOLON`` transition (this used to raise
    ``KeyError``), or when no description is found at all (this used to
    return an empty list, leaving the word without any output line).
    """
    out_of_vocabulary = [word + ";OOV"]

    state_after_word = traverse_word(current_state, word)
    if state_after_word == "-1":
        return out_of_vocabulary

    separator_targets = fsa_description_map.get((state_after_word, SEMICOLON), [])
    words_with_description: list[str] = []
    for next_state in separator_targets:
        for good_ending in traverse_to_description(next_state):
            words_with_description.append(word + ";" + good_ending)

    if not words_with_description:
        return out_of_vocabulary
    return sorted(words_with_description, key=lambda x: x.split(";")[1])
2023-11-27 02:02:29 +01:00
2023-11-28 00:45:11 +01:00
def traverse_word(current_state: str, word: str) -> str:
    """Return the state reached by consuming ``word`` from ``current_state``.

    Returns ``"-1"`` when no sequence of transitions consumes the whole
    word, and ``current_state`` itself for an empty word.

    When a (state, symbol) pair has several target states, they are tried
    in the order they were recorded and the first one that consumes the
    rest of the word wins. Previously the result of the last target
    overwrote earlier ones, so a successful branch was lost whenever the
    last branch failed. The search is iterative, so long words do not hit
    the recursion limit.
    """
    pending = [(current_state, 0)]
    # The outcome depends only on (state, position), so each pair is
    # expanded at most once.
    visited = set()
    while pending:
        state, position = pending.pop()
        if position == len(word):
            return state
        if (state, position) in visited:
            continue
        visited.add((state, position))
        targets = fsa_description_map.get((state, word[position]), [])
        # Push in reverse so the first recorded target is explored first.
        pending.extend((target, position + 1) for target in reversed(targets))
    return "-1"  # unspecified next state
2023-11-27 02:02:29 +01:00
# Analyse every input word and compare each produced line with the expected
# output, consuming one expected line per produced description.
is_difference = []  # 1-based numbers of the output lines that differ
test_out_numerator = 0  # index of the next expected line
for word in test_in:
    current_state = "0"
    descriptions = []
    if len(word) != 0:
        descriptions = find_descriptions(current_state, word)
    for description in descriptions:
        line_number = test_out_numerator + 1
        # The expected file may hold fewer lines than are produced; a
        # missing expected line counts as a difference instead of raising
        # IndexError.
        matches = (
            test_out_numerator < len(test_out)
            and description == test_out[test_out_numerator]
        )
        if matches:
            print(f"{line_number}\tOK\t{description}")
        else:
            print(f"{line_number}\tERROR\t{word} | {description}")
            is_difference.append(line_number)
        test_out_numerator += 1

if len(is_difference) != 0:
    print(is_difference)