"""Check a finite-state analyser against a file of expected analyses.

Usage: ``script.py [FSA_FILE INPUT_FILE EXPECTED_FILE]``

* ``FSA_FILE`` is tab-separated (``|`` is the quote character).  An optional
  first line starting with ``#`` is skipped.  A row with a single field names
  an accepting state; a row with three fields is ``source target label``,
  where every character of ``label`` becomes its own transition.
* ``INPUT_FILE`` holds one word per line (empty lines are ignored).
* ``EXPECTED_FILE`` holds the expected output lines, in order.

For every produced line the script prints ``<n>\\tOK\\t<line>`` or
``<n>\\tERROR\\t<word> | <line>`` and finally the list of mismatching line
numbers (if any).
"""
import csv
import sys
from collections import Counter

# NOTE(review): `append` is not used anywhere in the visible code; the import
# is kept only because file-level imports are not removed here.
from numpy import append

DEBUG = False
alfabet = "xyz"
SEMICOLON = ";"
# Sentinel returned by traverse_word() when the word cannot be consumed.
NO_STATE = "-1"

# Automaton tables.  They are module globals because the traversal functions
# below read them; load_fsa() fills them in place.
accepting_states: set[str] = set()
# (source state, letter) -> target states, in file order
fsa_description_map: dict[tuple[str, str], list[str]] = {}
# source state -> (target state, letter) pairs, in file order
fsa_description_direction: dict[str, list[tuple[str, str]]] = {}


def print_debug(*args, **kwargs):
    """Forward to print() only when DEBUG is enabled."""
    if DEBUG:
        print(*args, **kwargs)


def read_fsa_rows(path: str) -> list[list[str]]:
    """Read the tab-separated automaton file and drop a leading comment row."""
    with open(path, mode="r", newline="", encoding="utf8") as csvfile:
        rows = list(csv.reader(csvfile, delimiter="\t", quotechar="|"))
    # skip first line with comment (guarded: the file or the row may be empty)
    if rows and rows[0] and rows[0][0].startswith("#"):
        rows = rows[1:]
    return rows


def load_fsa(rows: list[list[str]]) -> None:
    """Populate the module-level automaton tables from parsed rows.

    Any previously loaded automaton is discarded.

    Raises:
        ValueError: a row has exactly two fields, i.e. a transition without
            a label.
    """
    accepting_states.clear()
    fsa_description_map.clear()
    fsa_description_direction.clear()

    for line_no, item in enumerate(rows, start=1):
        if not item:
            continue  # csv.reader yields [] for blank lines
        if len(item) == 1:
            accepting_states.add(item[0])
            continue
        if len(item) < 3:
            raise ValueError(
                f"malformed transition (expected source, target, label) | line: {line_no}"
            )
        source, target, labels = item[0], item[1], item[2]
        for letter in labels:
            if letter not in alfabet:
                print_debug(
                    "WARNING - letter not in alfabet: ", letter, "| line:", line_no
                )
            key = (source, letter)
            if key in fsa_description_map:
                print_debug(
                    f"WARNING - duplicate letter: key = ({source} | {letter}) | line: {line_no}"
                )
            # append in place instead of rebuilding the list on every row
            fsa_description_map.setdefault(key, []).append(target)
            fsa_description_direction.setdefault(source, []).append((target, letter))


# check if all letters are used once
def is_permutation(str1, str2):
    """Return True when both strings contain the same characters with the same counts."""
    return Counter(str1) == Counter(str2)


def find_missing_letters(str1, str2) -> str:
    """Return the characters of ``str2`` (in order) that do not occur in ``str1``."""
    return "".join(char for char in str2 if char not in str1)


def warn_incomplete_states() -> None:
    """Emit a debug warning for every state whose outgoing letters differ from ``alfabet``."""
    letters_by_state: dict[str, str] = {}
    for state, letter in fsa_description_map:
        letters_by_state[state] = letters_by_state.get(state, "") + letter
    for state, letters in letters_by_state.items():
        if not is_permutation(letters, alfabet):
            print_debug(
                f"WARNING - state {state} doesn't match full alphabet: {letters} | {alfabet} | diff - {find_missing_letters(letters, alfabet)}"
            )


def read_lines(path: str) -> list[str]:
    """Return the lines of a UTF-8 text file without their line terminators."""
    with open(path, mode="r", newline="", encoding="utf8") as file:
        return file.read().splitlines()


def is_correct(current_state: str) -> bool:
    """Return True when ``current_state`` is an accepting state."""
    return current_state in accepting_states


def traverse_to_description(current_state: str) -> list[str]:
    """Collect the label strings of paths leaving ``current_state``.

    A state without outgoing transitions yields ``[""]``.  Otherwise each
    outgoing letter is prepended to the endings found behind it; an empty
    ending is only kept when the state it stops in is accepting.

    NOTE: this recurses along the transitions, so it assumes the part of the
    automaton reachable from ``current_state`` has no cycles.
    """
    if current_state not in fsa_description_direction:
        return [""]  # the end
    good_end_states: list[str] = []
    for next_state, description in fsa_description_direction[current_state]:
        next_is_accepting = is_correct(next_state)
        for ending in traverse_to_description(next_state):
            if next_is_accepting or ending != "":
                good_end_states.append(description + ending)
    return good_end_states


def _states_after_word(start_state: str, word: str) -> list[str]:
    """Return every state reachable from ``start_state`` by consuming ``word``.

    Works iteratively (no recursion, no string slicing), so word length is
    not limited by the interpreter's recursion limit.  The result keeps the
    order in which transitions were declared and contains no duplicates; it
    is empty when the word cannot be consumed.
    """
    frontier = [start_state]
    for letter in word:
        # dict used as an ordered set: first-seen order, no duplicates
        next_frontier: dict[str, None] = {}
        for state in frontier:
            for target in fsa_description_map.get((state, letter), ()):
                next_frontier[target] = None
        if not next_frontier:
            return []
        frontier = list(next_frontier)
    return frontier


def traverse_word(current_state: str, word: str) -> str:
    """Return the state reached after reading ``word`` from ``current_state``.

    Returns ``"-1"`` when some letter has no transition.  If several
    transitions share a (state, letter) key, the end state of the first
    declared path that consumes the whole word is returned.  An empty word
    returns ``current_state`` unchanged.
    """
    reachable = _states_after_word(current_state, word)
    return reachable[0] if reachable else NO_STATE  # unspecified next state


def find_descriptions(current_state: str, word: str) -> list[str]:
    """Return the ``word;description`` lines for ``word``, sorted by description.

    A word that cannot be consumed, that ends in a state without a ``;``
    transition, or that yields no description at all is reported as the
    single line ``word;OOV``.
    """
    words_with_description: list[str] = [
        word + ";" + good_ending
        for state_after_word in _states_after_word(current_state, word)
        # .get(): the state may have no ";" transition (e.g. a word prefix)
        for next_state in fsa_description_map.get((state_after_word, SEMICOLON), ())
        for good_ending in traverse_to_description(next_state)
    ]
    if not words_with_description:
        return [word + ";OOV"]
    return sorted(words_with_description, key=lambda x: x.split(";")[1])


def compare_with_expected(test_in: list[str], test_out: list[str]) -> list[int]:
    """Print an OK/ERROR line per produced description and return mismatching line numbers.

    Produced lines are compared positionally with ``test_out``.  A produced
    line that has no counterpart because ``test_out`` is too short counts as
    a mismatch.
    """
    is_difference: list[int] = []
    test_out_numerator = 0
    for word in test_in:
        if not word:
            continue
        for description in find_descriptions("0", word):
            line_no = test_out_numerator + 1
            expected = (
                test_out[test_out_numerator]
                if test_out_numerator < len(test_out)
                else None
            )
            if description != expected:
                print(f"{line_no}\tERROR\t{word} | {description}")
                is_difference.append(line_no)
            else:
                print(f"{line_no}\tOK\t{description}")
            test_out_numerator += 1
    return is_difference


def main() -> None:
    """Load the automaton, analyse the input words and report differences."""
    if len(sys.argv) == 1:
        print("Default arguments parsed\n")
        sys.argv.append("medium.arg")
        sys.argv.append("medium.in")
        sys.argv.append("medium.exp")
    if len(sys.argv) < 4:
        sys.exit(f"usage: {sys.argv[0]} FSA_FILE INPUT_FILE EXPECTED_FILE")

    load_fsa(read_fsa_rows(sys.argv[1]))
    warn_incomplete_states()

    test_in = read_lines(sys.argv[2])
    test_out = read_lines(sys.argv[3])

    is_difference = compare_with_expected(test_in, test_out)
    if is_difference:
        print(is_difference)


if __name__ == "__main__":
    main()