jfz-2023-s473555/TaskC05/run.py

import sys
import csv

from numpy import append

# When True, print_debug() forwards its arguments to print(); otherwise it stays silent.
DEBUG = False
# Letters every state is checked against; only used for the debug warnings in this script.
alfabet = "xyz"
# Symbol looked up right after a word has been consumed (word/description separator).
SEMICOLON = ";"

# sys.setrecursionlimit(4100)


def print_debug(*args, **kwargs):
    """Behave like ``print`` while the module flag ``DEBUG`` is truthy, else do nothing.

    All positional and keyword arguments are passed to ``print`` unchanged.
    """
    if not DEBUG:
        return
    print(*args, **kwargs)


if len(sys.argv) == 1:
    # No command-line arguments were given: fall back to the "medium" files
    # (automaton description, input words, expected output - in that order).
    print("Default arguments parsed\n")
    sys.argv.extend(["medium.arg", "medium.in", "medium.exp"])

# Load the automaton description: a tab-separated file in which a one-field
# row names an accepting state and a longer row is read as
# "source<TAB>target<TAB>symbols" (one transition per character of symbols).
with open(sys.argv[1], mode="r", newline="", encoding="utf8") as csvfile:
    filereader = csv.reader(csvfile, delimiter="\t", quotechar="|")
    fsa_description = list(filereader)

# skip first line with comment; guarded so that an empty file, a blank first
# line or an empty first field no longer raises IndexError
if fsa_description and fsa_description[0] and fsa_description[0][0].startswith("#"):
    fsa_description = fsa_description[1:]

# get accepting states
accepting_states = []

# (source state, symbol) -> target states reachable over that symbol
fsa_description_map: dict[tuple[str, str], list[str]] = {}
# source state -> outgoing (target state, symbol) edges, in file order
fsa_description_direction: dict[str, list[tuple[str, str]]] = {}
is_error = False
for num, item in enumerate(fsa_description):
    if not item:
        # csv.reader yields an empty list for a blank line; nothing to record
        continue

    if len(item) == 1:
        accepting_states.append(item[0])
        continue

    if len(item) == 2:
        # A transition needs a symbol column; report it instead of crashing
        # on item[2] and abort once the whole file has been checked.
        print(
            f"ERROR - transition without symbol column | line: {num + 1}",
            file=sys.stderr,
        )
        is_error = True
        continue

    for letter in item[2]:
        if letter not in alfabet:
            print_debug(
                "WARNING - letter not in alfabet: ", letter, "| line:", num + 1
            )
        tupleItems = (item[0], letter)
        if tupleItems in fsa_description_map:
            print_debug(
                f"WARNING - duplicate letter: key = ({item[0]} | {letter}) | line: {num + 1}"
            )
        # append in place, creating the list on first use (no list copy per insert)
        fsa_description_map.setdefault(tupleItems, []).append(item[1])
        fsa_description_direction.setdefault(item[0], []).append((item[1], letter))

if is_error:
    sys.exit(-1)

# For every source state, concatenate the symbols it has transitions for
# (in the insertion order of fsa_description_map).
states = {}
for item in fsa_description_map:
    states[item[0]] = states.get(item[0], "") + item[1]


from collections import Counter


# check if all letters are used once
def is_permutation(str1, str2):
    """Return True when both arguments contain the same items with the same multiplicities."""
    first_counts = Counter(str1)
    second_counts = Counter(str2)
    return first_counts == second_counts


def find_missing_letters(str1, str2) -> str:
    """Return the characters of ``str2`` that do not occur in ``str1``.

    The characters keep their order from ``str2``; a character repeated in
    ``str2`` and absent from ``str1`` is repeated in the result as well.
    """
    return "".join(char for char in str2 if char not in str1)


# Sanity check (only visible when DEBUG is on): warn about every state whose
# outgoing symbols are not exactly one occurrence of each alphabet letter.
for state in states:
    if is_permutation(states[state], alfabet):
        continue
    print_debug(
        f"WARNING - state {state} doesn't match full alphabet: {states[state]} | {alfabet} | diff - {find_missing_letters(states[state], alfabet)}"
    )
    # Deliberately non-fatal: the script keeps going after the warning.


# Input words, one per line.
with open(sys.argv[2], mode="r", newline="", encoding="utf8") as file:
    test_in = file.read().splitlines()

# Expected output lines; compared position by position with the produced ones.
with open(sys.argv[3], mode="r", newline="", encoding="utf8") as file:
    test_out = file.read().splitlines()


def is_correct(current_state: str) -> bool:
    """Tell whether ``current_state`` is listed among the accepting states."""
    return current_state in accepting_states


def traverse_to_description(current_state: str) -> list[str]:
    """Enumerate the symbol strings that lead from ``current_state`` to an accepting state.

    Args:
        current_state: State to start from.

    Returns:
        ``[""]`` when ``current_state`` has no outgoing transitions (the
        caller decides whether stopping there is valid). Otherwise a list
        with one string per path that ends in an accepting state, built from
        the symbols along that path.

    Note:
        There is no cycle detection; a cyclic automaton makes this recurse
        until ``RecursionError``.
    """
    if current_state not in fsa_description_direction:
        return [""]  # the end

    good_end_states: list[str] = []
    for next_state, description in fsa_description_direction[current_state]:
        next_is_accepting = is_correct(next_state)

        # An accepting state that still has outgoing transitions is a valid
        # stopping point on its own. The recursive call only reports the
        # longer continuations, so the short path has to be recorded here.
        if next_is_accepting and next_state in fsa_description_direction:
            good_end_states.append(description)

        for new_good_end_state in traverse_to_description(next_state):
            # "" means next_state is a dead end: keep it only if accepting.
            if next_is_accepting or new_good_end_state != "":
                good_end_states.append(description + new_good_end_state)

    return good_end_states


def find_descriptions(current_state: str, word: str) -> list[str]:
    """Return the ``word;description`` lines the automaton yields for ``word``.

    Args:
        current_state: State from which ``word`` is consumed.
        word: Word to analyse.

    Returns:
        The lines ``word + ";" + description`` sorted by description. A
        single ``word + ";OOV"`` line is returned when the word cannot be
        consumed, when the reached state has no ``;`` transition, or when no
        description is found behind it.
    """
    state_after_word = traverse_word(current_state, word)
    if state_after_word != "-1":
        endings: list[str] = []
        # .get(): a word that is only a prefix of known words ends in a state
        # without a ";" transition; that used to raise KeyError.
        for next_state in fsa_description_map.get((state_after_word, SEMICOLON), []):
            endings.extend(traverse_to_description(next_state))

        if endings:
            return [word + ";" + ending for ending in sorted(endings)]

    # Every word produces at least one line so the positional comparison with
    # the expected output stays aligned.
    return [word + ";OOV"]


def traverse_word(current_state: str, word: str) -> str:
    """Consume ``word`` symbol by symbol and return the state that is reached.

    When a (state, symbol) pair has several targets they are tried in the
    order they are stored, and the search backtracks if a target turns out
    to be a dead end. The previous version returned after the first target,
    so any alternative was never explored.

    The walk is iterative, so long words neither hit the recursion limit nor
    pay for repeated ``word[1:]`` copies.

    Args:
        current_state: State to start from.
        word: Symbols to consume; an empty word returns ``current_state``.

    Returns:
        The reached state, or the sentinel ``"-1"`` when no path consumes
        the whole word.
    """
    word_length = len(word)
    pending: list[tuple[str, int]] = [(current_state, 0)]
    # (state, position) pairs already expanded; revisiting one cannot succeed
    # where the first visit failed, so it is skipped.
    expanded: set[tuple[str, int]] = set()

    while pending:
        state, position = pending.pop()
        if position == word_length:
            return state
        if (state, position) in expanded:
            continue
        expanded.add((state, position))

        targets = fsa_description_map.get((state, word[position]), [])
        # Reversed push so the first listed target is popped (tried) first.
        pending.extend((target, position + 1) for target in reversed(targets))

    return "-1"  # unspecified next state


# Analyse every input word and compare each produced line with the expected
# line at the same position; the 1-based numbers of mismatches are collected.
is_difference = []
test_out_numerator = 0
for word in test_in:
    current_state = "0"  # every word is consumed starting from state "0"
    descriptions = []
    if word:
        descriptions = find_descriptions(current_state, word)

    for description in descriptions:
        print(f"{test_out_numerator + 1}\t", end="")
        # More produced lines than expected ones counts as a mismatch instead
        # of raising IndexError.
        if test_out_numerator < len(test_out):
            expected = test_out[test_out_numerator]
        else:
            expected = None

        if description != expected:
            print(f"ERROR\t{word} | {description}")
            is_difference.append(test_out_numerator + 1)
        else:
            print(f"OK\t{description}")

        test_out_numerator += 1

if is_difference:
    print(is_difference)
TaskC05 work in progress 2023-11-27 02:02:29 +01:00			`import sys`
			`import csv`

			`from numpy import append`

			`DEBUG = False`
			`alfabet = "xyz"`
			`SEMICOLON = ";"`

			`# sys.setrecursionlimit(4100)`


			`def print_debug(args, *kwargs):`
			`if DEBUG:`
			`print(args, *kwargs)`


			`if len(sys.argv) == 1:`
			`print("Default arguments parsed\n")`
TaskC05 2023-11-28 00:45:11 +01:00			`sys.argv.append("medium.arg")`
			`sys.argv.append("medium.in")`
			`sys.argv.append("medium.exp")`
TaskC05 work in progress 2023-11-27 02:02:29 +01:00
			`with open(sys.argv[1], mode="r", newline="", encoding="utf8") as csvfile:`
			`filereader = csv.reader(csvfile, delimiter="\t", quotechar="\|")`
			`fsa_description = list(filereader)`

			`# skip first line with comment`
			`if fsa_description[0][0][0] == "#":`
			`fsa_description = fsa_description[1:]`

			`# get accepting states`
TaskC05 2023-11-28 00:45:11 +01:00			`accepting_states = []`
TaskC05 work in progress 2023-11-27 02:02:29 +01:00
			`fsa_description_map: dict[tuple[str, str], list[str]] = {}`
			`fsa_description_direction: dict[str, list[tuple[str, str]]] = {}`
			`is_error = False`
			`for num, item in enumerate(fsa_description):`
TaskC05 2023-11-28 00:45:11 +01:00			`if len(item) == 1:`
			`accepting_states.append(item[0])`
			`continue`

TaskC05 work in progress 2023-11-27 02:02:29 +01:00			`for letter in item[2]:`
			`if letter not in alfabet:`
			`print_debug(`
			`"WARNING - letter not in alfabet: ", letter, "\| line:", num + 1`
			`)`
			`tupleItems = (item[0], letter)`
			`if (item[0], letter) in fsa_description_map:`
			`print_debug(`
			`f"WARNING - duplicate letter: key = ({item[0]} \| {letter}) \| line: {num + 1}"`
			`)`
			`# append to description map or create it if it doesn't exist`
			`fsa_description_map[tupleItems] = fsa_description_map.get(`
			`tupleItems, []`
			`) + [item[1]]`
TaskC05 2023-11-28 00:45:11 +01:00			`fsa_description_direction[item[0]] = fsa_description_direction.get(`
			`item[0], []`
			`) + [(item[1], letter)]`
TaskC05 work in progress 2023-11-27 02:02:29 +01:00
			`if is_error:`
			`exit(-1)`

			`states = {}`
			`for item in fsa_description_map:`
			`states[item[0]] = states[item[0]] + item[1] if item[0] in states else item[1]`


			`from collections import Counter`


			`# check if all letters are used once`
			`def is_permutation(str1, str2):`
			`return Counter(str1) == Counter(str2)`


			`def find_missing_letters(str1, str2) -> str:`
			`missing_letters = ""`
			`for char in str2:`
			`if char not in str1:`
			`missing_letters += char`
			`return missing_letters`


			`for state in states:`
			`if not is_permutation(states[state], alfabet):`
			`print_debug(`
			`f"WARNING - state {state} doesn't match full alphabet: {states[state]} \| {alfabet} \| diff - {find_missing_letters(states[state], alfabet)}"`
			`)`
			`# exit(-1)`


			`with open(sys.argv[2], mode="r", newline="", encoding="utf8") as file:`
			`content = file.read()`
			`test_in = content.splitlines()`

			`with open(sys.argv[3], mode="r", newline="", encoding="utf8") as file:`
			`content = file.read()`
			`test_out = content.splitlines()`


			`def is_correct(current_state: str) -> bool:`
TaskC05 2023-11-28 00:45:11 +01:00			`if current_state in accepting_states:`
TaskC05 work in progress 2023-11-27 02:02:29 +01:00			`return True`
			`return False`


TaskC05 2023-11-28 00:45:11 +01:00			`def traverse_to_description(current_state: str) -> list[str]:`
TaskC05 work in progress 2023-11-27 02:02:29 +01:00			`if current_state not in fsa_description_direction:`
TaskC05 2023-11-28 00:45:11 +01:00			`return [""] # the end`
TaskC05 work in progress 2023-11-27 02:02:29 +01:00
TaskC05 2023-11-28 00:45:11 +01:00			`good_end_states: list[str] = []`
TaskC05 work in progress 2023-11-27 02:02:29 +01:00			`for next_state, description in fsa_description_direction[current_state]:`
TaskC05 2023-11-28 00:45:11 +01:00			`new_good_end_states = traverse_to_description(next_state)`
			`for new_good_end_state in new_good_end_states:`
			`if is_correct(next_state) or new_good_end_state != "":`
			`good_end_states.append(description + new_good_end_state)`
TaskC05 work in progress 2023-11-27 02:02:29 +01:00
TaskC05 2023-11-28 00:45:11 +01:00			`return good_end_states`
TaskC05 work in progress 2023-11-27 02:02:29 +01:00

TaskC05 2023-11-28 00:45:11 +01:00			`def find_descriptions(current_state: str, word: str) -> list[str]:`
			`state_after_word = traverse_word(current_state, word)`
			`if state_after_word == "-1":`
			`return [word + ";OOV"]`

			`words_with_description: list[str] = []`
			`for next_state in fsa_description_map[(state_after_word, SEMICOLON)]:`
			`good_endings = traverse_to_description(next_state)`
			`for good_ending in good_endings:`
			`words_with_description.append(word + ";" + good_ending)`
TaskC05 work in progress 2023-11-27 02:02:29 +01:00
TaskC05 2023-11-28 00:45:11 +01:00			`sorted_descriptions = sorted(words_with_description, key=lambda x: x.split(";")[1])`
			`return sorted_descriptions`
TaskC05 work in progress 2023-11-27 02:02:29 +01:00
TaskC05 2023-11-28 00:45:11 +01:00
			`def traverse_word(current_state: str, word: str) -> str:`
			`if len(word) == 0:`
			`return current_state`

			`ending_state = ""`
			`letter = word[0]`
			`if (current_state, letter) in fsa_description_map:`
			`next_states = fsa_description_map[(current_state, letter)]`
			`for next_state in next_states:`
			`ending_state = traverse_word(next_state, word[1:])`
			`return ending_state`

			`return "-1" # unspecified next state`
TaskC05 work in progress 2023-11-27 02:02:29 +01:00

			`is_difference = []`
TaskC05 2023-11-28 00:45:11 +01:00			`test_out_numerator = 0`
			`for word in test_in:`
TaskC05 work in progress 2023-11-27 02:02:29 +01:00			`current_state = "0"`
TaskC05 2023-11-28 00:45:11 +01:00			`descriptions = []`
TaskC05 work in progress 2023-11-27 02:02:29 +01:00			`if len(word) != 0:`
TaskC05 2023-11-28 00:45:11 +01:00			`descriptions = find_descriptions(current_state, word)`

			`for description in descriptions:`
			`print(str(test_out_numerator + 1) + "\t", end="")`
			`if description != test_out[test_out_numerator]:`
			`print(f"ERROR\t{word} \| {description}")`
			`is_difference.append(test_out_numerator + 1)`
			`else:`
			`print(f"OK\t{description}")`

			`test_out_numerator += 1`
TaskC05 work in progress 2023-11-27 02:02:29 +01:00
			`if len(is_difference) != 0:`
			`print(is_difference)`