change Task-G

2024-01-19 08:14:42 -08:00 · 2024-01-19 08:14:42 -08:00 · 7c1b86f28b
commit 7c1b86f28b
parent 322c4561dd
24 changed files with 211581 additions and 417674 deletions
--- a/TaskF00/polish_wiki_excerpt.in.txt
+++ b/TaskF00/polish_wiki_excerpt.in.txt
--- a/TaskF00/polish_wiki_excerpt.out
+++ b/TaskF00/polish_wiki_excerpt.out
--- a/TaskF00/run.py
+++ b/TaskF00/run.py
@ -0,0 +1,20 @@
 import re
 import sys
 # Translation table mapping the ASCII digits '0'..'9' to the letters 'a'..'j'.
 # Built once at import time instead of on every regex match.
 _DIGIT_TO_LETTER = str.maketrans('0123456789', 'abcdefghij')
 def substitute_digits(match):
    """Return the matched text with ASCII digits replaced by letters.

    Intended as a replacement callback for ``re.sub``: '0' becomes 'a',
    '1' becomes 'b', and so on up to '9' becoming 'j'.

    Args:
        match: A regex match object whose whole match is the digit run.

    Returns:
        The matched string with every ASCII digit substituted. Any other
        character is returned unchanged; this matters because ``\\d`` in a
        str pattern can also match non-ASCII decimal digits, which the
        previous dict lookup rejected with ``KeyError``.
    """
    return match.group().translate(_DIGIT_TO_LETTER)
 def substitute_4digits(input_string):
    """Replace each group of four consecutive digits in *input_string*.

    Groups are found left to right without overlapping, and each one is
    rewritten by ``substitute_digits``. Digits left over after the last
    complete group of four are kept as they are.
    """
    return re.sub(r'\d{4}', substitute_digits, input_string)
 # Stream stdin to stdout, rewriting four-digit groups in every line.
 # The line terminator is part of each line and is written back unchanged.
 for raw_line in sys.stdin:
    sys.stdout.write(substitute_4digits(raw_line))
--- a/TaskF00/simple.in.txt
+++ b/TaskF00/simple.in.txt
--- a/TaskF00/simple.out
+++ b/TaskF00/simple.out
@ -0,0 +1,3 @@
 dece 34  dfd gfd 5
 f33sdfsdbcdedsfsdf
 3r
--- a/TaskF01/description.txt
+++ b/TaskF01/description.txt
@ -0,0 +1,22 @@
 Napisać program, który wczytuje kolejne wiersze ze standardowego
 wejścia i analizuje każdy wiersz (bez znaku końca wiersza). Należy w
 jak największym stopniu wykorzystać wyrażenia regularne (np. nie wolno
 użyć negacji jako operacji w danym języku programowania, jeśli da się
 to wyrazić w samym wyrażeniu regularnym). Tam, gdzie to możliwe należy
 użyć pojedynczego wyrażenia regularnego.
 Write a program that reads consecutive lines from standard input
 and analyzes each line (without the newline character). You should
 use regular expressions to the greatest extent possible (e.g. you
 must not use negation in the programming language if it can be
 expressed within the regular expression itself). Wherever possible,
 use a single regular expression.
 For each word with at least one lower case letter and one capital letter
 change every lower case letter to capital case and change every capital case
 letter to lower. In this task word means the string of "\w" metacharacters,
 lower case letter is [a-ząćęłńóśźż] class,
 capital case letter is [A-ZĄĆĘŁŃÓŚŹŻ] class.
 POINTS: 2
 DEADLINE: 2024-01-07 23:59:59
--- a/TaskF01/polish_wiki_excerpt.exp
+++ b/TaskF01/polish_wiki_excerpt.exp
--- a/TaskG00/polish_wiki_excerpt.in.txt
+++ b/TaskG00/polish_wiki_excerpt.in.txt
--- a/TaskF01/polish_wiki_excerpt.out
+++ b/TaskF01/polish_wiki_excerpt.out
--- a/TaskF01/run.py
+++ b/TaskF01/run.py
@ -0,0 +1,17 @@
 import re
 import sys
 def swap_case(match):
    """Return the matched word with the case of every character inverted.

    Upper-case characters are lowered; every other character is passed
    through ``str.upper``, which leaves digits and underscores unchanged.
    """
    flipped = []
    for char in match.group():
        if char.isupper():
            flipped.append(char.lower())
        else:
            flipped.append(char.upper())
    return ''.join(flipped)
 def transform_text(input_text):
    """Swap letter case in every word that mixes lower and upper case.

    A word is a run of ``\\w`` characters. The two lookaheads require the
    word to contain at least one letter from the lower-case class and one
    from the upper-case class; words that qualify are rewritten by
    ``swap_case`` and all other text is returned untouched.
    """
    return re.sub(
        r'\b(?=\w*[a-ząćęłńóśźż])(?=\w*[A-ZĄĆĘŁŃÓŚŹŻ])\w+\b',
        swap_case,
        input_text,
    )
 # Transform stdin line by line and emit exactly one output line per input
 # line. Only the line terminator is removed before matching: a full strip()
 # would also discard leading/trailing whitespace, and writing the result
 # without a newline would glue all output lines together.
 for line in sys.stdin:
    output_text = transform_text(line.rstrip('\n'))
    sys.stdout.write(output_text + '\n')
--- a/TaskF01/simple.exp
+++ b/TaskF01/simple.exp
@ -0,0 +1,3 @@
 ala mA KOTa
 lallaa
 żUK
--- a/TaskF01/simple.in
+++ b/TaskF01/simple.in
@ -0,0 +1,3 @@
 ala Ma kotA
 lallaa
 Żuk
--- a/TaskF01/simple.out
+++ b/TaskF01/simple.out
--- a/TaskG00/nazwiska.txt
+++ b/TaskG00/nazwiska.txt
--- a/TaskG00/run.py
+++ b/TaskG00/run.py
@ -1,28 +0,0 @@
 import re2
 def build_surname_regex(surnames):
    """Compile an RE2 pattern matching any of *surnames* as a whole word.

    Args:
        surnames: Iterable of surname strings, taken literally.

    Returns:
        A compiled ``re2`` pattern of the form ``\\b(?:a|b|...)\\b``.

    Each surname is escaped so that regex metacharacters in the data are
    matched literally. Empty entries (e.g. from blank lines in the source
    file) are skipped, because an empty alternative would match at every
    word boundary and therefore select almost every input line.
    """
    alternatives = '|'.join(re2.escape(name) for name in surnames if name)
    return re2.compile(r'\b(?:' + alternatives + r')\b')
 def main():
    """Print every stdin line that contains one of the listed surnames.

    Surnames are read from a text file, one per line, and lower-cased; each
    input line is lower-cased before searching, and matching lines are
    printed in their original form.
    """
    # NOTE(review): this commit lists TaskG00/nazwiska.txt as the data file;
    # confirm that 'polish_surnames.txt' actually exists next to the script.
    with open('polish_surnames.txt', 'r', encoding='utf-8') as handle:
        names = []
        for entry in handle:
            names.append(entry.strip().lower())
    matcher = build_surname_regex(names)
    try:
        while True:
            # input() returns the line without its newline and raises
            # EOFError once stdin is exhausted.
            text = input()
            if matcher.search(text.lower()):
                print(text)
    except EOFError:
        # End of input: nothing more to process.
        pass
 if __name__ == "__main__":
    main()
--- a/TaskG03/description.txt
+++ b/TaskG03/description.txt
@ -1,4 +1,4 @@
-Use regular expressions to extract lines containing polish surnames.
+Use regular expressions to extract lines containing polish surnames. CASE INSENSITIVE
 Download list of polish male and female surnames from here:
@ -7,17 +7,17 @@ Download list of polish male and female surnames from here:
 * https://dane.gov.pl/pl/dataset/1681,nazwiska-osob-zyjacych-wystepujace-w-rejestrze-pesel/resource/22817/table?page=1&per_page=20&q=&sort=
-Extract lines from stdin containing any of the surname.
+Extract lines from stdin containing any of the surnames.
-Look only for surnames in lowercase.
+Look for surnames regardless of letter case (case-insensitive).
 The surname does not have to be surrounded by space or any other special characters.
 Don't search for declined forms of surnames.
-Check either NFA (e.g. re python library) and DFA (google re2) and compare run speed.
+Check both NFA (e.g. the Python re library) and DFA (Google re2) approaches and compare them.
-Submit solution based on DFA library.
+Submit solution based on a better method.
 NOTE: You could extract the polish surnames list, save it to a file, then commit the file to your repository.
 NOTE: You may set max_mem to a higher value than the default in re2 library.
-POINTS: 3
+POINTS: 2
 DEADLINE: 2024-01-27 23:59:59
--- a/TaskG03/nazwiska.txt
+++ b/TaskG03/nazwiska.txt
--- a/TaskG03/polish_wiki_excerpt.exp
+++ b/TaskG03/polish_wiki_excerpt.exp
--- a/TaskG03/polish_wiki_excerpt.in
+++ b/TaskG03/polish_wiki_excerpt.in
--- a/TaskG03/polish_wiki_excerpt.out
+++ b/TaskG03/polish_wiki_excerpt.out
--- a/TaskG03/run.py
+++ b/TaskG03/run.py
@ -0,0 +1,18 @@
 import re2
 import sys
 def load_surnames(file_path):
    """Read surnames from a UTF-8 text file, one surname per line.

    Args:
        file_path: Path of the file to read.

    Returns:
        A list of surnames with surrounding whitespace removed, in file
        order. Blank lines are dropped: an empty string joined into a regex
        alternation becomes an empty alternative that matches every line.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        stripped = (line.strip() for line in file)
        return [name for name in stripped if name]
 polish_surnames = load_surnames('nazwiska.txt')
 setting = re2.Options()
 # Raise RE2's memory budget to 1 GiB; the alternation of all surnames is
 # large and the task description suggests increasing max_mem for it.
 setting.max_mem = 1 << 30
 # Input lines are lower-cased before searching, so the surnames must be
 # lower-cased as well for the match to be case-insensitive. Names are
 # escaped so they match literally, and empty entries are skipped because an
 # empty alternative would match every line.
 pattern = re2.compile(
    '|'.join(re2.escape(name.lower()) for name in polish_surnames if name),
    setting,
 )
 for line in sys.stdin:
    if pattern.search(line.lower()):
        print(line.strip())
--- a/TaskG04/polish_wiki_excerpt.in.txt
+++ b/TaskG04/polish_wiki_excerpt.in.txt
--- a/TaskG04/polish_wiki_excerpt.out
+++ b/TaskG04/polish_wiki_excerpt.out
--- a/TaskG04/run.py
+++ b/TaskG04/run.py
@ -0,0 +1,17 @@
 import sys
 def binary_to_utf8(binary_input):
    """Decode a string of binary digits into UTF-8 text.

    The digits are parsed as one base-2 integer (``int`` tolerates
    surrounding whitespace such as a trailing newline), converted to the
    minimal big-endian byte string and decoded as UTF-8. Because the byte
    length comes from the integer's bit length, leading all-zero bytes are
    not preserved.

    Raises:
        ValueError: If the input is not a valid base-2 number.
        UnicodeDecodeError: If the resulting bytes are not valid UTF-8.
    """
    number = int(binary_input, 2)
    byte_count = (number.bit_length() + 7) // 8
    return number.to_bytes(byte_count, 'big').decode('utf-8')
 # Decode each non-blank stdin line from binary digits to text; lines that
 # contain only whitespace are echoed back exactly as they were read.
 for line in sys.stdin:
    if line.strip():
        print(binary_to_utf8(line))
    else:
        print(line, end='')