F fixes

2024-01-14 17:25:11 +01:00 · 2024-01-14 17:25:11 +01:00 · 566bb7c07c
commit 566bb7c07c
parent bc7afb24c1
38 changed files with 489557 additions and 78 deletions
--- a/.idea/encodings.xml
+++ b/.idea/encodings.xml
@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="Encoding">
+    <file url="file://$PROJECT_DIR$/TaskF05/simple.out" charset="windows-1252" />
+  </component>
+</project>
--- a/README.md
+++ b/README.md
@ -10,13 +10,13 @@ Gdyby była potrzeba przedyskutowania czegoś to możemy zostać po zajęciach.
 W celu zaliczenia przedmiotu należy zdobyć punkty za zadania na laboratoriach oraz zaliczyć kolokwium.
 Punktowane zadania będziemy wykonywać na laboratoriach oraz po nich (przed następnymi zajęciami), ich ilość determinuje ocenę.
 Oprócz tego należy zaliczyć kolokwium z wyrażeń regularnych na ostatnich zajęciach. Sam wynik kolokwium 
-nie będzie wpływał na ocenę, ale bez zdanego kolowkium nie da się zaliczyć przedmiotu. Punktacja za zadania jest następująca:
-  - mniej niż 30 punktów - 2
-  - 30-34- 3
-  - 35-39- 3.5
-  - 40-44- 4
-  - 45-49- 4.5
-  - więcej niż 49- 5
+nie będzie wpływał na ocenę, ale bez zdanego kolokwium nie da się zaliczyć przedmiotu. Punktacja za zadania jest następująca:
+  - mniej niż 29 punktów - 2
+  - 29-33- 3
+  - 34-38- 3.5
+  - 39-43- 4
+  - 44-48- 4.5
+  - więcej niż 48- 5

 #### Wysyłanie zadań 

@ -239,3 +239,156 @@ re.search('\\', r'a\bc')
 re.search(r'\\', r'a\bc')
 re.search('\\\\', r'a\bc')
 ```
+## Zajęcia 4 27.11.2023 Wyrażenia regularne 2
+
+E00 - E09 - po jedno dla każdego
+
+E10 - E36 - po jedno dla każdego
+
+E37 - E43 - po jedno dla każdego
+
+E44 - E48 - po jedno dla każdego
+
+## Zajęcia 5 11.12.2023 Wyrażenia regularne 3
+
+F00 - F05 - do wykonania przez każdego
+
+Proszę o przekopiowanie sobie pliku polish_wiki_excerpt.in z zadania F00 do katalogów z pozostałymi zadaniami
+
+#### RE SUB
+```
+re.sub(pattern, replacement, string)
+
+re.sub('a','b', 'ala ma kota')
+```
+
+#### backreferencje:
+
+```
+
+re.search(r' \d+ \d+', 'ala ma 41 41 kota')
+re.search(r' \d+ \d+', 'ala ma 41 123 kota')
+re.search(r' (\d+) \1', 'ala ma 41 41 kota')
+re.search(r' (\d+) \1', 'ala ma 41 123 kota')
+```
+
+#### lookahead (to są takie asercje):
+```
+re.search(r'ma kot', 'ala ma kot')
+re.search(r'ma kot(?=[ay])', 'ala ma kot')
+re.search(r'ma kot(?=[ay])', 'ala ma kotka')
+re.search(r'ma kot(?=[ay])', 'ala ma koty')
+re.search(r'ma kot(?=[ay])', 'ala ma kota')
+
+re.search(r'ma kot(?![ay])', 'ala ma kot')
+re.search(r'ma kot(?![ay])', 'ala ma kotka')
+re.search(r'ma kot(?![ay])', 'ala ma koty')
+re.search(r'ma kot(?![ay])', 'ala ma kota')
+```
+
+#### named groups
+```
+r = re.search(r'ma (?P<ilekotow>\d+) kotow i (?P<ilepsow>\d+) psow', 'ala ma 100 kotow i 200 psow')
+r.groups()
+r.group('ilepsow')
+r.group('ilekotow')
+```
+
+#### re.split
+```
+('a,b.c,d').split(',')
+('a,b.c,d').split(',')
+('a,b.c,d').split(',.')
+re.split(r',', 'a,b.c,d') 
+re.split(r'[.,]', 'a,b.c,d') 
+```
+#### \w word character
+```
+\w - matchuje Unicode word character, jeżeli flaga ASCII to [a-zA-Z0-9_]
+\W - odwrotne do \w, jeżeli flaga ASCII to [^a-zA-Z0-9_]
+
+re.findall(r'\w+', 'ala ma 3 koty.')
+re.findall(r'\W+', 'ala ma 3 koty.')
+```
+#### początek albo koniec słowa | word boundary
+```
+re.search(r'\bkot\b', 'Ala ma kota')
+re.search(r'\bkot\b', 'Ala ma kot')
+re.search(r'\bkot\b', 'Ala ma kot.')
+re.search(r'\bkot\b', 'Ala ma kot ')
+
+re.search(r'\Bot\B', 'Ala ma kot ')
+re.search(r'\Bot\B', 'Ala ma kota ')
+```
+#### MULTILINE
+```
+re.findall(r'^Ma', 'Ma kota Ala\nMa psa Jacek') 
+re.findall(r'^Ma', 'Ma kota Ala\nMa psa Jacek', re.MULTILINE)
+```
+F00 - F05 - do wykonania przez każdego
+
+
+## Zajęcia 6 8.01.2024 Kodowanie i re2
+
+Proszę o przekopiowanie sobie pliku polish_wiki_excerpt.in z zadania F00 do katalogów G00, G03.
+
+Instalacja biblioteki re2: https://pypi.org/project/google-re2/
+
+### DFA i NDFA
+
+```
+import re2 as re
+n = 50
+regexp =  "a?"*n+"a"*n
+s = "a"*n
+re.match(regexp, s)
+```
+
+```
+re.match(r"(\d)abc\1", "3abc3") # re2 nie obsługuje backreferencji
+```
+
+re2 max memory - podniesienie limitu
+```
+setting = re2.Options()
+setting.max_mem = 1 << 30 # to jest rozmiar podany w bajtach, czyli tutaj 1GB - to jest maksimum o ile możemy podnieść limit
+pattern = re2.compile(regexp, setting)
+```
+
+time # mierzenie czasu działania
+```
+start = time.time()
+withre2()
+d1 = time.time() - start
+print(f'That took {d1:.2f} seconds.\n')
+```
+Gdyby ktoś chciał poczytać więcej:
+https://swtch.com/~rsc/regexp/regexp1.html
+
+### UTF-8
+```
+c = "ℋ"
+ord(c)
+chr(8459)
+8* 16**2 + 0 * 16**(1) + 0*16**(0)
+15*16**3 + 15* 16**2 + 15 * 16**(1) + 15*16**(0)
+```
+
+```
+xxd -b file
+xxd  file
+```
+
+## KOLOKWIUM 2024-01-22
+Operatory, obowiązujące na kolokwium
+====================================
+
+* kwantyfikatory — `*` `+` `?` `{n}` `{n,}` `{n,m}`
+* alternatywa — `|`
+* klasy znaków — `[...]`
+* zanegowane klasy znaków — `[^...]`
+* dowolny znak — `.`
+* unieważnianie znaków specjalnych — `\`
+* operatory zakotwiczające — `^` `$`
+
+W repozytorium znajdują się przykładowe pliki z zadaniami.
--- a/TaskF00/polish_wiki_excerpt.out
+++ b/TaskF00/polish_wiki_excerpt.out
--- a/TaskF00/run.py
+++ b/TaskF00/run.py
@ -1,7 +1,6 @@
 import sys, re

 for line in sys.stdin:
-    line = line.strip()
    list_words = re.findall(r'[0-9]{4}', line)
    for word in list_words:
        modified_word = re.sub('0', 'a', word)
@ -15,4 +14,7 @@ for line in sys.stdin:
        modified_word = re.sub('8', 'i', modified_word)
        modified_word = re.sub('9', 'j', modified_word)
        line = re.sub(word, modified_word, line)
-    print(line)
+    sys.stdout.write(line)
+    sys.stdout.flush()
+
+    #run.py < simple.in > simple.out
--- a/TaskF00/simple.out
+++ b/TaskF00/simple.out
@ -0,0 +1,3 @@
+dece 34  dfd gfd 5
+f33sdfsdbcdedsfsdf
+3r
--- a/TaskF01/polish_wiki_excerpt.out
+++ b/TaskF01/polish_wiki_excerpt.out
--- a/TaskF01/run.py
+++ b/TaskF01/run.py
@ -1,58 +1,13 @@
 import re
 import sys

-delimiters = ["'", '"', '’', '-', ':', "|", ".", ",", " "]
-def split_with_multiple_delimiters(text):
-    list = []
-    previousI = 0
-    for i, char in enumerate(text):
-        if char in delimiters:
-            list.append(text[previousI:i])
-            previousI = i
-        if i == len(text)-1:
-            list.append(text[previousI:i+1])
-    return list
+
+def switch_case(m):
+    return m.group(0).swapcase()


-def convert_case(original_line):
-    words = original_line.split()
-    changed_line = r''
-    for word in words:
-        word = word.strip()
-        split_word = split_with_multiple_delimiters(word)
-        for s_word in split_word:
-            converted_word = r''
-            lower_set = False
-            upper_set = False
-            for char in s_word:
-                if re.match(r'[a-ząćęłńóśźż]', char):
-                    lower_set = True
-                elif re.match(r'[A-ZĄĆĘŁŃÓŚŹŻ]', char):
-                    upper_set = True
-                else:
-                    continue
-            if upper_set and lower_set:
-                for char in s_word:
-                    if char.islower():
-                        if re.match(r'[a-ząćęłńóśźż]', char):
-                            converted_word += char.upper()
-                        else:
-                            converted_word += char
-                    elif char.isupper():
-                        if re.match(r'[A-ZĄĆĘŁŃÓŚŹŻ]', char):
-                            converted_word += char.lower()
-                        else:
-                            converted_word += char
-                    else:
-                        converted_word += char
-                changed_line += converted_word
-            else:
-                changed_line += s_word
-        changed_line += ' '
-    return changed_line
-
-
-for line in sys.stdin:
-    line = line.strip('\n')
-    converted_line = convert_case(line).strip()
-    print(converted_line)
+if __name__ == "__main__":
+    for _, l in enumerate(sys.stdin, start=1):
+        p = re.compile(r'\b(?:[a-ząćęłńóśźż]+[A-ZĄĆĘŁŃÓŚŹŻ]|[A-ZĄĆĘŁŃÓŚŹŻ]+[a-ząćęłńóśźż]+)\w*\b')
+        r = p.sub(switch_case, l)
+        print(r, end="")
--- a/TaskF01/simple.out
+++ b/TaskF01/simple.out
@ -0,0 +1,3 @@
+ala mA KOTa
+lallaa
+Żuk
--- a/TaskF02/polish_wiki_excerpt.out
+++ b/TaskF02/polish_wiki_excerpt.out
--- a/TaskF02/simple.out
+++ b/TaskF02/simple.out
@ -0,0 +1,3 @@
+7 2 0 2
+6 0 0 0
+5 1 1 4
--- a/TaskF03/polish_wiki_excerpt.out
+++ b/TaskF03/polish_wiki_excerpt.out
--- a/TaskF03/simple.out
+++ b/TaskF03/simple.out
@ -0,0 +1,2 @@
+2 1
+1 0
--- a/TaskF05/run.py
+++ b/TaskF05/run.py
@ -3,16 +3,20 @@ import sys


 def replaceThirdWordWithX(input):
-    group1 = input.group(1)
-    empty1 = input.group(2)
-    group2 = input.group(3)
-    empty2 = input.group(4)
-    group3 = "x" * (len(input.group(5)))
-    rest = input.group(6)
-    return f'{group1}{empty1}{group2}{empty2}{group3}{rest}'
+    empty0 = input.group(1)
+    group1 = input.group(2)
+    empty1 = input.group(3)
+    group2 = input.group(4)
+    empty2 = input.group(5)
+    group3 = "x" * (len(input.group(6)))
+    rest = input.group(7)
+    if empty0 is None:
+        return f'{group1}{empty1}{group2}{empty2}{group3}{rest}'
+    else:
+        return f'{empty0}{group1}{empty1}{group2}{empty2}{group3}{rest}'


-pattern = r'(\w+)(\W+)(\w+)(\W+)(\w+)(.*)'
+pattern = r'(\W+)?(\w+)(\W+)(\w+)(\W+)(\w+)(.*)'

 for line in sys.stdin:
    line = line.strip('\n')
@ -20,7 +24,6 @@ for line in sys.stdin:
    match = re.match(pattern, line)
    if match:
        result = replaceThirdWordWithX(match)
-        print(re.sub(pattern, result, line))
+        print(result)
    else:
        print(line)
-
--- a/TaskF05/simple.in
+++ b/TaskF05/simple.in
@ -1,2 +1,2 @@
-Mam 2 jabłka i 35 banananów.
+"Mam 2 jabłka i 35 banananów.
 Widziałem 2 bociany.
--- a/TaskF05/simple.out
+++ b/TaskF05/simple.out
@ -0,0 +1,2 @@
+"Mam 2 xxxx‚ka i 35 banananĂłw.
+WidziaĹ‚em x bociany.
--- a/TaskG00/description.txt
+++ b/TaskG00/description.txt
@ -0,0 +1,23 @@
+Use regular expressions to extract lines containing polish surnames.
+
+Download list of polish male and female surnames from here:
+
+
+* https://dane.gov.pl/pl/dataset/1681,nazwiska-osob-zyjacych-wystepujace-w-rejestrze-pesel/resource/35279/table?page=1&per_page=20&q=&sort=
+* https://dane.gov.pl/pl/dataset/1681,nazwiska-osob-zyjacych-wystepujace-w-rejestrze-pesel/resource/22817/table?page=1&per_page=20&q=&sort=
+
+
+Extract lines from stdin containing any of the surnames.
+Look only for surnames in lowercase.
+The surname does not have to be surrounded by space or any other special characters.
+Don't search for declined forms of surnames.
+
+Check both NFA (e.g. re python library) and DFA (google re2) and compare run speed.
+
+Submit solution based on DFA library.
+
+NOTE: You could extract the polish surnames list, save it to a file, then commit the file to your repository.
+NOTE: You may set max_mem to a higher value than the default in re2 library.
+
+POINTS: 3
+DEADLINE: 2024-01-27 23:59:59
--- a/TaskG00/polish_wiki_excerpt.exp
+++ b/TaskG00/polish_wiki_excerpt.exp
--- a/TaskG01/description.txt
+++ b/TaskG01/description.txt
@ -0,0 +1,44 @@
+Use regular expressions to mark Polish first-person masculine forms.
+
+You should handle the following types of expressions:
+
+* first-person masculine past forms of verbs ("zrobiłem", "pisałem", etc.),
+* first-person singular masculine forms of the verb "być" ("be") combined
+  with singular masculine nominative forms of adjectives ("wysoki", "sprytny", etc.),
+  assuming that the form of the verb "być" is to the left of the adjective, not
+  more than 3 other words,
+* the verb "będę" combined with the past participle (i.e. 3rd person
+  masculine imperfect form, e.g. "robił", "pisał"), assuming
+  that "będę" is to the left of the participle, separated from it
+  by not more than 3 other words, OR directly
+  to the right of the participle ("robił będę").
+
+The first-person masculine forms should be marked with curly brackets.
+You should mark only the masculine form. Do not mark the form of "być"
+(unless it is clearly a masculine form, i.e. for "byłem").
+
+The match should be case-insensitive.
+
+The PoliMorf dictionary of inflected forms should be applied:
+http://zil.ipipan.waw.pl/PoliMorf?action=AttachFile&do=get&target=PoliMorf-0.6.7.tab.gz
+
+Suggested steps:
+
+1. Extract all the needed forms from the PoliMorf dictionary:
+
+* 1st person masculine past forms of verbs, unfortunately
+  this form is not directly present in the lexicon, you need
+  to add "em" to the 3rd person masculine form ("zrobił" => "zrobiłem")
+* singular masculine nominative forms of adjectives
+* masculine past participle (3rd person masculine imperfect forms of verbs)
+
+You could do this using grep/cut commands — to obtain simple text files
+with a word in each line. You can do this once and commit the 3 files to your repository.
+
+2. In your `run` script/program, read the 3 files and create a large
+expression with alternatives. Use a regexp library based on DFAs (deterministic
+finite automata).
+
+POINTS: 4
+DEADLINE: 2024-01-27 23:59:59
+REMAINDER: 0/2
--- a/TaskG01/simple.exp
+++ b/TaskG01/simple.exp
@ -0,0 +1,25 @@
+Tu nic nie ma.
+Wczoraj {ugotowałem} ziemniaki.
+{Jechałem}, {jechałem} i {jechałem}, a potem się {zatrzymałem}.
+{Umyłem} się mydłem.
+Jestem {wysoki}.
+Jest wysoki.
+Mówią, że jestem od zawsze niezwykle {sprytny}.
+aaaa {byłem} aaa {zielony} ddd
+aaaa {byłem} aaa bbb {zielony} ddd
+aaaa {byłem} aaa bbb ccc {zielony} ddd
+aaaa {byłem} aaa bbb ccc ddd zielony ddd
+aaaa był aaa bbb zielony ddd
+aaaa byłam aaa bbb zielony ddd
+aaaa byłam aaa bbb zielona ddd
+teraz będę {pisał} książkę
+będę teraz {pisał} książkę
+będę teraz dla ciebie {pisał} książkę
+teraz dla ciebie {pisał} będę księżkę
+będę i on napisał książkę
+aaa będę {śpiewał} bbb
+aaa będę ccc {śpiewał} bbb
+aaa będę ccc ddd {śpiewał} bbb
+aaa będę ccc ddd eee {śpiewał} bbb
+aaa będę ccc ddd eee fff śpiewał bbb
+{pływałem} i {biegałem}
--- a/TaskG01/simple.in
+++ b/TaskG01/simple.in
@ -0,0 +1,25 @@
+Tu nic nie ma.
+Wczoraj ugotowałem ziemniaki.
+Jechałem, jechałem i jechałem, a potem się zatrzymałem.
+Umyłem się mydłem.
+Jestem wysoki.
+Jest wysoki.
+Mówią, że jestem od zawsze niezwykle sprytny.
+aaaa byłem aaa zielony ddd
+aaaa byłem aaa bbb zielony ddd
+aaaa byłem aaa bbb ccc zielony ddd
+aaaa byłem aaa bbb ccc ddd zielony ddd
+aaaa był aaa bbb zielony ddd
+aaaa byłam aaa bbb zielony ddd
+aaaa byłam aaa bbb zielona ddd
+teraz będę pisał książkę
+będę teraz pisał książkę
+będę teraz dla ciebie pisał książkę
+teraz dla ciebie pisał będę księżkę
+będę i on napisał książkę
+aaa będę śpiewał bbb
+aaa będę ccc śpiewał bbb
+aaa będę ccc ddd śpiewał bbb
+aaa będę ccc ddd eee śpiewał bbb
+aaa będę ccc ddd eee fff śpiewał bbb
+pływałem i biegałem
--- a/TaskG02/description.txt
+++ b/TaskG02/description.txt
@ -0,0 +1,44 @@
+Use regular expressions to mark Polish first-person feminine forms.
+
+You should handle the following types of expressions:
+
+* first-person feminine past forms of verbs ("zrobiłam", "pisałam", etc.),
+* first-person singular feminine forms of the verb "być" ("be") combined
+  with singular feminine nominative forms of adjectives ("wysoka", "sprytna", etc.),
+  assuming that the form of the verb "być" is to the left of the adjective, not
+  more than 3 other words,
+* the verb "będę" combined with the past participle (i.e. 3rd person
+  feminine imperfect form, e.g. "robiła", "pisała"), assuming
+  that "będę" is to the left of the participle, separated from it
+  by not more than 3 other words, OR directly
+  to the right of the participle ("robiła będę").
+
+The first-person feminine forms should be marked with curly brackets.
+You should mark only the feminine form. Do not mark the form of "być"
+(unless it is clearly a feminine form, i.e. for "byłam").
+
+The match should be case-insensitive.
+
+The PoliMorf dictionary of inflected forms should be applied:
+http://zil.ipipan.waw.pl/PoliMorf?action=AttachFile&do=get&target=PoliMorf-0.6.7.tab.gz
+
+Suggested steps:
+
+1. Extract all the needed forms from the PoliMorf dictionary:
+
+* 1st person feminine past forms of verbs, unfortunately
+  this form is not directly present in the lexicon, you need
+  to add "m" to the 3rd person feminine form ("zrobiła" => "zrobiłam")
+* singular feminine nominative forms of adjectives
+* feminine past participle (3rd person feminine imperfect forms of verbs)
+
+You could do this using grep/cut commands — to obtain simple text files
+with a word in each line. You can do this once and commit the 3 files to your repository.
+
+2. In your `run` script/program, read the 3 files and create a large
+expression with alternatives. Use a regexp library based on DFAs (deterministic
+finite automata).
+
+POINTS: 4
+DEADLINE: 2024-01-27 23:59:59
+REMAINDER: 1/2
--- a/TaskG02/simple.exp
+++ b/TaskG02/simple.exp
@ -0,0 +1,25 @@
+Tu nic nie ma.
+Wczoraj {ugotowałam} ziemniaki.
+{Jechałam}, {jechałam} i {jechałam}, a potem się {zatrzymałam}.
+{Umyłam} się mydłem i bam, złam się.
+Jestem {wysoka}.
+Jest {wysoka}.
+Mówią, że jestem od zawsze niezwykle {sprytna}.
+aaaa {byłam} aaa {zielona} ddd
+aaaa {byłam} aaa bbb {zielona} ddd
+aaaa {byłam} aaa bbb ccc {zielona} ddd
+aaaa {byłam} aaa bbb ccc ddd zielona ddd
+aaaa była aaa bbb zielona ddd
+aaaa byłem aaa bbb zielona ddd
+aaaa byłem aaa bbb zielony ddd
+teraz będę {pisała} książkę
+będę teraz {pisała} książkę
+będę teraz dla ciebie {pisała} książkę
+teraz dla ciebie {pisała} będę księżkę
+będę i ona napisała książkę
+aaa będę {śpiewała} bbb
+aaa będę ccc {śpiewała} bbb
+aaa będę ccc ddd {śpiewała} bbb
+aaa będę ccc ddd eee {śpiewała} bbb
+aaa będę ccc ddd eee fff śpiewała bbb
+{pływałam} i {biegałam}
--- a/TaskG02/simple.in
+++ b/TaskG02/simple.in
@ -0,0 +1,25 @@
+Tu nic nie ma.
+Wczoraj ugotowałam ziemniaki.
+Jechałam, jechałam i jechałam, a potem się zatrzymałam.
+Umyłam się mydłem i bam, złam się.
+Jestem wysoka.
+Jest wysoka.
+Mówią, że jestem od zawsze niezwykle sprytna.
+aaaa byłam aaa zielona ddd
+aaaa byłam aaa bbb zielona ddd
+aaaa byłam aaa bbb ccc zielona ddd
+aaaa byłam aaa bbb ccc ddd zielona ddd
+aaaa była aaa bbb zielona ddd
+aaaa byłem aaa bbb zielona ddd
+aaaa byłem aaa bbb zielony ddd
+teraz będę pisała książkę
+będę teraz pisała książkę
+będę teraz dla ciebie pisała książkę
+teraz dla ciebie pisała będę księżkę
+będę i ona napisała książkę
+aaa będę śpiewała bbb
+aaa będę ccc śpiewała bbb
+aaa będę ccc ddd śpiewała bbb
+aaa będę ccc ddd eee śpiewała bbb
+aaa będę ccc ddd eee fff śpiewała bbb
+pływałam i biegałam
--- a/TaskG02/simple.out
+++ b/TaskG02/simple.out
--- a/TaskG03/description.txt
+++ b/TaskG03/description.txt
@ -0,0 +1,23 @@
+Use regular expressions to extract lines containing polish surnames. CASE INSENSITIVE
+
+Download list of polish male and female surnames from here:
+
+
+* https://dane.gov.pl/pl/dataset/1681,nazwiska-osob-zyjacych-wystepujace-w-rejestrze-pesel/resource/35279/table?page=1&per_page=20&q=&sort=
+* https://dane.gov.pl/pl/dataset/1681,nazwiska-osob-zyjacych-wystepujace-w-rejestrze-pesel/resource/22817/table?page=1&per_page=20&q=&sort=
+
+
+Extract lines from stdin containing any of the surnames.
+Look only for surnames no matter casing (case insensitive).
+The surname does not have to be surrounded by space or any other special characters.
+Don't search for declined forms of surnames.
+
+Check both NFA (e.g. re python library) and DFA (google re2) and compare them.
+
+Submit solution based on a better method.
+
+NOTE: You could extract the polish surnames list, save it to a file, then commit the file to your repository.
+NOTE: You may set max_mem to a higher value than the default in re2 library.
+
+POINTS: 2
+DEADLINE: 2024-01-27 23:59:59
--- a/TaskG03/polish_wiki_excerpt.exp
+++ b/TaskG03/polish_wiki_excerpt.exp
--- a/TaskG04/description.txt
+++ b/TaskG04/description.txt
@ -0,0 +1,8 @@
+Na wejściu dostajemy bajty tekstu UTF-8 zapisane w zwykłej postaci tekstu
+(Zatem obsługujemy strumień wejścia z 0 i 1 w postaci tekstowej, a nie strumień bitów).
+
+Przekonwertuj plik na tekst UTF-8.
+
+POINTS: 2
+DEADLINE: 2024-01-21 23:59:59
+REMAINDER: 0/2
--- a/TaskG04/polish_wiki_excerpt.exp
+++ b/TaskG04/polish_wiki_excerpt.exp
--- a/TaskG04/polish_wiki_excerpt.in
+++ b/TaskG04/polish_wiki_excerpt.in
--- a/TaskG05/description.txt
+++ b/TaskG05/description.txt
@ -0,0 +1,8 @@
+Na wejściu dostajemy bajty w formacie hex tekstu UTF-8 zapisane w zwykłej postaci tekstu
+(Zatem obsługujemy strumień wejścia w postaci tekstowej, a nie strumień bitów).
+
+Przekonwertuj plik na tekst UTF-8.
+
+POINTS: 2
+DEADLINE: 2024-01-21 23:59:59
+REMAINDER: 1/2
--- a/TaskG05/polish_wiki_excerpt.exp
+++ b/TaskG05/polish_wiki_excerpt.exp
--- a/TaskG05/polish_wiki_excerpt.in
+++ b/TaskG05/polish_wiki_excerpt.in
--- a/TaskG05/polish_wiki_excerpt.out
+++ b/TaskG05/polish_wiki_excerpt.out
--- a/daut2010.pdf
+++ b/daut2010.pdf
--- a/daut2010_odpowiedzi.pdf
+++ b/daut2010_odpowiedzi.pdf
--- a/daut2011.pdf
+++ b/daut2011.pdf
--- a/daut2011_odpowiedzi.pdf
+++ b/daut2011_odpowiedzi.pdf
--- a/run_report.py
+++ b/run_report.py
@ -41,7 +41,7 @@ def get_index():

 def is_task_set_correct(dir, task_set):
    try:
-        with open(Path(dir, f'{task_set}.out')) as f_out, open(Path(dir, f'{task_set}.exp')) as f_exp:
+        with open(Path(dir, f'{task_set}.out'),encoding='utf-8') as f_out, open(Path(dir, f'{task_set}.exp'),encoding='utf-8') as f_exp:
            f_out_lines = ''.join(f_out.readlines())
            f_exp_lines = ''.join(f_exp.readlines())
            return f_out_lines == f_exp_lines
@ -81,7 +81,7 @@ def execute_all_tasks(tasks):


 def get_task_points(dir):
-    with open(Path(dir, 'description.txt')) as f_in:
+    with open(Path(dir, 'description.txt'), encoding='utf-8') as f_in:
        lines = f_in.readlines()
        points = int([x for x in lines if x.startswith('POINTS')][0].split(' ')[-1].rstrip())
        return points
@ -106,6 +106,6 @@ def print_report(report):

 INDEX = get_index()
 tasks = get_tasks(INDEX)
-execute_all_tasks(tasks)
+#execute_all_tasks(tasks)
 report = get_report(tasks)
 print_report(report)