zadania F

Merge https://github.com/duszekjk/jezykiformalne
Delete TaskG04 directory
2024-01-19 15:20:29 +01:00 · 2024-01-19 13:36:30 +01:00 · 2024-01-08 13:58:09 +01:00 · 2024-01-08 13:56:18 +01:00 · 2024-01-08 13:55:58 +01:00 · 2024-01-08 13:55:38 +01:00
23 changed files with 89429 additions and 10 deletions
--- a/TaskF00/run.py
+++ b/TaskF00/run.py
@ -0,0 +1,25 @@
+import re
+
+def substitute_digits(input_string):
+    """Return ``input_string`` with every run of four digits spelled as letters.
+
+    Each non-overlapping four-digit match is rewritten character by
+    character: an ASCII digit ``d`` becomes the letter at offset ``d`` from
+    ``'a'`` (``0`` -> ``a`` ... ``9`` -> ``j``).  Any matched character that
+    is not in ``'0'``-``'9'`` is copied through unchanged.  Digit runs
+    shorter than four characters are left as they are.
+    """
+    def to_letters(found):
+        # Translate a single four-digit match; only ASCII digits are mapped.
+        return ''.join(
+            chr(ord('a') + int(ch)) if '0' <= ch <= '9' else ch
+            for ch in found.group()
+        )
+
+    return re.sub(r'\d{4}', to_letters, input_string)
+
+
+# Rewrite every line of the sample input and echo the result to stdout.
+file_path = 'simple.in'
+with open(file_path, 'r', encoding='utf-8') as source:
+    for raw_line in source:
+        # The trailing newline is dropped because print() adds its own.
+        print(substitute_digits(raw_line.rstrip('\n')))
--- a/TaskF01/description.txt
+++ b/TaskF01/description.txt
@ -18,5 +18,3 @@ letter to lower. In this task word means the string of "\w" metacharacters,
 lower case letter is [a-ząćęłńóśźż] class,
 capital case letter is [A-ZĄĆĘŁŃÓŚŹŻ] class.

-POINTS: 2
-DEADLINE: 2020-12-18 23:59:59
--- a/TaskF01/run.py
+++ b/TaskF01/run.py
@ -0,0 +1,10 @@
+import re
+
+# A word that starts with a letter and contains both an upper-case and a
+# lower-case letter.  The classes include the Polish diacritics
+# ([a-ząćęłńóśźż] / [A-ZĄĆĘŁŃÓŚŹŻ]) so that words such as "Łódź" qualify.
+_MIXED_CASE_WORD = re.compile(
+    r'\b(?:[A-ZĄĆĘŁŃÓŚŹŻ]+\w*[a-ząćęłńóśźż]+\w*'
+    r'|[a-ząćęłńóśźż]+\w*[A-ZĄĆĘŁŃÓŚŹŻ]+\w*)\b'
+)
+
+
+def process_file(file_path):
+    """Print each line of ``file_path`` with mixed-case words case-swapped.
+
+    A word is affected only when it begins with a letter and contains at
+    least one upper-case and one lower-case letter; every letter of such a
+    word has its case inverted.  All other text is printed unchanged.
+
+    Args:
+        file_path: Path of the UTF-8 encoded text file to read.
+    """
+    with open(file_path, 'r', encoding='utf-8') as file:
+        for line in file:
+            modified_line = _MIXED_CASE_WORD.sub(
+                lambda match: match.group(0).swapcase(), line)
+            # Lines still carry their own newline, so none is appended.
+            print(modified_line, end='')
+
+# NOTE(review): the input here is the ``.exp`` file, which by its name looks
+# like the expected output rather than the task input — confirm intended.
+file_path = 'polish_wiki_excerpt.exp'
+process_file(file_path)
--- a/TaskF02/run.py
+++ b/TaskF02/run.py
@ -0,0 +1,23 @@
+import re
+
+def analyze_line(line):
+    """Count character classes in ``line``.
+
+    Returns a string with four space-separated counts, in this order:
+    lower-case letters (``[a-ząćęłńóśźż]``), upper-case letters
+    (``[A-ZĄĆĘŁŃÓŚŹŻ]``), digits and whitespace characters.  Characters
+    outside these classes are ignored.
+    """
+    # One capture group per class; exactly one group is set on every match,
+    # so ``lastindex`` identifies the class of the matched character.
+    classifier = re.compile(
+        r'([a-ząćęłńóśźż])|([A-ZĄĆĘŁŃÓŚŹŻ])|(\d)|(\s)', flags=re.UNICODE)
+
+    tallies = [0, 0, 0, 0]
+    for found in classifier.finditer(line):
+        tallies[found.lastindex - 1] += 1
+
+    return ' '.join(str(count) for count in tallies)
+
+def process_file(file_path):
+    """Print the ``analyze_line`` summary of every non-empty line in a file.
+
+    Args:
+        file_path: Path of the UTF-8 encoded text file to read.
+    """
+    with open(file_path, 'r', encoding='utf-8') as source:
+        for raw_line in source:
+            text = raw_line.rstrip('\n')
+            if not text:
+                # Empty lines produce no output at all.
+                continue
+            print(analyze_line(text))
+
+# Print the character-class counts for the sample input.
+file_path = 'simple.in' 
+process_file(file_path)
--- a/TaskF03/description.txt
+++ b/TaskF03/description.txt
@ -19,6 +19,3 @@ In this task word means a string of "\w" metacharacters,
 lower case letter is [a-ząćęłńóśźż] class,
 capital case letter is [A-ZĄĆĘŁŃÓŚŹŻ] class.

-
-POINTS: 1
-DEADLINE: 2020-12-18 23:59:59
--- a/TaskF03/run.py
+++ b/TaskF03/run.py
@ -0,0 +1,13 @@
+import re
+
+def process_line(line):
+    """Count words by the case of their first letter.
+
+    Only words beginning with a letter from ``[a-ząćęłńóśźż]`` or
+    ``[A-ZĄĆĘŁŃÓŚŹŻ]`` are considered.  Returns ``"<lower> <upper>"``: the
+    number of words starting with a lower-case letter followed by the
+    number starting with an upper-case letter.
+    """
+    starts_with_letter = re.compile(
+        r'\b(?:[a-ząćęłńóśźż]+\w*|[A-ZĄĆĘŁŃÓŚŹŻ]+\w*)\b', flags=re.UNICODE)
+
+    lower_count = 0
+    upper_count = 0
+    for found in starts_with_letter.finditer(line):
+        initial = found.group()[0]
+        if initial.islower():
+            lower_count += 1
+        if initial.isupper():
+            upper_count += 1
+
+    return f"{lower_count} {upper_count}"
+
+# Report the lower-case / upper-case word counts for each input line.
+file_path = 'simple.in'
+with open(file_path, 'r', encoding='utf-8') as source:
+    for raw_line in source:
+        print(process_line(raw_line.strip()))
--- a/TaskF04/description.txt
+++ b/TaskF04/description.txt
@ -16,5 +16,3 @@ Write the input line with the second digits string deleted.
 Digit is a [0-9] class.


-POINTS: 1
-DEADLINE: 2020-12-18 23:59:59
--- a/TaskF04/run.py
+++ b/TaskF04/run.py
@ -0,0 +1,22 @@
+import re
+
+# A "digits string": a run of [0-9] characters delimited by word boundaries.
+_DIGIT_STRING = re.compile(r'\b[0-9]+\b')
+
+
+def delete_second_digits(input_line):
+    """Return ``input_line`` with its second digits string removed.
+
+    The second digits string is located by position and cut out of the
+    line, so an earlier occurrence of the same digits (for example inside
+    the first number, as in ``"12 2 3"``) is never removed by mistake.
+    Lines with fewer than two digits strings are returned unchanged.
+
+    Args:
+        input_line: The text to process; any trailing newline is preserved.
+
+    Returns:
+        The line without its second digits string.
+    """
+    found = _DIGIT_STRING.finditer(input_line)
+    next(found, None)  # skip the first digits string
+    second = next(found, None)
+    if second is None:
+        return input_line
+    # Splice around the match instead of substituting by value.
+    return input_line[:second.start()] + input_line[second.end():]
+
+# Process the whole input first, then emit the collected lines.
+file_path = 'simple.in'
+
+with open(file_path, 'r', encoding='utf-8') as source:
+    output_lines = [delete_second_digits(raw_line) for raw_line in source]
+
+for processed in output_lines:
+    # Lines still end with their original newline.
+    print(processed, end='')
--- a/TaskF04/simple.in
+++ b/TaskF04/simple.in
@ -1,3 +1,6 @@
 Mam 2 jabłka i 35 banananów.
 Mam 2 jabłka i 35 banananów oraz 20 gruszek.
 Widziałem 2 bociany.
+2 35 209
+12 34
+532 234 234 324
--- a/TaskF05/description.txt
+++ b/TaskF05/description.txt
@ -17,6 +17,3 @@ The number of "x" in the "xxx" string should be the same as the
 the number of characters in the input string.
 In this task, a word means a string of "\w" metacharacters.

-
-POINTS: 2
-DEADLINE: 2020-12-18 23:59:59
--- a/TaskF05/run.py
+++ b/TaskF05/run.py
@ -0,0 +1,15 @@
+import re
+
+# A word is a maximal run of "\w" characters.
+_WORD = re.compile(r'\w+')
+
+
+def process_line(line):
+    """Return ``line`` with its third word replaced by a run of ``x``.
+
+    The replacement has exactly as many ``x`` characters as the word it
+    replaces.  Only the third word itself is masked; other occurrences of
+    the same text elsewhere in the line are left untouched.  Lines with
+    fewer than three words are returned unchanged.
+
+    Args:
+        line: The text to process; any trailing newline is preserved.
+
+    Returns:
+        The line with its third word masked.
+    """
+    for index, found in enumerate(_WORD.finditer(line)):
+        if index == 2:
+            # Splice by position so only this occurrence is replaced.
+            masked = 'x' * len(found.group())
+            return line[:found.start()] + masked + line[found.end():]
+
+    return line
+
+# Mask the third word of every line of the sample input.
+with open('simple.in', 'r', encoding='utf-8') as source:
+    for raw_line in source:
+        # Lines still end with their original newline.
+        print(process_line(raw_line), end='')
--- a/TaskG00/description.txt
+++ b/TaskG00/description.txt
@ -0,0 +1,20 @@
+Use regular expressions to extract lines containing Polish surnames.
+
+Download the lists of Polish male and female surnames from here:
+
+
+* https://dane.gov.pl/pl/dataset/1681,nazwiska-osob-zyjacych-wystepujace-w-rejestrze-pesel/resource/35279/table?page=1&per_page=20&q=&sort=
+* https://dane.gov.pl/pl/dataset/1681,nazwiska-osob-zyjacych-wystepujace-w-rejestrze-pesel/resource/22817/table?page=1&per_page=20&q=&sort=
+
+
+Extract lines from stdin containing any of the surnames.
+Look only for surnames in lowercase.
+The surname does not have to be surrounded by space or any other special characters.
+Don't search for declined forms of surnames.
+
+Check both NFA (e.g. the Python re library) and DFA (Google re2) engines and compare their run speed.
+
+Submit solution based on DFA library.
+
+NOTE: You could extract the Polish surnames list, save it to a file, then commit the file to your repository.
+NOTE: You may set max_mem to a higher value than the default in re2 library.
--- a/TaskG00/polish_wiki_excerpt.exp
+++ b/TaskG00/polish_wiki_excerpt.exp
--- a/TaskG00/polish_wiki_excerpt.in
+++ b/TaskG00/polish_wiki_excerpt.in
--- a/TaskG01/description.txt
+++ b/TaskG01/description.txt
@ -0,0 +1,41 @@
+Use regular expressions to mark Polish first-person masculine forms.
+
+You should handle the following types of expressions:
+
+* first-person masculine past forms of verbs ("zrobiłem", "pisałem", etc.),
+* first-person singular masculine forms of the verb "być" ("be") combined
+  with singular masculine nominative forms of adjectives ("wysoki", "sprytny", etc.),
+  assuming that the form of the verb "być" is to the left of the adjective,
+  separated from it by at most 3 other words,
+* the verb "będę" combined with the past participle (i.e. 3rd person
+  masculine imperfect form, e.g. "robił", "pisał"), assuming
+  that "będę" is either to the left of the participle, separated from it
+  by at most 3 other words, OR directly
+  to the right of the participle ("robił będę").
+
+The first-person masculine forms should be marked with curly brackets.
+You should mark only the masculine form. Do not mark the form of "być"
+(unless it is clearly a masculine form, i.e. for "byłem").
+
+The match should be case-insensitive.
+
+The PoliMorf dictionary of inflected forms should be applied:
+http://zil.ipipan.waw.pl/PoliMorf?action=AttachFile&do=get&target=PoliMorf-0.6.7.tab.gz
+
+Suggested steps:
+
+1. Extract all the needed forms from the PoliMorf dictionary:
+
+* 1st person masculine past forms of verbs, unfortunately
+  this form is not directly present in the lexicon, you need
+  to add "em" to the 3rd person masculine form ("zrobił" => "zrobiłem")
+* singular masculine nominative forms of adjectives
+* masculine past participle (3rd person masculine imperfect forms of verbs)
+
+You could do this using grep/cut commands — to obtain simple text files
+with a word in each line. You can do this once and commit the 3 files to your repository.
+
+2. In your `run` script/program, read the 3 files and create a large
+expression with alternatives. Use a regexp library based on DFAs (deterministic
+finite automata).
+
--- a/TaskG01/simple.exp
+++ b/TaskG01/simple.exp
@ -0,0 +1,25 @@
+Tu nic nie ma.
+Wczoraj {ugotowałem} ziemniaki.
+{Jechałem}, {jechałem} i {jechałem}, a potem się {zatrzymałem}.
+{Umyłem} się mydłem.
+Jestem {wysoki}.
+Jest wysoki.
+Mówią, że jestem od zawsze niezwykle {sprytny}.
+aaaa {byłem} aaa {zielony} ddd
+aaaa {byłem} aaa bbb {zielony} ddd
+aaaa {byłem} aaa bbb ccc {zielony} ddd
+aaaa {byłem} aaa bbb ccc ddd zielony ddd
+aaaa był aaa bbb zielony ddd
+aaaa byłam aaa bbb zielony ddd
+aaaa byłam aaa bbb zielona ddd
+teraz będę {pisał} książkę
+będę teraz {pisał} książkę
+będę teraz dla ciebie {pisał} książkę
+teraz dla ciebie {pisał} będę księżkę
+będę i on napisał książkę
+aaa będę {śpiewał} bbb
+aaa będę ccc {śpiewał} bbb
+aaa będę ccc ddd {śpiewał} bbb
+aaa będę ccc ddd eee {śpiewał} bbb
+aaa będę ccc ddd eee fff śpiewał bbb
+{pływałem} i {biegałem}
--- a/TaskG01/simple.in
+++ b/TaskG01/simple.in
@ -0,0 +1,25 @@
+Tu nic nie ma.
+Wczoraj ugotowałem ziemniaki.
+Jechałem, jechałem i jechałem, a potem się zatrzymałem.
+Umyłem się mydłem.
+Jestem wysoki.
+Jest wysoki.
+Mówią, że jestem od zawsze niezwykle sprytny.
+aaaa byłem aaa zielony ddd
+aaaa byłem aaa bbb zielony ddd
+aaaa byłem aaa bbb ccc zielony ddd
+aaaa byłem aaa bbb ccc ddd zielony ddd
+aaaa był aaa bbb zielony ddd
+aaaa byłam aaa bbb zielony ddd
+aaaa byłam aaa bbb zielona ddd
+teraz będę pisał książkę
+będę teraz pisał książkę
+będę teraz dla ciebie pisał książkę
+teraz dla ciebie pisał będę księżkę
+będę i on napisał książkę
+aaa będę śpiewał bbb
+aaa będę ccc śpiewał bbb
+aaa będę ccc ddd śpiewał bbb
+aaa będę ccc ddd eee śpiewał bbb
+aaa będę ccc ddd eee fff śpiewał bbb
+pływałem i biegałem
--- a/TaskG02/description.txt
+++ b/TaskG02/description.txt
@ -0,0 +1,40 @@
+Use regular expressions to mark Polish first-person feminine forms.
+
+You should handle the following types of expressions:
+
+* first-person feminine past forms of verbs ("zrobiłam", "pisałam", etc.),
+* first-person singular feminine forms of the verb "być" ("be") combined
+  with singular feminine nominative forms of adjectives ("wysoka", "sprytna", etc.),
+  assuming that the form of the verb "być" is to the left of the adjective,
+  separated from it by at most 3 other words,
+* the verb "będę" combined with the past participle (i.e. 3rd person
+  feminine imperfect form, e.g. "robiła", "pisała"), assuming
+  that "będę" is either to the left of the participle, separated from it
+  by at most 3 other words, OR directly
+  to the right of the participle ("robiła będę").
+
+The first-person feminine forms should be marked with curly brackets.
+You should mark only the feminine form. Do not mark the form of "być"
+(unless it is clearly a feminine form, i.e. for "byłam").
+
+The match should be case-insensitive.
+
+The PoliMorf dictionary of inflected forms should be applied:
+http://zil.ipipan.waw.pl/PoliMorf?action=AttachFile&do=get&target=PoliMorf-0.6.7.tab.gz
+
+Suggested steps:
+
+1. Extract all the needed forms from the PoliMorf dictionary:
+
+* 1st person feminine past forms of verbs, unfortunately
+  this form is not directly present in the lexicon, you need
+  to add "m" to the 3rd person feminine form ("zrobiła" => "zrobiłam")
+* singular feminine nominative forms of adjectives
+* feminine past participle (3rd person feminine imperfect forms of verbs)
+
+You could do this using grep/cut commands — to obtain simple text files
+with a word in each line. You can do this once and commit the 3 files to your repository.
+
+2. In your `run` script/program, read the 3 files and create a large
+expression with alternatives. Use a regexp library based on DFAs (deterministic
+finite automata).
--- a/TaskG02/simple.exp
+++ b/TaskG02/simple.exp
@ -0,0 +1,25 @@
+Tu nic nie ma.
+Wczoraj {ugotowałam} ziemniaki.
+{Jechałam}, {jechałam} i {jechałam}, a potem się {zatrzymałam}.
+{Umyłam} się mydłem i bam, złam się.
+Jestem {wysoka}.
+Jest {wysoka}.
+Mówią, że jestem od zawsze niezwykle {sprytna}.
+aaaa {byłam} aaa {zielona} ddd
+aaaa {byłam} aaa bbb {zielona} ddd
+aaaa {byłam} aaa bbb ccc {zielona} ddd
+aaaa {byłam} aaa bbb ccc ddd zielona ddd
+aaaa była aaa bbb zielona ddd
+aaaa byłem aaa bbb zielona ddd
+aaaa byłem aaa bbb zielony ddd
+teraz będę {pisała} książkę
+będę teraz {pisała} książkę
+będę teraz dla ciebie {pisała} książkę
+teraz dla ciebie {pisała} będę księżkę
+będę i ona napisała książkę
+aaa będę {śpiewała} bbb
+aaa będę ccc {śpiewała} bbb
+aaa będę ccc ddd {śpiewała} bbb
+aaa będę ccc ddd eee {śpiewała} bbb
+aaa będę ccc ddd eee fff śpiewała bbb
+{pływałam} i {biegałam}
--- a/TaskG02/simple.in
+++ b/TaskG02/simple.in
@ -0,0 +1,25 @@
+Tu nic nie ma.
+Wczoraj ugotowałam ziemniaki.
+Jechałam, jechałam i jechałam, a potem się zatrzymałam.
+Umyłam się mydłem i bam, złam się.
+Jestem wysoka.
+Jest wysoka.
+Mówią, że jestem od zawsze niezwykle sprytna.
+aaaa byłam aaa zielona ddd
+aaaa byłam aaa bbb zielona ddd
+aaaa byłam aaa bbb ccc zielona ddd
+aaaa byłam aaa bbb ccc ddd zielona ddd
+aaaa była aaa bbb zielona ddd
+aaaa byłem aaa bbb zielona ddd
+aaaa byłem aaa bbb zielony ddd
+teraz będę pisała książkę
+będę teraz pisała książkę
+będę teraz dla ciebie pisała książkę
+teraz dla ciebie pisała będę księżkę
+będę i ona napisała książkę
+aaa będę śpiewała bbb
+aaa będę ccc śpiewała bbb
+aaa będę ccc ddd śpiewała bbb
+aaa będę ccc ddd eee śpiewała bbb
+aaa będę ccc ddd eee fff śpiewała bbb
+pływałam i biegałam
--- a/TaskG03/description.txt
+++ b/TaskG03/description.txt
@ -0,0 +1,20 @@
+Use regular expressions to extract lines containing Polish surnames. CASE INSENSITIVE
+
+Download the lists of Polish male and female surnames from here:
+
+
+* https://dane.gov.pl/pl/dataset/1681,nazwiska-osob-zyjacych-wystepujace-w-rejestrze-pesel/resource/35279/table?page=1&per_page=20&q=&sort=
+* https://dane.gov.pl/pl/dataset/1681,nazwiska-osob-zyjacych-wystepujace-w-rejestrze-pesel/resource/22817/table?page=1&per_page=20&q=&sort=
+
+
+Extract lines from stdin containing any of the surnames.
+Look for surnames regardless of their casing (case insensitive).
+The surname does not have to be surrounded by space or any other special characters.
+Don't search for declined forms of surnames.
+
+Check both NFA (e.g. the Python re library) and DFA (Google re2) engines and compare them.
+
+Submit a solution based on the better method.
+
+NOTE: You could extract the Polish surnames list, save it to a file, then commit the file to your repository.
+NOTE: You may set max_mem to a higher value than the default in re2 library.
--- a/TaskG03/polish_wiki_excerpt.exp
+++ b/TaskG03/polish_wiki_excerpt.exp
--- a/TaskG03/polish_wiki_excerpt.in
+++ b/TaskG03/polish_wiki_excerpt.in
Author	SHA1	Message	Date
Weranda	b8ff9bedad	zadania F	2024-01-19 15:20:29 +01:00
Weranda	69b862a2d2	Merge https://github.com/duszekjk/jezykiformalne	2024-01-19 13:36:30 +01:00
Jacek Kałużny	dc0392f7d6	Delete TaskG04 directory	2024-01-08 13:58:09 +01:00
Jacek Kałużny	ae508b36b1	Update description.txt	2024-01-08 13:56:18 +01:00
Jacek Kałużny	cd6b06cefa	Update description.txt	2024-01-08 13:55:58 +01:00
Jacek Kałużny	c818e6742f	Update description.txt	2024-01-08 13:55:38 +01:00
Jacek Kałużny	6b53fddd75	Update description.txt	2024-01-08 13:54:44 +01:00
Jacek Kałużny	8fb54512d7	Add files via upload	2024-01-08 11:27:35 +01:00