exercise D

2023-11-26 12:43:04 +01:00 · 2023-11-26 12:43:04 +01:00 · abd0e8fd7f
commit abd0e8fd7f
parent 5c8fc0ab48 a84a1dcf10
24 changed files with 265 additions and 14000 deletions
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="" vcs="Git" />
+  </component>
+</project>
--- a/README.md
+++ b/README.md
@ -52,3 +52,190 @@ B05 - jedno dla wszystkich

 C00 - zadanie wykonywane wspólnie na zajęciach
 C01-C03, C04-C06 - po jedno dla każdego
+
+## Zajęcia 3 13.11.2023 Wyrażenia regularne
+
+D01 - D04 - do wykonania przez każdego
+
+Dokumentacja wyrażeń regularnych w python3: https://docs.python.org/3/library/re.html
+
+### Podstawowe funkcje
+
+search - zwraca pierwsze dopasowanie w napisie
+
+findall - zwraca listę wszystkich dopasowań (nienakładających się na siebie)
+
+match - zwraca dopasowanie od początku napisu
+
+To tylko podstawowe funkcje, z których będziemy korzystać. W dokumentacji opisane są wszystkie.
+
+### Obiekt match
+
+```
+import re
+answer = re.search('na','banan')
+print(answer)
+print(type(answer))
+print(answer.start())
+print(answer.end())
+print(answer.group())
+
+answer = re.search('na','kabanos')
+print(answer)
+
+if answer:
+    print(answer.group())
+else:
+    pass
+```
+
+### Metaznaki
+
+
+- [] -  zbiór znaków
+- . - jakikolwiek znak
+
+- ^ - początek napisu
+- $ - koniec napisu
+
+- ? - znak występuje lub nie występuje
+- \* - zero albo więcej pojawień się
+- \+ - jeden albo więcej pojawień się
+- {n} - dokładnie n pojawień się
+
+- | - lub
+- () - grupa
+- \ - znak ucieczki
+
+- \d - cyfra
+- \D - znak niebędący cyfrą
+- \s - biały znak
+- \S - znak niebędący białym znakiem
+
+
+### Flagi
+
+Można użyć specjalnych flag, np:
+`re.search('ma', 'AlA Ma KoTa', re.IGNORECASE)`.
+
+### Przykłady (objaśnienia na laboratoriach)
+
+```
+import re
+
+text = 'Ala ma kota i hamak, oraz 150 bananów.'
+
+re.search('ma',text)
+re.match('ma',text)
+re.match('Ala ma',text)
+re.findall('ma',text)
+
+re.findall('[mn]a',text)
+re.findall('[0-9]',text)
+re.findall('[0-9abc]',text)
+re.findall('[a-z][a-z]ma[a-z]',text)
+re.findall('[a-zA-Z][a-zA-Z]ma[a-zA-Z0-9]',text)
+re.findall('\d',text)
+
+re.search('[0-9][0-9][0-9]',text)
+re.search('[\d][\d][\d]',text)
+
+re.search('\d{2}',text)
+re.search('\d{3}',text)
+
+re.search('\d+',text)
+
+re.search('\d+ bananów',text)
+re.search('\d* bananów','Ala ma dużo bananów')
+re.search('\d* bananów',text)
+re.search('ma \d? bananów','Ala ma 5 bananów')
+re.search('ma ?\d? bananów','Ala ma bananów')
+re.search('ma( \d)? bananów','Ala ma bananów') 
+
+re.search('\d+ bananów','Ala ma 10 bananów albo 20 bananów')
+re.search('\d+ bananów$','Ala ma 10 bananów albo 20 bananów')
+
+text = 'Ala ma kota i hamak, oraz 150	bananów.'
+
+re.search('\d+ bananów',text)
+
+re.search('\d+\sbananów',text)
+
+re.search('kota . hamak',text)
+
+re.search('kota . hamak','Ala ma kota z hamakiem')
+
+re.search('kota .* hamak','Ala ma kota lub hamak')
+
+re.search('\.',text)
+
+re.search('kota|psa','Ala ma kota lub hamak')
+
+re.findall('kota|psa','Ala ma kota lub psa')
+
+re.search('kota (i|lub) psa','Ala ma kota lub psa')
+
+re.search('mam (kota).*(kota|psa)','Ja mam kota. Ala ma psa.').group(0)
+
+re.search('mam (kota).*(kota|psa)','Ja mam kota. Ala ma psa.').group(1)
+
+re.search('mam (kota).*(kota|psa)','Ja mam kota. Ala ma psa.').group(2)
+```
+
+### Przykłady wyrażenia regularne 2 (objaśnienia na laboratoriach)
+
+####  ^
+```
+re.search('[0-9]+', '123-456-789')
+re.search('[^0-9][0-9]+[^0-9]', '123-456-789')
+```
+
+#### cudzysłów
+'' oraz "" - oznaczają to samo w pythonie
+
+' ala ma psa o imieniu "Burek"'
+
+" ala ma psa o imieniu 'Burek' "
+
+' ala ma psa o imieniu \'Burek\' '
+
+" ala ma psa o imieniu \"Burek\" "
+
+#### multiline string
+
+#### raw string
+
+przy raw string znaki \ traktowane są jako zwykłe znaki \
+
+chociaż nawet w raw string cudzysłów poprzedzony znakiem \ nadal jest escapowany (nie kończy napisu), ale wtedy znak \ również pozostaje w stringu bez zmian
+
+https://docs.python.org/3/reference/lexical_analysis.html
+
+dobra praktyka - wszędzie escapować
+
+```
+'\\'
+print('\\')
+
+r'\\'
+print(r'\\')
+
+
+print("abcd")
+print("ab\cd")
+print(r"ab\cd")
+
+print("ab\nd")
+print(r"ab\nd")
+
+
+print("\"")
+print(r"\"")
+
+print("\")
+print(r"\")
+
+re.search('\\', r'a\bc')
+re.search(r'\\', r'a\bc')
+re.search('\\\\', r'a\bc')
+```
--- a/TaskC04/description.txt
+++ b/TaskC04/description.txt
@ -4,8 +4,8 @@ Deterministic automaton III
 Read a description of a finite-state automaton in the AT&T format
 (without weights) from the file in the first argument. Then, read strings from the
 standard input. If a string is
-accepted by the automated, write YES, a space and the string on the
-standard output, otherwise — write NO, a space and the string.
+accepted by the automaton, write TRUE, a space and the string on the
+standard output, otherwise — write FALSE, a space and the string.

 If there is a non-determinism in the automaton, the first transition should be chosen.

@ -17,8 +17,11 @@ is at most one epsilon transition from a given state and that there
 are no cycles with epsilon transition.

 Your program does not have to check whether the description is correct
-and whether the automaton is deterministic. You can assume that the
-automaton does not contain epsilon transitions.
+and whether the automaton is deterministic.
+
+long.arg - this automaton accepts two texts - "aaaa" and "a" replicated 4038 times.
+simple1.arg - simple automaton accepting only text "abc"
+simple2.arg - automaton accepting text "ab*c" (b replicated any number of times) and "kot"

 POINTS: 3
 DEADLINE: 2023-11-12 23:59:59
--- a/TaskC04/long.arg
+++ b/TaskC04/long.arg
@ -1,4 +1,3 @@
-# automat akceptuje dwa napisy - "aaaa" i "a" powielone 4038 razy
 0	1	a
 1	2	a
 2	3	a
--- a/TaskC04/simple1.arg
+++ b/TaskC04/simple1.arg
@ -1,4 +1,3 @@
-# prosty automat akceptujący tylko napis "abc"
 0	1	a
 1	2	b
 2	3	c
--- a/TaskC04/simple2.arg
+++ b/TaskC04/simple2.arg
@ -1,4 +1,3 @@
-# automat akceptujący napis "ab*c" (b powielony dowolną liczbę razy) i "kot"
 0	1	a
 1	1	b
 1	2	c
--- a/TaskC06/medium2.exp
+++ b/TaskC06/medium2.exp
@ -1 +0,0 @@
-a
--- a/TaskC06/medium2.in
+++ b/TaskC06/medium2.in
--- a/TaskC06/medium2.out
+++ b/TaskC06/medium2.out
--- a/TaskD01/description.txt
+++ b/TaskD01/description.txt
@ -0,0 +1,5 @@
+Write a program to find lines containing the word "Hamlet".
+Do use regular expressions.
+
+POINTS: 1
+DEADLINE: 2023-11-26 23:59:59
--- a/TaskD01/simple.exp
+++ b/TaskD01/simple.exp
@ -0,0 +1,2 @@
+Here comes Hamlet
+Hamlet Hamlet again
--- a/TaskD01/simple.in
+++ b/TaskD01/simple.in
@ -0,0 +1,3 @@
+Here comes Hamlet
+ABC
+Hamlet Hamlet again
--- a/TaskD02/description.txt
+++ b/TaskD02/description.txt
@ -0,0 +1,7 @@
+Write a program to find lines containing the word "pies" separated by spaces.
+The word does not need a space on its left if it starts the line, or a space on its right if it ends the line.
+Return the line regardless of the letter case of the word "pies".
+Do use regular expressions.
+
+POINTS: 1
+DEADLINE: 2023-11-26 23:59:59
--- a/TaskD02/simple.exp
+++ b/TaskD02/simple.exp
@ -0,0 +1,3 @@
+Pies ma Alę
+Kot i pies to zwierzęta
+pies
--- a/TaskD02/simple.in
+++ b/TaskD02/simple.in
@ -0,0 +1,5 @@
+Pies ma Alę
+Ala ma psa
+tu nic nie ma
+Kot i pies to zwierzęta
+pies
--- a/TaskD03/description.txt
+++ b/TaskD03/description.txt
@ -0,0 +1,6 @@
+Write a program to find lines containing a date from 1900 to 1999 in the format '19XX r.', regardless of what appears to the left or right of the expression.
+Note that the ' r.' part is obligatory.
+Do use regular expressions.
+
+POINTS: 1
+DEADLINE: 2023-11-26 23:59:59
--- a/TaskD03/simple.exp
+++ b/TaskD03/simple.exp
@ -0,0 +1,3 @@
+Kiedyś był 1934 r.
+Kiedyś był 1934 r.fsdfsdfsdf
+1934 r. to jakaś data
--- a/TaskD03/simple.in
+++ b/TaskD03/simple.in
@ -0,0 +1,5 @@
+Kiedyś był 1934 r.
+Kiedyś był 1934 r.fsdfsdfsdf
+Kiedyś był 1935 rok
+1934 r. to jakaś data
+1934  to też jakaś data
--- a/TaskD04/description.txt
+++ b/TaskD04/description.txt
@ -0,0 +1,6 @@
+Write a program to find all maximal substrings of digits.
+Return only these substrings, separated by spaces, in the order in which they occur.
+Do use regular expressions.
+
+POINTS: 2
+DEADLINE: 2023-11-26 23:59:59
--- a/TaskD04/simple.exp
+++ b/TaskD04/simple.exp
@ -0,0 +1,4 @@
+34234 34 5
+34535
+34
+1992 1999
--- a/TaskD04/simple.in
+++ b/TaskD04/simple.in
@ -0,0 +1,5 @@
+34234 34  dfd gfd 5
+34535
+fsdflskfjsdflk
+fsdkfj sdf34fdfd
+Firma powstała w 1992 r., z połączenia Authorware, Inc. (twórców pakietu Authorware) i MacroMind-Paracomp (producenta Macromind Director). W 1999 r. Macromedia zakupiła firmę Allaire i jej bi
--- a/TaskX02/run.py
+++ b/TaskX02/run.py
@ -0,0 +1,6 @@
+import sys
+
+count = 0
+for line in sys.stdin:
+    count += 1
+print(count)
--- a/TaskX03/run.py
+++ b/TaskX03/run.py
@ -4,4 +4,4 @@ for line in sys.stdin:
    stripped_line = line.strip()
    line_length = len(stripped_line)
    
-    print(f"{line_length} {stripped_line}")
+    print(f"{line_length} {stripped_line}")
--- a/run_report.py
+++ b/run_report.py
@ -11,7 +11,9 @@ def execute_task(dir):
    for task_set in task_sets:
        try:
            with open(Path(dir, f'{task_set}.in')) as f_in, open(Path(dir, f'{task_set}.out'), 'w') as f_out:
-                arg = [x for x in dir.iterdir() if str(x).endswith('.arg')]  #  arg = [x for x in dir.iterdir() if str(x).endswith(f'{task_set}.arg')]
+                arg = [x for x in dir.iterdir() if str(x).endswith(f'{task_set}.arg')]  #  arg = [x for x in dir.iterdir() if str(x).endswith(f'{task_set}.arg')]
+                if not arg:
+                    arg = [x for x in dir.iterdir() if str(x).endswith('fsa_description.arg')] # arg = Path(dir, 'fsa_description.arg') #
                if str(dir).startswith('TaskH'):
                    compilation_command = ['thraxcompiler', f'--input_grammar={Path(dir, "grammar.grm")}',
                                           f'--output_far={Path(dir, "grammar.far")}']
@ -39,7 +41,7 @@ def get_index():

 def is_task_set_correct(dir, task_set):
    try:
-        with open(Path(dir, f'{task_set}.out')) as f_exp, open(Path(dir, f'{task_set}.exp')) as f_out:
+        with open(Path(dir, f'{task_set}.out')) as f_out, open(Path(dir, f'{task_set}.exp')) as f_exp:
            f_out_lines = ''.join(f_out.readlines())
            f_exp_lines = ''.join(f_exp.readlines())
            return f_out_lines == f_exp_lines