From a88a2a3779decd4994c924064540cd03c1346969 Mon Sep 17 00:00:00 2001 From: Jakub Pokrywka Date: Sun, 5 Dec 2021 20:11:19 +0100 Subject: [PATCH] add notes --- README.md | 133 ++++++++++++++++++++++++++++++++++++++++++++- compile_example.py | 64 ++++++++++++++++++++++ 2 files changed, 196 insertions(+), 1 deletion(-) create mode 100644 compile_example.py diff --git a/README.md b/README.md index 726f967..664fe32 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# DJFZ 2021 +# DJFZ 2021 ## Zajęcia 1 11.10.2021 @@ -174,3 +174,134 @@ re.search('mam (kota).*(kota|psa)','Ja mam kota. Ala ma psa.').group(1) re.search('mam (kota).*(kota|psa)','Ja mam kota. Ala ma psa.').group(2) ``` + + + + + + + +### Przykłady wyrażenia regularne 2 (objaśnienia na laboratoriach) + +#### ^ +``` +re.search('[^0-9][0-9]+[^0-9]', '123-456-789') +``` + +#### cudzysłów +'' oraz "" - oznaczają to samo w pythonie + +' ala ma psa o imieniu "Burek"' +" ala ma psa o imieniu 'Burek'" + +' ala ma psa o imieniu \'Burek\'' +" ala ma psa o imieniu \"Burek\"" + +#### multiline string + +#### raw string + +przy raw string znaki \ traktowane są jako zwykłe znaki '\' + +chociaż nawet w raw string nadal są escapowane (chociaż wtedy \ pozostają również w stringu bez zmian) + +https://docs.python.org/3/reference/lexical_analysis.html + +dobra praktyka - wszędzie escapować + +``` +'\\' +print('\\') + +r'\\' +print(r'\\') + + +print("abcd") +print("ab\cd") +print(r"ab\cd") + +print("ab\nd") +print(r"ab\nd") + + +print("\"") +print(r"\"") + +print("\") +print(r"\") + +re.search('\\', r'a\bc') +re.search(r'\\', r'a\bc') +re.search('\\\\', r'a\bc') +``` + +#### RE SUB +``` +re.sub(pattern, replacement, string) + +re.sub('a','b', 'ala ma kota') +``` + +#### backreferencje: + +``` + +re.search(r' \d+ \d+', 'ala ma 41 41 kota') +re.search(r' \d+ \d+', 'ala ma 41 123 kota') +re.search(r' (\d+) \1', 'ala ma 41 41 kota') +re.search(r' (\d+) \1', 'ala ma 
41 123 kota') +``` + +#### lookahead (to są takie asercje): +``` +re.search(r'ma kot', 'ala ma kot') +re.search(r'ma kot(?=[ay])', 'ala ma kot') +re.search(r'ma kot(?=[ay])', 'ala ma kotka') +re.search(r'ma kot(?=[ay])', 'ala ma koty') +re.search(r'ma kot(?=[ay])', 'ala ma kota') + +re.search(r'ma kot(?![ay])', 'ala ma kot') +re.search(r'ma kot(?![ay])', 'ala ma kotka') +re.search(r'ma kot(?![ay])', 'ala ma koty') +re.search(r'ma kot(?![ay])', 'ala ma kota') +``` + +#### named groups +``` +r = re.search(r'ma (?P<ilekotow>\d+) kotow i (?P<ilepsow>\d+) psow', 'ala ma 100 kotow i 200 psow') +r.groups() +r.group('ilepsow') +r.group('ilekotow') +``` + +#### re.split +``` +('a,b,c,d').split(',') +re.split(r',', 'a,b,c,d') +re.split(r'[.,]', 'a,b.c,d') +``` +#### \w word character +``` +\w - matchuje Unicode word character, jeżeli flaga ASCII to [a-zA-Z0-9_] +\W - odwrotne do \w, jeżeli flaga ASCII to [^a-zA-Z0-9_] + +re.findall(r'\w+', 'ala ma 3 koty') +re.findall(r'\W+', 'ala ma 3 koty') +``` +#### początek albo koniec słowa | word boundary +``` +re.search(r'\bkot\b', 'Ala ma kota') +re.search(r'\bkot\b', 'Ala ma kot') +re.search(r'\bkot\b', 'Ala ma kot.') +re.search(r'\bkot\b', 'Ala ma kot ') + +re.search(r'\Bot\B', 'Ala ma kot ') +re.search(r'\Bot\B', 'Ala ma kota ') +``` +#### MULTILINE +``` +re.findall(r'^Ma', 'Ma kota Ala\nMa psa Jacek') +re.findall(r'^Ma', 'Ma kota Ala\nMa psa Jacek', re.MULTILINE) +``` +#### RE.COMPILE diff --git a/compile_example.py b/compile_example.py new file mode 100644 index 0000000..a05e13b --- /dev/null +++ b/compile_example.py @@ -0,0 +1,64 @@ +# from https://stackoverflow.com/questions/452104/is-it-worth-using-pythons-re-compile#comment108948583_452104 + +import re +import time + +def setup(N=1000): + # Patterns 'a.*a', 'a.*b', ..., 'z.*z' + patterns = [chr(i) + '.*' + chr(j) + for i in range(ord('a'), ord('z') + 1) + for j in range(ord('a'), ord('z') + 1)] + # If this assertion below fails, just add more (distinct) patterns. 
+    # assert(re._MAXCACHE < len(patterns))
+    # N strings. Increase N for larger effect.
+    strings = ['abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz'] * N
+    return (patterns, strings)
+
+def without_compile():
+    """Count matches with module-level ``re.search``, one string at a time.
+
+    Every string is tested against every pattern before the next string is
+    taken, so successive ``re.search`` calls always receive a different
+    pattern.
+
+    Returns:
+        Number of (string, pattern) pairs for which a match was found.
+    """
+    print('Without re.compile:')
+    patterns, strings = setup()
+    print('searching')
+    count = 0
+    for s in strings:
+        for pat in patterns:
+            count += bool(re.search(pat, s))
+    return count
+
+def without_compile_cache_friendly():
+    """Count matches with module-level ``re.search``, one pattern at a time.
+
+    The loops are swapped relative to ``without_compile``: a pattern is
+    applied to all strings before the next pattern is taken, so the same
+    pattern is passed to ``re.search`` many times in a row.
+
+    Returns:
+        Number of (pattern, string) pairs for which a match was found.
+    """
+    print('Without re.compile, cache-friendly order:')
+    patterns, strings = setup()
+    print('searching')
+    count = 0
+    for pat in patterns:
+        for s in strings:
+            count += bool(re.search(pat, s))
+    return count
+
+def with_compile():
+    """Count matches using patterns compiled once up front.
+
+    All patterns are turned into compiled regex objects before searching;
+    the search loop then iterates string by string, like ``without_compile``.
+
+    Returns:
+        Number of (string, pattern) pairs for which a match was found.
+    """
+    print('With re.compile:')
+    patterns, strings = setup()
+    print('compiling')
+    compiled = [re.compile(pattern) for pattern in patterns]
+    print('searching')
+    count = 0
+    for s in strings:
+        for regex in compiled:
+            count += bool(regex.search(s))
+    return count
+
+def _timed(benchmark):
+    """Run ``benchmark()``, print its result and duration, return the seconds.
+
+    ``time.perf_counter()`` is used rather than ``time.time()`` because it is
+    a monotonic, high-resolution clock, so a system clock adjustment during
+    the run cannot distort the measurement.
+    """
+    start = time.perf_counter()
+    print(benchmark())
+    elapsed = time.perf_counter() - start
+    print(f'-- That took {elapsed:.2f} seconds.\n')
+    return elapsed
+
+# Run order: compiled first, then the two variants without re.compile.
+d1 = _timed(with_compile)
+d2 = _timed(without_compile_cache_friendly)
+d3 = _timed(without_compile)
+
+# How many times longer the uncompiled, string-by-string variant took
+# compared with the precompiled one.
+print(f'Ratio: {d3/d1:.2f}')