add notes
This commit is contained in:
parent
09484dce79
commit
a88a2a3779
133
README.md
133
README.md
@ -1,4 +1,4 @@
|
|||||||
# DJFZ 2021
|
# DJFZ 2021
|
||||||
|
|
||||||
## Zajęcia 1 11.10.2021
|
## Zajęcia 1 11.10.2021
|
||||||
|
|
||||||
@ -174,3 +174,134 @@ re.search('mam (kota).*(kota|psa)','Ja mam kota. Ala ma psa.').group(1)
|
|||||||
|
|
||||||
re.search('mam (kota).*(kota|psa)','Ja mam kota. Ala ma psa.').group(2)
|
re.search('mam (kota).*(kota|psa)','Ja mam kota. Ala ma psa.').group(2)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
### Przykłady wyrażenia regularne 2 (objaśnienia na laboratoriach)
|
||||||
|
|
||||||
|
#### ^
|
||||||
|
```
|
||||||
|
re.search('[^0-9][0-9]+[^0-9]', '123-456-789')
|
||||||
|
```
|
||||||
|
|
||||||
|
#### cudzysłów
|
||||||
|
'' oraz "" - oznaczają to samo w pythonie
|
||||||
|
|
||||||
|
' ala ma psa o imieniu "Burek"'
|
||||||
|
" ala ma psa o imieniu 'Burek'"
|
||||||
|
|
||||||
|
' ala ma psa o imieniu \'Burek\''
|
||||||
|
" ala ma psa o imieniu \"Burek\""
|
||||||
|
|
||||||
|
#### multiline string
|
||||||
|
|
||||||
|
#### raw string
|
||||||
|
|
||||||
|
przy raw string znaki \ traktowane są jako zwykłe znaki '\'
|
||||||
|
|
||||||
|
chociaż nawet w raw string cudzysłowy nadal mogą być escapowane znakiem \ (wtedy jednak znak \ również pozostaje w stringu bez zmian)
|
||||||
|
|
||||||
|
https://docs.python.org/3/reference/lexical_analysis.html
|
||||||
|
|
||||||
|
dobra praktyka - wszędzie escapować
|
||||||
|
|
||||||
|
```
|
||||||
|
'\\'
|
||||||
|
print('\\')
|
||||||
|
|
||||||
|
r'\\'
|
||||||
|
print(r'\\')
|
||||||
|
|
||||||
|
|
||||||
|
print("abcd")
|
||||||
|
print("ab\cd")
|
||||||
|
print(r"ab\cd")
|
||||||
|
|
||||||
|
print("ab\nd")
|
||||||
|
print(r"ab\nd")
|
||||||
|
|
||||||
|
|
||||||
|
print("\"")
|
||||||
|
print(r"\"")
|
||||||
|
|
||||||
|
print("\")
|
||||||
|
print(r"\")
|
||||||
|
|
||||||
|
re.search('\\', r'a\bc')
|
||||||
|
re.search(r'\\', r'a\bc')
|
||||||
|
re.search('\\\\', r'a\bc')
|
||||||
|
```
|
||||||
|
|
||||||
|
#### RE SUB
|
||||||
|
```
|
||||||
|
re.sub(pattern, replacement, string)
|
||||||
|
|
||||||
|
re.sub('a','b', 'ala ma kota')
|
||||||
|
```
|
||||||
|
|
||||||
|
#### backreferencje:
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
re.search(r' \d+ \d+', 'ala ma 41 41 kota')
|
||||||
|
re.search(r' \d+ \d+', 'ala ma 41 123 kota')
|
||||||
|
re.search(r' (\d+) \1', 'ala ma 41 41 kota')
|
||||||
|
re.search(r' (\d+) \1', 'ala ma 41 123 kota')
|
||||||
|
```
|
||||||
|
|
||||||
|
#### lookahead (to są takie asercje):
|
||||||
|
```
|
||||||
|
re.search(r'ma kot', 'ala ma kot')
|
||||||
|
re.search(r'ma kot(?=[ay])', 'ala ma kot')
|
||||||
|
re.search(r'ma kot(?=[ay])', 'ala ma kotka')
|
||||||
|
re.search(r'ma kot(?=[ay])', 'ala ma koty')
|
||||||
|
re.search(r'ma kot(?=[ay])', 'ala ma kota')
|
||||||
|
|
||||||
|
re.search(r'ma kot(?![ay])', 'ala ma kot')
|
||||||
|
re.search(r'ma kot(?![ay])', 'ala ma kotka')
|
||||||
|
re.search(r'ma kot(?![ay])', 'ala ma koty')
|
||||||
|
re.search(r'ma kot(?![ay])', 'ala ma kota')
|
||||||
|
```
|
||||||
|
|
||||||
|
#### named groups
|
||||||
|
```
|
||||||
|
r = re.search(r'ma (?P<ilekotow>\d+) kotow i (?P<ilepsow>\d+) psow', 'ala ma 100 kotow i 200 psow')
|
||||||
|
r.groups()
|
||||||
|
r.group('ilepsow')
|
||||||
|
r.group('ilekotow')
|
||||||
|
```
|
||||||
|
|
||||||
|
#### re.split
|
||||||
|
```
|
||||||
|
('a,b,c,d').split(',')
|
||||||
|
re.split(r',', 'a,b,c,d')
|
||||||
|
re.split(r'[.,]', 'a,b.c,d')
|
||||||
|
```
|
||||||
|
#### \w word character
|
||||||
|
```
|
||||||
|
\w - matchuje Unicode word character, jeżeli flaga ASCII to [a-zA-Z0-9_]
|
||||||
|
\W - odwrotne do \w, jeżeli flaga ASCII to [^a-zA-Z0-9_]
|
||||||
|
|
||||||
|
re.findall(r'\w+', 'ala ma 3 koty')
|
||||||
|
re.findall(r'\W+', 'ala ma 3 koty')
|
||||||
|
```
|
||||||
|
#### początek albo koniec słowa | word boundary
|
||||||
|
```
|
||||||
|
re.search(r'\bkot\b', 'Ala ma kota')
|
||||||
|
re.search(r'\bkot\b', 'Ala ma kot')
|
||||||
|
re.search(r'\bkot\b', 'Ala ma kot.')
|
||||||
|
re.search(r'\bkot\b', 'Ala ma kot ')
|
||||||
|
|
||||||
|
re.search(r'\Bot\B', 'Ala ma kot ')
|
||||||
|
re.search(r'\Bot\B', 'Ala ma kota ')
|
||||||
|
```
|
||||||
|
#### MULTILINE
|
||||||
|
```
|
||||||
|
re.findall(r'^Ma', 'Ma kota Ala\nMa psa Jacek')
|
||||||
|
re.findall(r'^Ma', 'Ma kota Ala\nMa psa Jacek', re.MULTILINE)
|
||||||
|
```
|
||||||
|
#### RE.COMPILE
|
||||||
|
64
compile_example.py
Normal file
64
compile_example.py
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
# from https://stackoverflow.com/questions/452104/is-it-worth-using-pythons-re-compile#comment108948583_452104
|
||||||
|
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
|
||||||
|
def setup(N=1000):
    """Build the inputs shared by all benchmark variants.

    Args:
        N: Number of copies of the sample string to generate. Increase N
            for a larger effect.

    Returns:
        A ``(patterns, strings)`` tuple. ``patterns`` is the list of regex
        strings 'a.*a', 'a.*b', ..., 'z.*z' (every ordered pair of lowercase
        ASCII letters). ``strings`` is a list holding ``N`` references to
        the lowercase alphabet written twice in a row.
    """
    letters = [chr(code) for code in range(ord('a'), ord('z') + 1)]
    # Patterns 'a.*a', 'a.*b', ..., 'z.*z'
    patterns = [first + '.*' + last for first in letters for last in letters]
    # The benchmark is meant to use more distinct patterns than the re
    # module caches internally; the original author's (disabled) check was
    # `re._MAXCACHE < len(patterns)`. If that ever stops holding, just add
    # more (distinct) patterns.
    strings = ['abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz'] * N
    return (patterns, strings)
|
||||||
|
|
||||||
|
def without_compile():
    """Count matches using ``re.search`` with plain string patterns.

    The strings are walked in the outer loop and the patterns in the inner
    loop, so the full pattern list is cycled through once per string. This
    ordering is deliberate and must stay as is: it is what distinguishes
    this variant from ``without_compile_cache_friendly``.

    Returns:
        The number of (string, pattern) pairs for which a match was found.
    """
    print('Without re.compile:')
    patterns, strings = setup()
    print('searching')
    count = 0
    for s in strings:
        for pat in patterns:
            # bool(...) turns "match object or None" into 1 or 0.
            count += bool(re.search(pat, s))
    return count
|
||||||
|
|
||||||
|
def without_compile_cache_friendly():
    """Count matches using ``re.search``, one pattern at a time.

    Same work as ``without_compile`` but with the loops swapped: each
    pattern is applied to every string before moving on to the next
    pattern, so consecutive ``re.search`` calls reuse the same pattern.

    Returns:
        The number of (pattern, string) pairs for which a match was found.
    """
    print('Without re.compile, cache-friendly order:')
    patterns, strings = setup()
    print('searching')
    count = 0
    for pat in patterns:
        for s in strings:
            # bool(...) turns "match object or None" into 1 or 0.
            count += bool(re.search(pat, s))
    return count
|
||||||
|
|
||||||
|
def with_compile():
    """Count matches using patterns precompiled with ``re.compile``.

    Every pattern is compiled exactly once up front; the search loop then
    calls ``search`` on the compiled objects, iterating strings in the
    outer loop and compiled patterns in the inner loop.

    Returns:
        The number of (string, pattern) pairs for which a match was found.
    """
    print('With re.compile:')
    patterns, strings = setup()
    print('compiling')
    compiled = [re.compile(pattern) for pattern in patterns]
    print('searching')
    count = 0
    for s in strings:
        for regex in compiled:
            # bool(...) turns "match object or None" into 1 or 0.
            count += bool(regex.search(s))
    return count
|
||||||
|
|
||||||
|
# Run the three variants back to back and report how long each one took.
# time.perf_counter() is used instead of time.time(): it is monotonic and
# has the highest available resolution, so the measured intervals cannot be
# skewed by system clock adjustments.
start = time.perf_counter()
print(with_compile())
d1 = time.perf_counter() - start
print(f'-- That took {d1:.2f} seconds.\n')

start = time.perf_counter()
print(without_compile_cache_friendly())
d2 = time.perf_counter() - start
print(f'-- That took {d2:.2f} seconds.\n')

start = time.perf_counter()
print(without_compile())
d3 = time.perf_counter() - start
print(f'-- That took {d3:.2f} seconds.\n')

# Slowdown of the plain re.search variant relative to the precompiled one.
print(f'Ratio: {d3/d1:.2f}')
|
Loading…
Reference in New Issue
Block a user