65 lines
1.8 KiB
Python
65 lines
1.8 KiB
Python
|
# From https://stackoverflow.com/questions/452104/is-it-worth-using-pythons-re-compile#comment108948583_452104
|
||
|
|
||
|
import re
|
||
|
import time
|
||
|
|
||
|
def setup(N=1000):
    """Build the benchmark inputs.

    Args:
        N: Number of copies of the test string to produce. A larger N
            makes the timing differences more pronounced.

    Returns:
        A ``(patterns, strings)`` tuple: ``patterns`` holds the 26 * 26
        regex sources ``'a.*a'``, ``'a.*b'``, ..., ``'z.*z'`` and
        ``strings`` holds ``N`` copies of the doubled lowercase alphabet.
    """
    letters = [chr(code) for code in range(ord('a'), ord('z') + 1)]

    # One pattern per ordered pair of letters, first letter varying slowest.
    patterns = [f'{first}.*{last}' for first in letters for last in letters]

    # NOTE: the benchmark is meant to use more distinct patterns than the
    # re module caches internally (i.e. re._MAXCACHE < len(patterns)).
    # If that ever stops holding, add more (distinct) patterns.

    strings = ['abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz'] * N
    return patterns, strings
|
||
|
|
||
|
def without_compile():
    """Count matches using module-level ``re.search`` in string-major order.

    The outer loop walks the strings and the inner loop walks every
    pattern, so each pattern source is handed to ``re.search`` again for
    every string. Presumably this is the ordering that defeats re's
    internal pattern cache when there are more patterns than cache slots.

    Returns:
        The number of (string, pattern) pairs for which a match was found.
    """
    print('Without re.compile:')
    patterns, strings = setup()
    print('searching')

    hits = 0
    for text in strings:
        for source in patterns:
            # Keep the explicit nested loops: the iteration order is the
            # whole point of this variant.
            if re.search(source, text):
                hits += 1
    return hits
|
||
|
|
||
|
def without_compile_cache_friendly():
    """Count matches using module-level ``re.search`` in pattern-major order.

    The outer loop walks the patterns and the inner loop walks every
    string, so the same pattern source is passed to ``re.search`` many
    times in a row before moving on to the next one.

    Returns:
        The number of (pattern, string) pairs for which a match was found.
    """
    print('Without re.compile, cache-friendly order:')
    patterns, strings = setup()
    print('searching')

    hits = 0
    for source in patterns:
        for text in strings:
            # Same pattern is reused across the whole inner loop.
            if re.search(source, text):
                hits += 1
    return hits
|
||
|
|
||
|
def with_compile():
    """Count matches using regexes compiled once up front.

    Every pattern is compiled a single time with ``re.compile``; the
    search phase then iterates in string-major order (strings outside,
    compiled regexes inside) and calls ``.search`` on the compiled
    objects directly.

    Returns:
        The number of (string, regex) pairs for which a match was found.
    """
    print('With re.compile:')
    patterns, strings = setup()

    print('compiling')
    regexes = list(map(re.compile, patterns))

    print('searching')
    hits = 0
    for text in strings:
        for regex in regexes:
            if regex.search(text):
                hits += 1
    return hits
|
||
|
|
||
|
def _timed(benchmark):
    """Run one benchmark, print its result and duration, return the seconds.

    Uses ``time.perf_counter()`` rather than ``time.time()``: it is the
    clock intended for measuring short durations and is not affected by
    system clock adjustments.

    Args:
        benchmark: Zero-argument callable returning the match count.

    Returns:
        Elapsed time in seconds, including the printing of the result
        (as in the original measurement).
    """
    start = time.perf_counter()
    print(benchmark())
    elapsed = time.perf_counter() - start
    print(f'-- That took {elapsed:.2f} seconds.\n')
    return elapsed


# Run order matters for the comparison: compiled first, then the two
# uncompiled variants.
d1 = _timed(with_compile)
d2 = _timed(without_compile_cache_friendly)
d3 = _timed(without_compile)

# How many times slower the uncompiled, string-major variant is than the
# precompiled one.
print(f'Ratio: {d3/d1:.2f}')
|