Added regex for postal codes
This commit is contained in:
parent
c32112799b
commit
364798ca52
18
projekt.py
18
projekt.py
@ -2,9 +2,7 @@ import fitz
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
my_path = r"C:\Users\DELL\Downloads\A-24VU-00511.PDF.pdf"
|
my_path = r"C:\Users\DELL\Downloads\A-24VU-00511.PDF.pdf"
|
||||||
|
postal_code_pattern = r"(?<!\S)(?:(?:[A-Za-z]{3,}\s+)(\d{5}|\d{2}-\d{3})|(\d{5}|\d{2}-\d{3})(?:\s+[A-Za-z]{3,}))(?!\S)"
|
||||||
postal_code_pattern = r'\b(?!0)([1-9]\d?-\d{3}|[1-9]\d{4}|[1-9]\d? \d{3})\b'
|
|
||||||
|
|
||||||
with fitz.open(my_path) as doc:
|
with fitz.open(my_path) as doc:
|
||||||
order_numbers = []
|
order_numbers = []
|
||||||
order_dates = []
|
order_dates = []
|
||||||
@ -34,14 +32,12 @@ with fitz.open(my_path) as doc:
|
|||||||
line_above = lines[i - 1] if i > 0 else ''
|
line_above = lines[i - 1] if i > 0 else ''
|
||||||
line_below = lines[i + 1] if i + 1 < len(lines) else ''
|
line_below = lines[i + 1] if i + 1 < len(lines) else ''
|
||||||
|
|
||||||
|
buyers.append({
|
||||||
if not re.search(r'[^0-9a-zA-Z \-]', line):
|
'line_2': line_2,
|
||||||
buyers.append({
|
'line_above': line_above,
|
||||||
'line_2': line_2,
|
'postal_code_line': line,
|
||||||
'line_above': line_above,
|
'line_below': line_below
|
||||||
'postal_code_line': line,
|
})
|
||||||
'line_below': line_below
|
|
||||||
})
|
|
||||||
|
|
||||||
print("Reference numbers:", order_numbers)
|
print("Reference numbers:", order_numbers)
|
||||||
print("Document dates:", order_dates)
|
print("Document dates:", order_dates)
|
||||||
|
Loading…
Reference in New Issue
Block a user