From 364798ca5281e7ed26bd415ba3a207bad2fc0ffb Mon Sep 17 00:00:00 2001 From: Zuzanna Rachuba Date: Sun, 3 Nov 2024 12:15:47 +0100 Subject: [PATCH] Added regex for postal-codes --- projekt.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/projekt.py b/projekt.py index 2d9ce13..795d1e3 100644 --- a/projekt.py +++ b/projekt.py @@ -2,9 +2,7 @@ import fitz import re my_path = r"C:\Users\DELL\Downloads\A-24VU-00511.PDF.pdf" - -postal_code_pattern = r'\b(?!0)([1-9]\d?-\d{3}|[1-9]\d{4}|[1-9]\d? \d{3})\b' - +postal_code_pattern = r"(?0 else '' line_above = lines[i - 1] if i > 0 else '' line_below = lines[i + 1] if i + 1 < len(lines) else '' - - - if not re.search(r'[^0-9a-zA-Z \-]', line): - buyers.append({ - 'line_2': line_2, - 'line_above': line_above, - 'postal_code_line': line, - 'line_below': line_below - }) + + buyers.append({ + 'line_2': line_2, + 'line_above': line_above, + 'postal_code_line': line, + 'line_below': line_below + }) print("Reference numbers:", order_numbers) print("Document dates:", order_dates)