"""Extract reference numbers from a PDF document.

Every page of the PDF is scanned for the marker ``Your ref. no. PO``. The
first whitespace-delimited token that follows each occurrence of the marker
is collected, and the complete list is printed once all pages are read.
"""
import fitz
import pdfplumber  # NOTE(review): not used by the code below; kept as-is.

# Marker text that precedes every reference number in the extracted page text.
_REF_MARKER = "Your ref. no. PO"

my_path = r"C:\Users\DELL\Downloads\A-24VU-00511.PDF.pdf"


def _reference_numbers(text):
    """Return the token that follows each marker occurrence in *text*.

    Args:
        text: Plain text of a single page.

    Returns:
        A list with one entry per marker occurrence that is followed by at
        least one non-whitespace token. Occurrences followed only by
        whitespace (or by the end of the text) are skipped rather than
        raising ``IndexError``.
    """
    found = []
    # Start from the second chunk: the first one is the text that comes
    # before the first marker, so it never holds a reference number.
    for chunk in text.split(_REF_MARKER)[1:]:
        # str.split() without a separator already discards surrounding
        # whitespace, so the token needs no extra strip().
        tokens = chunk.split(maxsplit=1)
        if tokens:
            found.append(tokens[0])
    return found


order_numbers = []
order_dates = []  # declared for later use; nothing below fills it
buyers = []  # declared for later use; nothing below fills it

with fitz.open(my_path) as doc:
    for page in doc:
        order_numbers.extend(_reference_numbers(page.get_text("text")))


print("Reference numbers: ", order_numbers)