"""Flask service that extracts seller and total data from uploaded invoices.

A single endpoint, ``POST /invoice``, accepts a document in the ``file`` form
field, reads its text with PyMuPDF and returns the seller name, the seller
NIP (VAT id) and the gross total as JSON.
"""

import contextlib
import os
import re
import tempfile

import fitz
from flask import Flask, jsonify, request

app = Flask(__name__)
UPLOAD_FOLDER = 'uploads'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Primary layout: "Wartość brutto" followed on the next line by "<amount> PLN".
# Spaces / non-breaking spaces are allowed inside the amount because they are
# commonly used as thousands separators on Polish invoices.
_TOTAL_RE = re.compile(r'(?i)Wartość brutto\n([0-9.,][0-9., \u00a0]*) PLN')

# Fallback layout: a "PODSUMOWANIE" table whose four header cells are followed
# by the four values, each on its own line.
_SUMMARY_RE = re.compile(
    r'(?i)PODSUMOWANIE\nWartość netto\nStawka VAT\nVAT\nWartość brutto\n'
    r'\d+(?:[.,]\d{1,2})?\n'        # net value
    r'\d+%?\n'                      # VAT rate
    r'\d+(?:[.,]\d{1,2})?\n'        # VAT amount
    r'(?P<gross>\d+[.,]\d{1,2})'    # gross value
)

# Only a short, purely alphanumeric extension is carried over from the
# client-supplied filename to the temporary file.
_SAFE_EXTENSION_RE = re.compile(r'\.[A-Za-z0-9]{1,10}')


def _parse_amount(raw):
    """Convert a printed amount such as ``1 234,56`` to a float.

    Args:
        raw: Amount text as captured from the invoice, or None.

    Returns:
        The parsed value, or None when ``raw`` is empty or not a number.
    """
    if not raw:
        return None
    cleaned = raw.replace(' ', '').replace('\u00a0', '')
    last_comma = cleaned.rfind(',')
    last_dot = cleaned.rfind('.')
    if last_comma != -1 and last_dot != -1:
        # Both separators present: the rightmost one is the decimal mark and
        # the other one only groups thousands, so it is dropped.
        thousands = '.' if last_comma > last_dot else ','
        cleaned = cleaned.replace(thousands, '')
    cleaned = cleaned.replace(',', '.')
    try:
        return float(cleaned)
    except ValueError:
        return None


def extract_total_alternate(text):
    """Read the gross total from the "PODSUMOWANIE" summary table.

    Args:
        text: Plain text of the invoice.

    Returns:
        The gross total as a float, or None when the table is not found,
        the value cannot be parsed, or ``text`` is not a string.
    """
    try:
        match = _SUMMARY_RE.search(text)
    except TypeError:
        # Non-string input: this function reports "not found" instead of
        # raising.
        return None
    if match is None:
        return None
    return _parse_amount(match.group('gross'))


def extract_total(text):
    """Read the gross total of an invoice.

    The "Wartość brutto\\n<amount> PLN" layout is tried first; when it is
    missing or its amount cannot be parsed, the summary-table layout handled
    by ``extract_total_alternate`` is used.

    Args:
        text: Plain text of the invoice.

    Returns:
        The gross total as a float, or None when neither layout matches.
    """
    total_match = _TOTAL_RE.search(text)
    total = _parse_amount(total_match.group(1)) if total_match else None
    if total is None:
        total = extract_total_alternate(text)
    return total


def extract_invoice_data(text):
    """Extract the seller name, seller NIP and gross total from invoice text.

    Args:
        text: Plain text of the invoice.

    Returns:
        A dict with the keys ``vat_id``, ``seller_name`` and ``total`` (each
        None when not found), or a dict with a single ``error`` key when
        extraction raised.
    """
    try:
        # The seller name is the first line after the "Sprzedawca:" label.
        seller_match = re.search(r'(?i)Sprzedawca:\n(.*?)\n', text)
        seller_name = seller_match.group(1).strip() if seller_match else None

        # First "NIP:" after the seller label; DOTALL lets ".*?" span lines.
        nip_match = re.search(
            r'(?i)Sprzedawca:.*?NIP:\s*(\d+)', text, re.DOTALL
        )
        vat_id = nip_match.group(1) if nip_match else None

        return {
            "vat_id": vat_id,
            "seller_name": seller_name,
            "total": extract_total(text),
        }
    except Exception as e:
        # Boundary of the extraction step: failures are reported to the
        # caller as an error payload rather than raised.
        return {"error": f"Failed to extract data: {str(e)}"}


@app.route('/invoice', methods=['POST'])
def process_invoice():
    """Handle ``POST /invoice``: extract data from the uploaded document.

    Returns:
        A ``(response, status)`` pair: 400 when the ``file`` field is missing
        or has an empty filename, 200 with the extracted data, 422 when
        extraction reported an error, and 500 when saving or reading the
        document failed.
    """
    if 'file' not in request.files:
        return jsonify({"error": "No file provided"}), 400

    upload = request.files['file']
    if upload.filename == '':
        return jsonify({"error": "Empty filename"}), 400

    # The client-supplied filename is never used as a path: it could contain
    # "../" or be absolute, and two uploads sharing a name would overwrite or
    # delete each other's file. Only a sanitized extension is kept.
    extension = os.path.splitext(upload.filename)[1]
    if not _SAFE_EXTENSION_RE.fullmatch(extension):
        extension = ''
    fd, filepath = tempfile.mkstemp(suffix=extension, dir=UPLOAD_FOLDER)
    os.close(fd)

    try:
        upload.save(filepath)
        with fitz.open(filepath) as pdf:
            text = "".join(page.get_text() for page in pdf)
        invoice_data = extract_invoice_data(text)
        # An extraction failure must not be reported as a success.
        status = 422 if "error" in invoice_data else 200
        return jsonify(invoice_data), status
    except Exception as e:
        app.logger.exception("Failed to process uploaded invoice")
        return jsonify({"error": str(e)}), 500
    finally:
        with contextlib.suppress(FileNotFoundError):
            os.remove(filepath)


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8080)