aws_faktury/invoice_service.py
dzikafoczka 9b4d51c256 test
2024-12-22 18:57:41 +01:00

75 lines
2.2 KiB
Python

import os
import re
import tempfile

import fitz
from flask import Flask, request, jsonify
from flask_cors import CORS
# Flask application object; the route defined in this module is registered on it.
app = Flask(__name__)
# Wrap the app with flask_cors using the extension's default settings.
CORS(app)
# Directory (relative to the working directory) that holds uploads while
# they are being processed.
UPLOAD_FOLDER = 'uploads'
# Create the upload directory at import time; no error if it already exists.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
def extract_total_alternate(text):
    """Extract the gross total from a tabular "PODSUMOWANIE" summary block.

    Fallback parser for invoice text where the summary is laid out as a run
    of header lines (``Wartość netto`` / ``Stawka VAT`` / ``VAT`` /
    ``Wartość brutto``) followed by the values, one per line, in the order:
    net value, VAT rate, VAT amount, gross value.

    Args:
        text: Plain text extracted from the invoice.

    Returns:
        The gross amount as a float, or None when ``text`` is not a string,
        the summary block is not present, or the amount cannot be parsed.
        This function never raises.
    """
    if not isinstance(text, str):
        return None

    # An amount is digits with an optional 1-2 digit fraction; both "." and
    # "," are accepted as the decimal separator.
    amount = r'\d+(?:[.,]\d{1,2})?'
    pattern = (
        r'PODSUMOWANIE\nWartość netto\nStawka VAT\nVAT\nWartość brutto\n'
        rf'{amount}\n'            # net value
        r'\d+%?\n'                # VAT rate, with or without the percent sign
        rf'{amount}\n'            # VAT amount
        rf'(?P<gross>{amount})'   # gross value - the one we return
    )

    match = re.search(pattern, text, re.IGNORECASE)
    if match is None:
        return None

    try:
        return float(match.group('gross').replace(',', '.'))
    except ValueError:
        return None
def extract_total(text):
    """Return the invoice gross total found in ``text``.

    The primary format is a ``Wartość brutto`` line followed by a line of
    the form ``<amount> PLN``. When that is absent, the result of
    ``extract_total_alternate`` is returned instead (which may be None).

    Args:
        text: Plain text extracted from the invoice.

    Returns:
        The gross amount as a float, or None when neither format matches.
    """
    primary = re.search(r'(?i)Wartość brutto\n([0-9.,]+) PLN', text)
    if primary:
        # A comma is treated as the decimal separator.
        # NOTE(review): the capture may hold several separators (e.g.
        # "1.234,56"), in which case float() raises ValueError here.
        normalized = primary.group(1).replace(',', '.')
        return float(normalized)

    # Primary layout not found: fall back to the tabular summary parser.
    return extract_total_alternate(text)
def extract_invoice_data(text):
    """Collect the seller name, seller NIP and gross total from invoice text.

    Args:
        text: Plain text extracted from the invoice.

    Returns:
        A dict with the keys ``seller_nip``, ``seller_name`` and ``total``
        (each None when not found), or ``{"error": ...}`` if any exception
        is raised while extracting.
    """
    try:
        # Seller name: the line directly after the "Sprzedawca:" label.
        name_hit = re.search(r'(?i)Sprzedawca:\n(.*?)\n', text)
        # Seller NIP: first "NIP:" digits after the label; DOTALL lets the
        # lazy ".*?" span the lines in between.
        nip_hit = re.search(r'(?i)Sprzedawca:.*?NIP:\s*(\d+)', text, re.DOTALL)

        extracted = {
            "seller_nip": nip_hit.group(1) if nip_hit else None,
            "seller_name": name_hit.group(1).strip() if name_hit else None,
            "total": extract_total(text),
        }
    except Exception as exc:
        return {"error": f"Failed to extract data: {str(exc)}"}
    return extracted
@app.route('/invoice', methods=['POST'])
def process_invoice():
    """Handle ``POST /invoice``: extract invoice data from an uploaded file.

    Expects a multipart upload under the ``file`` form field. The upload is
    written to a server-generated temporary file inside ``UPLOAD_FOLDER``,
    its text is read page by page with ``fitz``, and the temporary file is
    removed before the response is returned.

    Returns:
        A ``(response, status)`` tuple:
        * 400 with ``{"error": ...}`` when no file or no filename was sent,
        * 200 with the dict produced by ``extract_invoice_data``,
        * 500 with ``{"error": ...}`` when saving or reading the file fails.
    """
    if 'file' not in request.files:
        return jsonify({"error": "No file provided"}), 400

    file = request.files['file']
    if not file.filename:
        return jsonify({"error": "Empty filename"}), 400

    # The client-supplied filename is untrusted: joining it into a path would
    # let "../" or absolute names write (and later delete) files outside the
    # upload folder. Only a plain alphanumeric extension is kept, so the
    # document opener sees the same extension as before.
    extension = os.path.splitext(file.filename)[1]
    if not re.fullmatch(r'\.[A-Za-z0-9]{1,10}', extension):
        extension = ''

    filepath = None
    try:
        # A unique server-side name also prevents concurrent uploads with the
        # same filename from overwriting each other.
        fd, filepath = tempfile.mkstemp(suffix=extension, dir=UPLOAD_FOLDER)
        os.close(fd)
        file.save(filepath)

        with fitz.open(filepath) as pdf:
            text = "".join(page.get_text() for page in pdf)

        # NOTE(review): extract_invoice_data reports its own failures as an
        # {"error": ...} dict, which is still returned with status 200 here.
        invoice_data = extract_invoice_data(text)
        return jsonify(invoice_data), 200
    except Exception as e:
        return jsonify({"error": str(e)}), 500
    finally:
        if filepath is not None:
            try:
                os.remove(filepath)
            except OSError:
                # A failed cleanup must not replace the response already
                # chosen above; record it instead.
                app.logger.warning(
                    "Could not remove temporary upload %s", filepath
                )
if __name__ == "__main__":
    # Started directly as a script: serve the app on all interfaces, port 8080.
    app.run(host="0.0.0.0", port=8080)