# Flask service exposing POST /invoice: extracts the seller name, NIP and gross total from an uploaded PDF invoice.
import os
import re
import tempfile

import fitz
from flask import Flask, request, jsonify

# Directory (relative to the current working directory) that holds uploaded
# files while they are being parsed. Created at import time; exist_ok makes
# repeated imports/restarts harmless.
UPLOAD_FOLDER = 'uploads'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Flask application object on which the /invoice route is registered.
app = Flask(__name__)

def extract_total_alternate(text):
    """Return the gross total from a line-per-cell "PODSUMOWANIE" summary block.

    The expected layout is the five header lines ("PODSUMOWANIE",
    "Wartość netto", "Stawka VAT", "VAT", "Wartość brutto") followed by four
    value lines: net amount, VAT rate, VAT amount and gross amount. Matching
    is case-insensitive, and amounts may use either "." or "," as the decimal
    separator.

    Args:
        text: Plain text of the invoice.

    Returns:
        The gross amount as a float, or None when ``text`` is not a string or
        does not contain the summary block with a gross value.
    """
    # The original swallowed every exception and returned None; keep that
    # "never raises" contract for non-string input without a blanket except.
    if not isinstance(text, str):
        return None

    pattern = (
        r"PODSUMOWANIE\n"
        r"Wartość netto\n"
        r"Stawka VAT\n"
        r"VAT\n"
        r"Wartość brutto\n"
        r"\d+(?:[.,]\d{1,2})?\n"       # net amount
        r"\d+%?\n"                     # VAT rate, "%" optional
        r"\d+(?:[.,]\d{1,2})?\n"       # VAT amount
        r"(?P<gross>\d+[.,]\d{1,2})"   # gross amount (decimal part required)
    )
    match = re.search(pattern, text, re.IGNORECASE)
    if match is None:
        return None

    # The pattern guarantees digits + one separator + digits, so float() is safe.
    return float(match.group("gross").replace(',', '.'))

def extract_total(text):
    """Return the invoice's gross total, or None when it cannot be found.

    The primary format is a "Wartość brutto" line followed by a line of the
    form "<amount> PLN" (case-insensitive). When that is absent, or the
    matched amount is not a usable number, the result of
    ``extract_total_alternate(text)`` is returned instead.

    Args:
        text: Plain text of the invoice.

    Returns:
        The gross amount as a float, or None.
    """
    match = re.search(r'(?i)Wartość brutto\n([0-9.,]+) PLN', text)
    total = _parse_amount(match.group(1)) if match else None

    if total is None:
        total = extract_total_alternate(text)

    return total


def _parse_amount(raw):
    """Convert a string of digits, dots and commas to float, or None if unusable."""
    # Thousands-grouped amount with a 1-2 digit decimal part, e.g. "1.234,56"
    # or "1,234.56": the last separator is the decimal mark, the rest is
    # grouping. A plain replace(',', '.') would yield "1.234.56" and make
    # float() raise.
    if re.fullmatch(r'\d{1,3}(?:[.,]\d{3})+[.,]\d{1,2}', raw):
        whole, _, fraction = raw.replace(',', '.').rpartition('.')
        return float(f"{whole.replace('.', '')}.{fraction}")

    try:
        return float(raw.replace(',', '.'))
    except ValueError:
        # e.g. "..", or an ambiguous grouping like "1.234.567": treat it as
        # "no total here" so the caller can try the alternate layout.
        return None

def extract_invoice_data(text):
    """Pull the seller name, seller NIP and gross total out of invoice text.

    Args:
        text: Plain text of the invoice.

    Returns:
        A dict with the keys "vat_id", "seller_name" and "total"; a field is
        None when it was not found. If anything raises while extracting, a
        dict with the single key "error" describing the failure is returned
        instead (no exception propagates).
    """
    try:
        # Seller name: the line directly below the "Sprzedawca:" label.
        name_hit = re.search(r'(?i)Sprzedawca:\n(.*?)\n', text)
        if name_hit:
            seller_name = name_hit.group(1).strip()
        else:
            seller_name = None

        # Seller NIP: the first "NIP:" that follows the "Sprzedawca:" label,
        # possibly several lines later (hence DOTALL).
        # NOTE(review): only a contiguous run of digits is captured, so a
        # dash-separated NIP would stop at the first dash — confirm the
        # expected input format.
        nip_hit = re.search(r'(?i)Sprzedawca:.*?NIP:\s*(\d+)', text, re.DOTALL)
        if nip_hit:
            vat_id = nip_hit.group(1)
        else:
            vat_id = None

        gross_total = extract_total(text)

        return {
            "vat_id": vat_id,
            "seller_name": seller_name,
            "total": gross_total,
        }
    except Exception as e:
        return {"error": f"Failed to extract data: {str(e)}"}

@app.route('/invoice', methods=['POST'])
def process_invoice():
    """Handle POST /invoice: parse an uploaded PDF and return the extracted fields.

    The PDF is expected as a multipart upload under the form field "file".
    It is written to a uniquely named temporary file inside UPLOAD_FOLDER,
    its text is read with PyMuPDF, and the temporary file is always removed
    before the response is returned.

    Returns:
        A (JSON response, status code) pair:
        * 400 and {"error": ...} when the "file" part is missing or has no
          filename;
        * 200 and the dict produced by extract_invoice_data(text) otherwise;
        * 500 and {"error": <exception text>} when saving or reading the
          document raises.
    """
    if 'file' not in request.files:
        return jsonify({"error": "No file provided"}), 400

    upload = request.files['file']
    if not upload.filename:
        return jsonify({"error": "Empty filename"}), 400

    # The client-supplied filename is untrusted: joining it into the path
    # allows "../" traversal, and two concurrent uploads with the same name
    # would overwrite/delete each other's file. Only a plain alphanumeric
    # extension is kept; the rest of the name is generated by mkstemp.
    suffix = os.path.splitext(upload.filename)[1]
    if not suffix[1:].isalnum():
        suffix = ''
    fd, filepath = tempfile.mkstemp(suffix=suffix, dir=UPLOAD_FOLDER)
    os.close(fd)  # upload.save() reopens the path itself

    try:
        upload.save(filepath)
        with fitz.open(filepath) as pdf:
            text = "".join(page.get_text() for page in pdf)

        # NOTE(review): extract_invoice_data reports its own failures as an
        # {"error": ...} dict, which is still sent with status 200 here.
        return jsonify(extract_invoice_data(text)), 200
    except Exception as e:
        return jsonify({"error": str(e)}), 500
    finally:
        # Cleanup is best-effort: a failure to delete must not replace the
        # response that was already chosen above.
        try:
            os.remove(filepath)
        except OSError:
            app.logger.warning(
                "Could not remove temporary upload %s", filepath, exc_info=True
            )

if __name__ == '__main__':
    # Only when the file is executed as a script (not imported): start the
    # server via app.run, bound to all interfaces (0.0.0.0) on port 8080.
    listen_host = '0.0.0.0'
    listen_port = 8080
    app.run(host=listen_host, port=listen_port)