aws/invoice/lambda.py
2023-01-31 23:51:50 +01:00

45 lines
1.5 KiB
Python

import boto3
from collections import defaultdict
from urllib.parse import unquote_plus
import json
def print_labels_and_values(field, keys):
    """Match one expense field's label against the wanted keys.

    The field is only considered when it carries both a ``LabelDetection``
    and a ``ValueDetection`` entry. The label text is then searched for each
    key in ``keys`` (case-sensitive substring test, in iteration order); on
    the first hit the ``label:value`` pair is printed and returned.

    Args:
        field: Mapping describing a single expense field.
        keys: Iterable of key strings to look for inside the label text.

    Returns:
        ``(matched_key, value_text)`` for the first key found in the label,
        or ``(None, None)`` when the field has no label/value pair or no key
        matches.
    """
    if "LabelDetection" not in field or "ValueDetection" not in field:
        return None, None

    # A detection without "Text" is treated as empty text instead of raising
    # KeyError; an empty label can never match a non-empty key.
    label = str(field["LabelDetection"].get("Text", ""))
    value = str(field["ValueDetection"].get("Text", ""))

    for key in keys:
        if key in label:
            print(f"{label}:{value}")
            return key, value
    return None, None
def process_expense_analysis(response):
    """Collect the wanted invoice fields from an expense-analysis response.

    Walks every entry of ``response["ExpenseDocuments"]`` and each of its
    ``"SummaryFields"``, passing the field to ``print_labels_and_values``
    together with the wanted keys. Whenever a key is matched, its value is
    stored; a later match for the same key overwrites an earlier one.

    Args:
        response: Mapping with an ``"ExpenseDocuments"`` list whose items
            each contain a ``"SummaryFields"`` list.

    Returns:
        Dict with the keys ``"NIP"``, ``"Sprzedawca"`` and ``"brutto"``;
        keys that were never matched keep the empty string.

    Raises:
        KeyError: If ``"ExpenseDocuments"`` or ``"SummaryFields"`` is missing.
    """
    wanted = {"NIP": "", "Sprzedawca": "", "brutto": ""}
    for expense_doc in response["ExpenseDocuments"]:
        for summary_field in expense_doc["SummaryFields"]:
            key, value = print_labels_and_values(summary_field, wanted.keys())
            if key is not None:
                wanted[key] = value
        # NOTE(review): the original indentation was lost, so the nesting of
        # this blank-line separator is an assumption (once per document) --
        # confirm against the deployed version.
        print()
    return wanted
def lambda_handler(event, context):
    """Analyze each uploaded invoice and store the extracted fields.

    For every record in ``event["Records"]`` the bucket name and object key
    are read from the record's ``"s3"`` section, the object is passed to
    Textract ``analyze_expense``, the wanted fields are extracted with
    ``process_expense_analysis``, the object key is added under ``"name"``,
    and the resulting item is written to the ``texttract-s478874`` DynamoDB
    table.

    Args:
        event: Invocation event containing a ``"Records"`` list of S3
            notification records.
        context: Lambda context object (unused).

    Returns:
        None.
    """
    # Create the service handles once per invocation, not once per record.
    textract = boto3.client('textract')
    table = boto3.resource('dynamodb').Table('texttract-s478874')

    # Process every record: a notification may carry more than one, and
    # handling only the first would silently drop the rest.
    for record in event["Records"]:
        # Bucket name and key are URL-decoded before use ("+" becomes a space).
        bucket = unquote_plus(str(record["s3"]["bucket"]["name"]))
        file_name = unquote_plus(str(record["s3"]["object"]["key"]))
        print(f'Bucket: {bucket}, file: {file_name}')

        response = textract.analyze_expense(
            Document={'S3Object': {'Bucket': bucket, "Name": file_name}}
        )
        invoice_data = process_expense_analysis(response)
        invoice_data['name'] = file_name
        print(json.dumps(invoice_data, indent=4))

        table.put_item(Item=invoice_data)