Invoice recognition

Kuba 2023-01-31 23:51:50 +01:00
parent f4196ef4f8
commit 30ddea926d
5 changed files with 53 additions and 152 deletions

.gitignore vendored

@@ -2,6 +2,7 @@
# Byte-compiled / optimized / DLL files
__pycache__/
cli.py
*/*venv
*$py.class
# C extensions


@@ -1,2 +1,8 @@
# aws
# AWS image recognition with Textract
### To deploy:
- Copy the lambda.py code into your Lambda function
- Add an S3 bucket trigger to that Lambda function
- Upload an invoice to the bucket
- Enjoy! (a local test sketch follows below)
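
A minimal local sketch of the same Textract call the Lambda makes, useful for checking IAM permissions and the response shape before wiring the trigger. The bucket and key names are placeholders, and the script assumes boto3 credentials are already configured:

```python
import boto3

BUCKET = "my-invoice-bucket"   # placeholder: your S3 bucket
KEY = "invoice-001.pdf"        # placeholder: an uploaded invoice

textract = boto3.client("textract")
response = textract.analyze_expense(
    Document={"S3Object": {"Bucket": BUCKET, "Name": KEY}}
)

# Print every label/value pair Textract detected in the invoice summary.
for doc in response["ExpenseDocuments"]:
    for field in doc["SummaryFields"]:
        label = field.get("LabelDetection", {}).get("Text", "")
        value = field.get("ValueDetection", {}).get("Text", "")
        print(f"{label}: {value}")
```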

invoice/lambda.py Normal file

@@ -0,0 +1,45 @@
import json
from urllib.parse import unquote_plus

import boto3


def print_labels_and_values(field, keys):
    # Return the first wanted key found in the field's label, together with its value.
    if "LabelDetection" in field and "ValueDetection" in field:
        label = str(field["LabelDetection"]["Text"])
        value = str(field["ValueDetection"]["Text"])
        for key in keys:
            if key in label:
                print(f"{label}:{value}")
                return key, value
    return None, None


def process_expense_analysis(response):
    # Wanted summary fields (Polish invoice terms): NIP = tax ID,
    # Sprzedawca = seller, brutto = gross total.
    wanted = {"NIP": "", "Sprzedawca": "", "brutto": ""}
    for expense_doc in response["ExpenseDocuments"]:
        for summary_field in expense_doc["SummaryFields"]:
            key, value = print_labels_and_values(summary_field, wanted.keys())
            if key is not None:
                wanted[key] = value
        print()
    return wanted


def lambda_handler(event, context):
    # Triggered by an S3 object-created event: read bucket and key from the event record.
    file_obj = event["Records"][0]
    bucket = unquote_plus(str(file_obj["s3"]["bucket"]["name"]))
    file_name = unquote_plus(str(file_obj["s3"]["object"]["key"]))
    print(f'Bucket: {bucket}, file: {file_name}')

    # Run Textract expense analysis directly on the uploaded invoice in S3.
    client = boto3.client('textract')
    response = client.analyze_expense(Document={'S3Object': {'Bucket': bucket, 'Name': file_name}})

    invoice_data = process_expense_analysis(response)
    invoice_data['name'] = file_name
    print(json.dumps(invoice_data, indent=4))

    # Persist the extracted fields in DynamoDB.
    dynamodb = boto3.resource('dynamodb')
    table = dynamodb.Table('texttract-s478874')
    table.put_item(Item=invoice_data)
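
A hedged local-invocation sketch (not part of lambda.py): it builds the minimal S3 event shape the handler reads and calls it directly, assuming AWS credentials with Textract and DynamoDB access and that the placeholder bucket and key below exist. lambda.py has to be loaded via importlib because "lambda" is a reserved word in Python.

import importlib.util

# Load invoice/lambda.py under an importable module name.
spec = importlib.util.spec_from_file_location("invoice_lambda", "invoice/lambda.py")
invoice_lambda = importlib.util.module_from_spec(spec)
spec.loader.exec_module(invoice_lambda)

event = {
    "Records": [
        {
            "s3": {
                "bucket": {"name": "my-invoice-bucket"},  # placeholder bucket
                "object": {"key": "invoice-001.pdf"},     # placeholder key
            }
        }
    ]
}
invoice_lambda.lambda_handler(event, None)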


@@ -1,143 +0,0 @@
from cli import aws_access_key_id, aws_secret_access_key, aws_session_token, DEFAULT_VPC
import time

import boto3

index = '478874'
key_name = index + '_key'
group_name = index + '_group'
# Load balancer and target group names may only contain alphanumerics and hyphens.
target_name = index + '-target'
lb_name = index + '-loadbalancer'

# Bootstrap script: install Docker, clone the course repo and run the webservice container.
user_data = r'''
#!/bin/bash
sudo yum update -y
sudo yum install git -y
git clone https://git.wmi.amu.edu.pl/bikol/DPZC-2022-23.git
cd DPZC-2022-23/04_Public_cloud/zadania
sudo yum install docker -y
sudo service docker start
sudo usermod -a -G docker ec2-user
docker build -t webservice .
docker run -d -p 80:8080 -t webservice
'''

if __name__ == '__main__':
    ec2 = boto3.resource(
        'ec2',
        region_name='us-east-1',
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
        aws_session_token=aws_session_token,
    )

    key_pair = ec2.create_key_pair(
        KeyName=key_name,
        KeyType='ed25519',
        KeyFormat='pem',
    )

    security_group = ec2.create_security_group(
        Description=group_name,
        GroupName=group_name,
        VpcId=DEFAULT_VPC,
    )
    # Allow inbound HTTP from anywhere.
    inbound_rules = security_group.authorize_ingress(
        GroupId=security_group.group_id,
        CidrIp='0.0.0.0/0',
        IpProtocol='tcp',
        FromPort=80,
        ToPort=80,
    )

    # TODO: EC2 instances
    instance1, instance2 = ec2.create_instances(
        ImageId='ami-0b5eea76982371e91',
        MinCount=2,
        MaxCount=2,
        InstanceType='t2.micro',
        KeyName=key_pair.name,
        UserData=user_data,
        SecurityGroups=[security_group.group_name],
    )

    # Wait until both instances are running (state code 16).
    while True:
        time.sleep(1)
        instance1 = ec2.Instance(instance1.id)
        instance2 = ec2.Instance(instance2.id)
        if instance1.state['Code'] == 16 and instance2.state['Code'] == 16:
            break

    # TODO: target group
    elbv2 = boto3.client(
        'elbv2',
        region_name='us-east-1',
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
        aws_session_token=aws_session_token,
    )
    target_group = elbv2.create_target_group(
        Name=target_name,
        Protocol='TCP',
        Port=80,
        VpcId=DEFAULT_VPC,
        TargetType='instance',
        IpAddressType='ipv4',
    )
    registered_targets = elbv2.register_targets(
        TargetGroupArn=target_group['TargetGroups'][0]['TargetGroupArn'],
        Targets=[
            {
                'Id': instance1.id,
                'Port': 80,
            },
            {
                'Id': instance2.id,
                'Port': 80,
            },
        ]
    )

    # TODO: elastic IP
    ec2_client = boto3.client(
        'ec2',
        region_name='us-east-1',
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
        aws_session_token=aws_session_token,
    )
    allocation = ec2_client.allocate_address(
        Domain='vpc'
    )

    # TODO: load balancer
    load_balancer = elbv2.create_load_balancer(
        Name=lb_name,
        SubnetMappings=[
            {
                'SubnetId': instance1.subnet_id,
                'AllocationId': allocation['AllocationId'],
            },
        ],
        Scheme='internet-facing',
        Type='network',
        IpAddressType='ipv4',
    )
    listener = elbv2.create_listener(
        LoadBalancerArn=load_balancer['LoadBalancers'][0]['LoadBalancerArn'],
        Protocol='TCP',
        Port=80,
        DefaultActions=[
            {
                'Type': 'forward',
                'TargetGroupArn': target_group['TargetGroups'][0]['TargetGroupArn'],
            },
        ],
    )

    print(f'{allocation["PublicIp"]}:80')


@@ -1,8 +0,0 @@
#!/bin/bash
echo "Creating virtual environment ..."
python3 -m venv aws_lb_venv
source aws_lb_venv/bin/activate
echo "Installing dependencies ..."
pip install -q boto3
echo "Initiating gitea cloud deploy ..."
chmod +x a.py
python3 a.py