Overwrite old files
This commit is contained in:
parent
c7d11f6637
commit
fbc4ef18c3
103
api.py
103
api.py
@ -1,81 +1,23 @@
|
|||||||
import cv2
|
|
||||||
import pytesseract
|
|
||||||
import fitz
|
|
||||||
import re
|
|
||||||
import uvicorn
|
|
||||||
from tika import parser
|
|
||||||
from fastapi import FastAPI, UploadFile
|
from fastapi import FastAPI, UploadFile
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
|
from tika import parser
|
||||||
|
import uvicorn
|
||||||
|
import re
|
||||||
|
|
||||||
|
def parse(pdf):
    """Extract the seller's VAT id, seller line and total from an invoice PDF.

    The PDF is handed to Tika for text extraction. The extracted text is split
    into non-empty lines and scanned for the markers ``Razem`` (line holding
    the total), ``Sprzedawca:`` and ``Nabywca:`` (lines delimiting the seller
    section).

    Args:
        pdf: PDF content, passed unchanged to ``parser.from_buffer``.

    Returns:
        A dict with the keys ``vat_id`` (last line of the seller section with
        the ``NIP: `` prefix removed), ``address`` (second line of the seller
        section) and ``total`` (float, last decimal number on the first line
        containing ``Razem``).

    Raises:
        ValueError: if no text was extracted, or an expected marker or amount
            is missing from the text.
    """
    extracted = parser.from_buffer(pdf) or {}
    text = extracted.get('content')
    if not text:
        # NOTE(review): Tika is expected to report empty content for PDFs
        # without a text layer (e.g. scans) — confirm against the tika docs.
        raise ValueError('no text could be extracted from the PDF')

    lines = [line for line in text.split('\n') if line != '']

    # The total is the last decimal number on the first line mentioning 'Razem'.
    total_line = next((line for line in lines if 'Razem' in line), None)
    if total_line is None:
        raise ValueError("no line containing 'Razem' found in the invoice")
    amounts = re.findall(r'\d+[.]\d+', total_line)
    if not amounts:
        raise ValueError(f'no decimal amount found in line: {total_line!r}')
    total = float(amounts[-1])

    # The seller section is everything from 'Sprzedawca:' up to 'Nabywca:'.
    try:
        start = lines.index('Sprzedawca:')
        end = lines.index('Nabywca:')
    except ValueError as err:
        raise ValueError(
            "invoice text lacks a 'Sprzedawca:' or 'Nabywca:' line"
        ) from err
    seller_section = lines[start:end]
    if len(seller_section) < 2:
        raise ValueError("seller section between the markers is empty")

    return {
        'vat_id': seller_section[-1].replace('NIP: ', ''),
        'address': seller_section[1],
        'total': total,
    }
|
||||||
|
|
||||||
DPI = 300
|
|
||||||
ZOOM = DPI / 72
|
|
||||||
|
|
||||||
def save_pdf(pdf_bytes):
    """Write *pdf_bytes* to ``invoice.pdf`` and return that file name.

    Args:
        pdf_bytes: Bytes-like PDF content to store.

    Returns:
        The name of the written file (always ``"invoice.pdf"``, relative to
        the current working directory).
    """
    # NOTE(review): the name is fixed, so overlapping calls overwrite each
    # other's file — confirm this function is never used concurrently.
    file_name = "invoice.pdf"
    # The context manager closes the handle even if write() raises, which the
    # manual open()/close() pair did not guarantee.
    with open(file_name, "wb") as binary_file:
        binary_file.write(pdf_bytes)
    return file_name
|
|
||||||
|
|
||||||
|
|
||||||
def convert_pdf_to_png(file):
    """Render the first page of a PDF to ``invoice.png``.

    Args:
        file: Path of the PDF to open with ``fitz.open``.

    Returns:
        The name of the written image (always ``"invoice.png"``).
    """
    file_name = "invoice.png"
    # Scale both axes by ZOOM so the page is rasterised above the base
    # resolution instead of at the library default.
    magnify = fitz.Matrix(ZOOM, ZOOM)
    # Open the document in a context manager so it is closed once the page
    # has been rendered; the original left it open.
    with fitz.open(file) as doc:
        pix = doc[0].get_pixmap(matrix=magnify)
        pix.save(file_name)
    return file_name
|
|
||||||
|
|
||||||
|
|
||||||
def ocr_png_file(image_name):
    """Run Tesseract OCR on an image file and return the recognised text.

    Args:
        image_name: Path of the image to load with ``cv2.imread``.

    Returns:
        The string produced by ``pytesseract.image_to_string``.

    Raises:
        ValueError: if the image could not be loaded.
    """
    image = cv2.imread(image_name)
    if image is None:
        # cv2.imread signals a missing or undecodable file by returning None
        # rather than raising; fail here with a message naming the file.
        raise ValueError(f"could not read image file: {image_name!r}")
    # Tesseract options: language 'pol', OCR engine mode 3, page
    # segmentation mode 6.
    custom_config = r'-l pol --oem 3 --psm 6'
    return pytesseract.image_to_string(image, config=custom_config)
|
|
||||||
|
|
||||||
|
|
||||||
def find_vat_id(invoice_text):
    """Return the digits of the first ``NIP: <digits> `` entry in the text.

    Args:
        invoice_text: Invoice text to search.

    Returns:
        The digit string that follows ``NIP: ``.

    Raises:
        ValueError: if the text contains no matching entry.
    """
    # NOTE(review): a trailing space after the digits is required, as in the
    # original pattern; an id at the very end of a line will not match —
    # confirm that this is intended.
    result = re.search(r"NIP: ([0-9]+) ", invoice_text)
    if result is None:
        raise ValueError("no 'NIP: <digits> ' entry found in invoice text")
    return result.group(1)
|
|
||||||
|
|
||||||
|
|
||||||
def find_seller_name(invoice_text):
    """Return the company name found on the line after ``Nabywca:``.

    The name is the text of that line up to the last position that is
    followed by one of the legal-form markers in the pattern, with
    ``" sp.z.o.o"`` appended.

    Args:
        invoice_text: Invoice text to search.

    Returns:
        The stripped name followed by ``" sp.z.o.o"``.

    Raises:
        ValueError: if no matching line is found.
    """
    # NOTE(review): 'Nabywca' is Polish for buyer while this function is
    # named after the seller — confirm which party is wanted.
    # NOTE(review): the dots in the markers are unescaped and match any
    # character, and the suffix is appended even for S.A./S.K.A. names;
    # both are kept as in the original to avoid changing results.
    pattern = r"Nabywca:\n(.*)(?=sp.|spółka|S.A.|S.K.A.)"
    result = re.search(pattern, invoice_text)
    if result is None:
        raise ValueError("no company name found after 'Nabywca:'")
    return result.group(1).strip() + " sp.z.o.o"
|
|
||||||
|
|
||||||
|
|
||||||
def find_total_sum(invoice_text):
    """Return the total amount found in the invoice text.

    An amount is accepted when it is followed by ``" PLN"`` and a newline and
    preceded by either ``"Wartość brutto "`` or ``"PLN "``.

    Args:
        invoice_text: Invoice text to search.

    Returns:
        The amount as a float.

    Raises:
        ValueError: if no amount in one of the two layouts is found.
    """
    # The original used an unescaped '.', so "99,50" matched and then failed
    # in float(). Accept '.' or ',' explicitly and normalise before parsing.
    amount = r"[0-9]+(?:[.,][0-9]+)?"
    pattern_afv = r"(?<=Wartość brutto )" + amount + r"(?= PLN\n)"
    pattern_bfv = r"(?<=PLN )" + amount + r"(?= PLN\n)"
    result = re.search(pattern_afv + "|" + pattern_bfv, invoice_text)
    if result is None:
        raise ValueError("no total amount found in invoice text")
    return float(result.group().replace(",", "."))
|
|
||||||
|
|
||||||
|
|
||||||
# @app.post("/invoice")
|
|
||||||
# async def create_file(file: bytes = File()):
|
|
||||||
# pdf_file = save_pdf(pdf_bytes=file)
|
|
||||||
# png_file = convert_pdf_to_png(file=pdf_file)
|
|
||||||
# invoice_text = ocr_png_file(image_name=png_file)
|
|
||||||
# vat_id = find_vat_id(invoice_text=invoice_text)
|
|
||||||
# seller_name = find_seller_name(invoice_text=invoice_text)
|
|
||||||
# total_sum = find_total_sum(invoice_text=invoice_text)
|
|
||||||
# json = {
|
|
||||||
# "vat_id": vat_id,
|
|
||||||
# "seller_name": seller_name,
|
|
||||||
# "total": total_sum
|
|
||||||
# }
|
|
||||||
# return json
|
|
||||||
|
|
||||||
app = FastAPI()
|
app = FastAPI()
|
||||||
|
|
||||||
@ -91,16 +33,7 @@ app.add_middleware(
|
|||||||
|
|
||||||
@app.post('/invoice')
async def root(file: UploadFile):
    """Handle an invoice upload and return the fields extracted by ``parse``.

    Args:
        file: The uploaded PDF.

    Returns:
        Whatever ``parse`` returns for the uploaded bytes.
    """
    # Await the upload's async read() instead of calling the blocking
    # file.file.read() directly inside the coroutine.
    pdf_bytes = await file.read()
    return parse(pdf_bytes)
|
|
||||||
|
|
||||||
# Start the development server only when this file is executed directly, so
# importing the module (e.g. by an ASGI runner) does not launch a second one.
if __name__ == '__main__':
    uvicorn.run(app, host="0.0.0.0", port=8000)
|
53
main.py
53
main.py
@ -1,24 +1,10 @@
|
|||||||
from credentials import aws_access_key_id, aws_secret_access_key, aws_session_token, default_vpc
|
import os

# SECURITY: the access key, secret key and session token used to be written
# here in plain text. Anything committed that way must be treated as
# compromised and revoked. The values are now taken from the environment;
# a missing variable yields None.
# NOTE(review): boto3 is expected to fall back to its default credential
# lookup when these arguments are None — confirm against the boto3 docs.
aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID")
aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY")
aws_session_token = os.environ.get("AWS_SESSION_TOKEN")

# The VPC id may be overridden through the environment; the previous value
# stays as the default.
VPC = os.environ.get("AWS_VPC_ID", "vpc-0c8e1d7baf89a7991")

# Prefix used when naming the resources created by this script.
ID = "s444018"
|
||||||
|
|
||||||
import boto3
|
import boto3
|
||||||
|
|
||||||
# Prefix shared by the names of the resources created below.
INDEKS = "444018"
key_name = f"{INDEKS}-aws-ssh-key"
security_group_name = f"{INDEKS}-security-group"

# Boot script passed to the instance as user data. The '#!' must be the very
# first characters for the payload to be recognised as a shell script, so the
# literal starts directly with the shebang rather than with a newline. The
# string has no placeholders, hence no f-prefix.
user_data = '''#!/bin/bash
sudo yum update -y
sudo yum install git -y
git clone https://git.wmi.amu.edu.pl/s444018/DPZC-5.git
cd DPZC-5
sudo yum install docker -y
sudo service docker start
sudo usermod -a -G docker ec2-user
sudo docker build -t invoice .
sudo docker run -d -p 80:8000 -t invoice
'''
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
ec2 = boto3.resource(
|
ec2 = boto3.resource(
|
||||||
'ec2',
|
'ec2',
|
||||||
region_name='us-east-1',
|
region_name='us-east-1',
|
||||||
@ -28,15 +14,15 @@ if __name__ == '__main__':
|
|||||||
)
|
)
|
||||||
|
|
||||||
key_pair = ec2.create_key_pair(
|
key_pair = ec2.create_key_pair(
|
||||||
KeyName=key_name,
|
KeyName=ID+'-KEY-PAIR',
|
||||||
KeyType='ed25519',
|
KeyType='ed25519',
|
||||||
KeyFormat='pem',
|
KeyFormat='pem',
|
||||||
)
|
)
|
||||||
|
|
||||||
security_group = ec2.create_security_group(
|
security_group = ec2.create_security_group(
|
||||||
Description=security_group_name,
|
Description=ID+'-GROUP',
|
||||||
GroupName=security_group_name,
|
GroupName=ID+'-GROUP',
|
||||||
VpcId=default_vpc,
|
VpcId=VPC
|
||||||
)
|
)
|
||||||
|
|
||||||
inbound_rules = security_group.authorize_ingress(
|
inbound_rules = security_group.authorize_ingress(
|
||||||
@ -44,15 +30,26 @@ if __name__ == '__main__':
|
|||||||
CidrIp='0.0.0.0/0',
|
CidrIp='0.0.0.0/0',
|
||||||
IpProtocol='tcp',
|
IpProtocol='tcp',
|
||||||
FromPort=80,
|
FromPort=80,
|
||||||
ToPort=80,
|
ToPort=80
|
||||||
)
|
)
|
||||||
|
|
||||||
instance = ec2.create_instances(
|
instances = ec2.create_instances(
|
||||||
ImageId='ami-0b5eea76982371e91',
|
ImageId='ami-0b5eea76982371e91',
|
||||||
MinCount=1,
|
MinCount=1,
|
||||||
MaxCount=1,
|
MaxCount=1,
|
||||||
InstanceType='t2.micro',
|
InstanceType='t2.micro',
|
||||||
KeyName=key_pair.name,
|
KeyName=key_pair.name,
|
||||||
UserData=user_data,
|
UserData=f'''
|
||||||
SecurityGroups=[security_group.group_name],
|
#!/bin/bash
|
||||||
|
sudo yum update -y
|
||||||
|
sudo yum install git -y
|
||||||
|
git clone https://git.wmi.amu.edu.pl/s444376/DPZC_Ola.git
|
||||||
|
cd DPZC_Ola/Zadanie_5.2_5.3
|
||||||
|
sudo yum install docker -y
|
||||||
|
sudo service docker start
|
||||||
|
sudo usermod -a -G docker ec2-user
|
||||||
|
docker build -t invoice .
|
||||||
|
docker run -d -p 80:8000 -t invoice
|
||||||
|
''',
|
||||||
|
SecurityGroups=[security_group.group_name]
|
||||||
)
|
)
|
22
web/index.html
Normal file
22
web/index.html
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<script>
|
||||||
|
// Upload the PDF chosen in #docpicker to the /invoice endpoint and show the
// JSON reply (or an error message) in #result.
async function parse() {
    const resultEl = document.getElementById("result")
    const picked = document.getElementById("docpicker").files[0]
    // The change event can fire with an empty selection; nothing to upload then.
    if (!picked) {
        return
    }
    // textContent is used throughout so server-provided text is never
    // interpreted as HTML.
    resultEl.textContent = "Dodawanie pliku..."
    const form = new FormData()
    form.append('file', picked)
    resultEl.textContent = "Proszę chwilę poczekać, zaraz wypluje info o fakturze :)"
    try {
        // 'resp' was an implicit global before; keep it local.
        const resp = await fetch('http://IP_ADDRESS_HERE:80/invoice', {method: "POST", body: form})
        if (!resp.ok) {
            // Surface HTTP failures instead of trying to parse an error page.
            throw new Error("HTTP " + resp.status)
        }
        resultEl.textContent = "Info"
        const invoice = await resp.json()
        resultEl.textContent = JSON.stringify(invoice)
    } catch (err) {
        // Network errors and invalid JSON end up here as well.
        resultEl.textContent = "Błąd: " + err.message
    }
}
|
||||||
|
</script>
|
||||||
|
<title>Chmura</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<input type="file" id="docpicker" accept=".pdf" onchange="parse()" />
|
||||||
|
<p id="result"></p>
|
||||||
|
</body>
|
||||||
|
</html>
|
13
web/website-bucket-policy.json
Normal file
13
web/website-bucket-policy.json
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
{
|
||||||
|
"Version": "2012-10-17",
|
||||||
|
"Id": "MyPolicy",
|
||||||
|
"Statement": [
|
||||||
|
{
|
||||||
|
"Sid": "PublicReadForGetBucketObjects",
|
||||||
|
"Effect": "Allow",
|
||||||
|
"Principal": "*",
|
||||||
|
"Action": "s3:GetObject",
|
||||||
|
"Resource": "arn:aws:s3:::invoices-ocr/*"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user