Add Docker image with Python env

This commit is contained in:
Stanislaw-Golebiewski 2019-11-25 13:43:17 +01:00
parent 77b371c3d0
commit e5399f5ec7
8 changed files with 237 additions and 8 deletions

26
Dockerfile Normal file
View File

@ -0,0 +1,26 @@
# Image with a Python 3.7 interpreter, the Tesseract OCR engine and the Python
# dependencies declared for the code under ./api.
FROM python:3.7

# Install Tesseract and the Japanese language pack from the distribution.
# --no-install-recommends keeps optional packages out of the image, and the apt
# package lists are removed in the same layer so they do not persist in it.
RUN apt-get update \
    && apt-get -y install --no-install-recommends \
        tesseract-ocr \
        tesseract-ocr-jpn \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# The contents of ./api (not the directory itself) are copied into /install.
WORKDIR /install
COPY ./api .

# get Polish language train data
# RUN wget -P /usr/share/tesseract-ocr/4.00/tessdata/ https://github.com/tesseract-ocr/tessdata/raw/4.00/pol.traineddata
# The trained data shipped with the sources is used instead of downloading it.
RUN mv ./tesseract_data/pol.traineddata /usr/share/tesseract-ocr/4.00/tessdata/

# Convert the Pipfile into requirements.txt and install from it.
# --no-cache-dir stops pip from keeping its download cache in the layer.
RUN pip3 install --no-cache-dir pipenv_to_requirements gunicorn && \
    pipenv run pipenv_to_requirements && \
    pip3 install --no-cache-dir -r requirements.txt

# NOTE(review): COPY above places the contents of ./api directly in /install, so
# /install/api only has files if ./api itself contains an api/ subdirectory;
# otherwise WORKDIR creates it empty. Confirm this is the intended directory.
WORKDIR /install/api

13
Jenkinsfile vendored
View File

@ -1,10 +1,12 @@
pipeline { pipeline {
agent any agent {
agent { dockerfile true }
}
stages { stages {
stage('Build') { stage('Build') {
steps { steps {
echo 'Building..' sh 'python main.py -i img/biedra.jpg'
} }
} }
stage('Test') { stage('Test') {
@ -12,10 +14,5 @@ pipeline {
echo 'Testing..' echo 'Testing..'
} }
} }
stage('Deploy') {
steps {
echo 'Deploying....'
}
}
} }
} }

16
api/Pipfile Normal file
View File

@ -0,0 +1,16 @@
# Package index the dependencies are resolved from.
[[source]]
name = "pypi"
url = "https://pypi.org/simple"
verify_ssl = true

# Interpreter version this project targets.
[requires]
python_version = "3.7"

# Runtime dependencies; "*" places no version constraint here.
# NOTE(review): argparse is part of the Python 3 standard library, so this PyPI
# entry is presumably unnecessary. If it is dropped, regenerate Pipfile.lock.
[packages]
argparse = "*"
numpy = "*"
opencv-python = "*"
pillow = "*"
pytesseract = "*"

# No development-only dependencies are declared.
[dev-packages]

131
api/Pipfile.lock generated Normal file
View File

@ -0,0 +1,131 @@
{
"_meta": {
"hash": {
"sha256": "1fa95346c7318e1652a62ece78fc08ff06679b98ef5b652b2a1cf546c2c9ac64"
},
"pipfile-spec": 6,
"requires": {
"python_version": "3.7"
},
"sources": [
{
"name": "pypi",
"url": "https://pypi.org/simple",
"verify_ssl": true
}
]
},
"default": {
"argparse": {
"hashes": [
"sha256:62b089a55be1d8949cd2bc7e0df0bddb9e028faefc8c32038cc84862aefdd6e4",
"sha256:c31647edb69fd3d465a847ea3157d37bed1f95f19760b11a47aa91c04b666314"
],
"index": "pypi",
"version": "==1.4.0"
},
"numpy": {
"hashes": [
"sha256:0a7a1dd123aecc9f0076934288ceed7fd9a81ba3919f11a855a7887cbe82a02f",
"sha256:0c0763787133dfeec19904c22c7e358b231c87ba3206b211652f8cbe1241deb6",
"sha256:3d52298d0be333583739f1aec9026f3b09fdfe3ddf7c7028cb16d9d2af1cca7e",
"sha256:43bb4b70585f1c2d153e45323a886839f98af8bfa810f7014b20be714c37c447",
"sha256:475963c5b9e116c38ad7347e154e5651d05a2286d86455671f5b1eebba5feb76",
"sha256:64874913367f18eb3013b16123c9fed113962e75d809fca5b78ebfbb73ed93ba",
"sha256:683828e50c339fc9e68720396f2de14253992c495fdddef77a1e17de55f1decc",
"sha256:6ca4000c4a6f95a78c33c7dadbb9495c10880be9c89316aa536eac359ab820ae",
"sha256:75fd817b7061f6378e4659dd792c84c0b60533e867f83e0d1e52d5d8e53df88c",
"sha256:7d81d784bdbed30137aca242ab307f3e65c8d93f4c7b7d8f322110b2e90177f9",
"sha256:8d0af8d3664f142414fd5b15cabfd3b6cc3ef242a3c7a7493257025be5a6955f",
"sha256:9679831005fb16c6df3dd35d17aa31dc0d4d7573d84f0b44cc481490a65c7725",
"sha256:a8f67ebfae9f575d85fa859b54d3bdecaeece74e3274b0b5c5f804d7ca789fe1",
"sha256:acbf5c52db4adb366c064d0b7c7899e3e778d89db585feadd23b06b587d64761",
"sha256:ada4805ed51f5bcaa3a06d3dd94939351869c095e30a2b54264f5a5004b52170",
"sha256:c7354e8f0eca5c110b7e978034cd86ed98a7a5ffcf69ca97535445a595e07b8e",
"sha256:e2e9d8c87120ba2c591f60e32736b82b67f72c37ba88a4c23c81b5b8fa49c018",
"sha256:e467c57121fe1b78a8f68dd9255fbb3bb3f4f7547c6b9e109f31d14569f490c3",
"sha256:ede47b98de79565fcd7f2decb475e2dcc85ee4097743e551fe26cfc7eb3ff143",
"sha256:f58913e9227400f1395c7b800503ebfdb0772f1c33ff8cb4d6451c06cabdf316",
"sha256:fe39f5fd4103ec4ca3cb8600b19216cd1ff316b4990f4c0b6057ad982c0a34d5"
],
"index": "pypi",
"version": "==1.17.4"
},
"opencv-python": {
"hashes": [
"sha256:04bec0a6d3a00360a7fb769b755ff4489a4ac8291821b785151f63e6d8bb59ea",
"sha256:1a2d1801c038f055852bd2379186ca8b19b4ea24afb0b8410293bc802211579b",
"sha256:1c7d235faef511aca7669f1aa650897b6c058dfde6412ea3fc58feb0fce78814",
"sha256:22c2ee5f97f85903bfb28c056566b2ecaa1d2f804b880ab39ebf94528a402992",
"sha256:25127990671dc8bd27ae8b880d7a39f9aae863052a8fbebe8977c6ce8e5fc0c9",
"sha256:3cef82b6a1f748d2f4527f5932a86d54ebd10bd89f6cf59b003c36b1015055f7",
"sha256:499a0413e7110a934ab56e635252a4c86f8be64de59f94a62318a7b895dc809e",
"sha256:5f2cf5a0ab244a0a1dbe5ec426c277b55e06ac6a472ad61be77ef643a238cbd3",
"sha256:5fec35916a6b9ce935f2e2806084303fd4e3fbb0c973a8db8f54b5aca54613cb",
"sha256:6183c9c7fab4590e0651bc941cde780988c3ad9889bd62de19d581a6f59523ea",
"sha256:67a236db8db84d7fb0f6e127f360ce6669350ef324839132e22879ec90588dab",
"sha256:6c32d36f52a6e0c02d1ab0bb95223cb4dd5525a7e8292a747116126b3d34c578",
"sha256:73a467a78ffd902d2c0265ab6b2e2cdda423d61b3d08685e0c7d0b4572142ff1",
"sha256:76de8a247970d150b1672c6646cda91217d562682e713721fc9b9bf1434553c4",
"sha256:919d5c3ec1a62258ba8c68b869b1056186e2355c4474739b199c295547e66cc1",
"sha256:982d4e80c14356098cde57a6c7d18fe0928a1c3118675bac2252ef38f152e1ab",
"sha256:9d025e6bf2989bcbc7744c26d8bd90c2629a92d8de3ba2416f62ce2a94615dd9",
"sha256:bb59f98205cd81e29f45eed043cf0f98531486dc0b3f671c9e06fecf08f7ccef",
"sha256:c8119248457e909dcd7b598621ed1d139419d69377e8cb4e2b2c49c819de287d",
"sha256:ce7b1f25be04b04f2e678b2bf23a975137f77406dcee66a88a2daeb77cda3e76",
"sha256:d64428bf59ab4d27620b00a2ad6fea2b4d62016a17849c82a7517ec12db97d55",
"sha256:e2ffa3161b8662112f1880734e8b9549d0c9e818e59f652a9d1c5bf31e36586a",
"sha256:e6fc00ac42c800fad5fb3927cfb9bf4e60bb3302cb9805f45b826d5d2546119a",
"sha256:e793df2e12093b3a01006b5b27f321e306193c7a5c9e2a6c8bf652e1ad2d6a86",
"sha256:eae543b3e9253ff702103333aabd87736b5ed5e46ab834d8e0b929f08f494dee",
"sha256:f0af656402b73ead2d9f593c2774c04b01e2d0c63e4f99e0dc2f3fde99be22b4"
],
"index": "pypi",
"version": "==4.1.2.30"
},
"pillow": {
"hashes": [
"sha256:047d9473cf68af50ac85f8ee5d5f21a60f849bc17d348da7fc85711287a75031",
"sha256:0f66dc6c8a3cc319561a633b6aa82c44107f12594643efa37210d8c924fc1c71",
"sha256:12c9169c4e8fe0a7329e8658c7e488001f6b4c8e88740e76292c2b857af2e94c",
"sha256:248cffc168896982f125f5c13e9317c059f74fffdb4152893339f3be62a01340",
"sha256:27faf0552bf8c260a5cee21a76e031acaea68babb64daf7e8f2e2540745082aa",
"sha256:285edafad9bc60d96978ed24d77cdc0b91dace88e5da8c548ba5937c425bca8b",
"sha256:384b12c9aa8ef95558abdcb50aada56d74bc7cc131dd62d28c2d0e4d3aadd573",
"sha256:38950b3a707f6cef09cd3cbb142474357ad1a985ceb44d921bdf7b4647b3e13e",
"sha256:4aad1b88933fd6dc2846552b89ad0c74ddbba2f0884e2c162aa368374bf5abab",
"sha256:4ac6148008c169603070c092e81f88738f1a0c511e07bd2bb0f9ef542d375da9",
"sha256:4deb1d2a45861ae6f0b12ea0a786a03d19d29edcc7e05775b85ec2877cb54c5e",
"sha256:59aa2c124df72cc75ed72c8d6005c442d4685691a30c55321e00ed915ad1a291",
"sha256:5a47d2123a9ec86660fe0e8d0ebf0aa6bc6a17edc63f338b73ea20ba11713f12",
"sha256:5cc901c2ab9409b4b7ac7b5bcc3e86ac14548627062463da0af3b6b7c555a871",
"sha256:6c1db03e8dff7b9f955a0fb9907eb9ca5da75b5ce056c0c93d33100a35050281",
"sha256:7ce80c0a65a6ea90ef9c1f63c8593fcd2929448613fc8da0adf3e6bfad669d08",
"sha256:809c19241c14433c5d6135e1b6c72da4e3b56d5c865ad5736ab99af8896b8f41",
"sha256:83792cb4e0b5af480588601467c0764242b9a483caea71ef12d22a0d0d6bdce2",
"sha256:846fa202bd7ee0f6215c897a1d33238ef071b50766339186687bd9b7a6d26ac5",
"sha256:9f5529fc02009f96ba95bea48870173426879dc19eec49ca8e08cd63ecd82ddb",
"sha256:a423c2ea001c6265ed28700df056f75e26215fd28c001e93ef4380b0f05f9547",
"sha256:ac4428094b42907aba5879c7c000d01c8278d451a3b7cccd2103e21f6397ea75",
"sha256:b1ae48d87f10d1384e5beecd169c77502fcc04a2c00a4c02b85f0a94b419e5f9",
"sha256:bf4e972a88f8841d8fdc6db1a75e0f8d763e66e3754b03006cbc3854d89f1cb1",
"sha256:c6414f6aad598364aaf81068cabb077894eb88fed99c6a65e6e8217bab62ae7a",
"sha256:c710fcb7ee32f67baf25aa9ffede4795fd5d93b163ce95fdc724383e38c9df96",
"sha256:c7be4b8a09852291c3c48d3c25d1b876d2494a0a674980089ac9d5e0d78bd132",
"sha256:c9e5ffb910b14f090ac9c38599063e354887a5f6d7e6d26795e916b4514f2c1a",
"sha256:e0697b826da6c2472bb6488db4c0a7fa8af0d52fa08833ceb3681358914b14e5",
"sha256:e9a3edd5f714229d41057d56ac0f39ad9bdba6767e8c888c951869f0bdd129b0"
],
"index": "pypi",
"version": "==6.2.1"
},
"pytesseract": {
"hashes": [
"sha256:ae1dce01413d1f8eb0614fd65d831e26e649dc1a31699b7275455c57aa563b59"
],
"index": "pypi",
"version": "==0.3.0"
}
},
"develop": {}
}

BIN
api/img/biedra.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 284 KiB

59
api/main.py Normal file
View File

@ -0,0 +1,59 @@
import argparse
import cv2
import os
import sys
import re
import warnings
import pytesseract
import numpy as np
from PIL import Image
# Script body: load the image given with -i/--image, crop it to the bounding box
# of its largest detected contour, OCR the crop with Tesseract (Polish language
# data) and print a slice of the recognised lines located between the
# "PARAGON ... FISKALNY" line and the "SPRZEDA..." line.
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True, help="Path to the image")
args = vars(ap.parse_args())

if not os.path.isfile(args["image"]):
    print(f"Could not find an image '{args['image']}'")
    sys.exit(-1)

img = cv2.imread(args["image"])
# cv2.imread reports an unreadable or unsupported file by returning None rather
# than raising, so check before using the result.
if img is None:
    print(f"Could not read an image '{args['image']}'")
    sys.exit(-1)

# Keep the un-blurred grayscale image: the blurred copy is only used for edge
# detection, while the crop handed to OCR is taken from the sharp one.
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
blurred = cv2.GaussianBlur(gray, (5, 5), 0)
edged = cv2.Canny(blurred, 75, 200)

# `edged` is not used after this call, so no defensive copy is needed.
contours, _hierarchy = cv2.findContours(edged,
                                        cv2.RETR_LIST,
                                        cv2.CHAIN_APPROX_SIMPLE)
# max() raises ValueError on an empty sequence; fail with a readable message.
if len(contours) == 0:
    print(f"Could not find any contours in '{args['image']}'")
    sys.exit(-1)

max_area_contour = max(contours, key=cv2.contourArea)
x, y, w, h = cv2.boundingRect(max_area_contour)

# out_img = gray[y:y+h, x:x+w]
# ret, out_img = cv2.threshold(gray[y:y+h, x:x+w], 155, 255, cv2.THRESH_TOZERO)
img_cut = gray[y:y+h, x:x+w]
# The crop has a single channel, so the conversion code must be GRAY2RGB;
# BGR2RGB requires a 3- or 4-channel input and fails on grayscale data.
img_out = cv2.cvtColor(img_cut, cv2.COLOR_GRAY2RGB)

text = pytesseract.image_to_string(Image.fromarray(img_out), config="-l pol")
text_lines = text.split('\n')

# Compile the marker patterns once instead of on every loop iteration.
start_marker = re.compile('PARAGON.*FISKALNY.*')
stop_marker = re.compile('SPRZEDA.*')

index_start = 0
index_stop = len(text_lines) - 1
# As in the original loop, the final line is not examined; the last matching
# line wins for each marker.
for i, line in enumerate(text_lines[:-1]):
    if start_marker.match(line):
        index_start = i
    if stop_marker.match(line):
        index_stop = i

# NOTE(review): the two lines directly before the stop marker are skipped,
# presumably because of the receipt layout; confirm.
# Clamp at 0 so a small index_stop cannot become a negative slice bound, which
# Python would interpret as counting from the end of the list.
for item_line in text_lines[index_start + 1:max(index_stop - 2, 0)]:
    print(item_line)
    # regex = re.compile("([ A-Za-ząćęłśźż]+).*(\d{1,3},\d{2})[A-E]$")
    # m = regex.match(item_line)
    # if m:
    #     print(item_line, "===>", m.group(1), m.group(2))
    # else:
    #     print("skipped!")

# Debug visualisation, kept disabled:
# # cv2.drawContours(img, contours, -1, (0, 255, 0), 3)
# # cv2.rectangle(img, (x, y), (x+w, y+h), (0, 0, 255), 2)
# cv2.imshow("cropped", img_out)
# # cv2.imshow("Edged", edged)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

Binary file not shown.