From b5e17186226e85c72c3d7ec1b12edcd95b54b882 Mon Sep 17 00:00:00 2001 From: mikolajpaterka Date: Sat, 28 Jan 2023 22:54:41 +0100 Subject: [PATCH] kurcze --- 5/{web => }/Dockerfile | 0 5/__pycache__/credencials_aws.cpython-310.pyc | Bin 0 -> 786 bytes 5/{web => }/api.py | 0 5/credencials_aws.py | 8 +- 5/main.py | 14 +- 5/{web => }/requirements.txt | 0 5/test.ipynb | 214 ++++++++++++++++++ 7 files changed, 223 insertions(+), 13 deletions(-) rename 5/{web => }/Dockerfile (100%) create mode 100644 5/__pycache__/credencials_aws.cpython-310.pyc rename 5/{web => }/api.py (100%) rename 5/{web => }/requirements.txt (100%) create mode 100644 5/test.ipynb diff --git a/5/web/Dockerfile b/5/Dockerfile similarity index 100% rename from 5/web/Dockerfile rename to 5/Dockerfile diff --git a/5/__pycache__/credencials_aws.cpython-310.pyc b/5/__pycache__/credencials_aws.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7fd394162e0d2f1b1ec90a9b1283167009646bc3 GIT binary patch literal 786 zcmYjP%Wm3G5CuY{s8tq~(tXucBh+4E$AG1#s)HXG^Dr0#_QtxlFZhM8pBP(R^eg%g z?f-Pcwtvx8omQ%Hq&cIRInq2d`1<-qqsEVKKdrZ|M&s90@EpxL_*`3hQ0>NkucGRyHu=0!~&bHZfE-0Rglghos$V+Us`(o;pCBlpg7zU z`{RMaO5jalyjwaML+a-7$k4eMuW4;DQ)I5g&bi!-i2%mIX%2c6@LjIyG8`AW^FW4=SZ?9&N_p1Y|so11X+|&Ojd&UA5XK(@2734i~0RslK;>SlN~n0rq-A>_y>b zC~-O7(vIgBi_t_`?Zd%(!&=IO=Y)C5P*~fVkmL8NKxzx>H1JmY0j~0J9qzSqveGHS zwR;`CzI#C1o4d#13`*gWc4=zdcSgLq*40E}1GPi)Qw3?`2p|-w*`gQhA0DdL$H;o$ zCE=jkM>I0;Xo|2XdUo4Bzk)?-K+8gDYWS!$T>Jd`DU~8CLD{p)KV@3$TpSu1_EC6# z$#OiSPFBWR9)52mm-PqzrQctHnxZ5H2iL~|^dgv{#D{>zR(?QXmI5WqZ5M*+J~2cQ z$O-hb(yBriy{TZD;S?7zbfIB6hde0_YzD4Xq0q$z@_>%g3>F3nSXm#|jso?lHVQ2l h2K6pQ`XMUMS04}9&I9!6>A%yfn%a$av)#OE{Q;l%`3wL6 literal 0 HcmV?d00001 diff --git a/5/web/api.py b/5/api.py similarity index 100% rename from 5/web/api.py rename to 5/api.py diff --git a/5/credencials_aws.py b/5/credencials_aws.py index e2f1724..e4807fa 100644 --- a/5/credencials_aws.py +++ b/5/credencials_aws.py @@ -1,5 +1,5 @@ -aws_access_key_id='' -aws_secret_access_key='' -aws_session_token='' +aws_access_key_id='ASIARJHPFIYPJEGLCA4Z' +aws_secret_access_key='SFRh92Y/azqAw3tzcWUTHzZ3SIlUur2XVKx+A0Kn' +aws_session_token='FwoGZXIvYXdzEP3//////////wEaDOOrfcffhtGn7KRj7CLBAa/wm42gvm5oSljMaqfRhtA3ZbvvA9uIqV9YQNSPE+DOEubq2laqlxO/65ek/sjox2/Bx0E0qzzZsjefp7dqKQ9qmLPCYUUIi4hzbDGpQrExSoj6Y0rAST96uTUuMJF+9oyH2/+WJVDIHYFyPJ1Nrkj9bq/x/PoMqlKqOnh8Z8w0UzXrMpKa3uaQbk5UkjQaoSXyLGa66GqjpnWCKEuoX9hjkwQHLSTgn8VWDcSLFEGOyA5oYXbaEFgUG3YQ5x8jTg8ozunVngYyLTZ52id6+ZA4Zl9dW1BCNaKnR9i95J7hHFbVZUrKDlU+3FMSeYJp/eelUtQ6lg==' -DEFAULT_VPC = '' \ No newline at end of file +DEFAULT_VPC = 'vpc-03a807e9eb1952c59' diff --git a/5/main.py b/5/main.py index e99e948..96298f4 100644 --- a/5/main.py +++ b/5/main.py @@ -1,11 +1,7 @@ from credencials_aws import aws_access_key_id, aws_secret_access_key, aws_session_token, DEFAULT_VPC -import boto3, time - - - -INDEKS = "s444455" - +import boto3 +INDEKS = "olaa" key_name = f"{INDEKS}-key" security_group_name = f"{INDEKS}-security-group" @@ -15,12 +11,12 @@ user_data = f''' sudo yum update -y sudo yum install git -y git clone https://git.wmi.amu.edu.pl/s444455/DPZC_3.git -cd DPZC_3/5/web +cd DPZC_3/5 sudo yum install docker -y sudo service docker start sudo usermod -a -G docker ec2-user -docker build -t invoice . -docker run -d -p 80:8080 -t invoice +sudo docker build -t invoice . +sudo docker run -d -p 80:8080 -t invoice ''' if __name__ == '__main__': diff --git a/5/web/requirements.txt b/5/requirements.txt similarity index 100% rename from 5/web/requirements.txt rename to 5/requirements.txt diff --git a/5/test.ipynb b/5/test.ipynb new file mode 100644 index 0000000..e32d5c4 --- /dev/null +++ b/5/test.ipynb @@ -0,0 +1,214 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "from fastapi import FastAPI, UploadFile\n", + "from tika import parser\n", + "import uvicorn, re\n", + "from fastapi.middleware.cors import CORSMiddleware" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "NIP: 5741997874\n", + "Firma krzak sp.z.o.o\n", + "246.0\n" + ] + } + ], + "source": [ + "\n", + "\n", + "\n", + "faktura = parser.from_file(\"/Users/mikolajpaterka/Documents/Studia/Mgr_II/Praktycznie zastosowania_chmury_obliczeniowej/Zestaw_3/5_train/CFV 4_05_2021.pdf\")\n", + "dane = faktura['content']\n", + "dane = dane.split('\\n')\n", + "dane = [i for i in dane if i != '']\n", + "razem = list(filter(lambda x: 'Razem' in x, dane))\n", + "razem = float(re.findall(r'\\d+[.]\\d+', razem[0])[-1])\n", + "\n", + "sprzedawca = dane[dane.index('Sprzedawca:') : dane.index('Nabywca:')]\n", + "sprzedawca_nazwa = sprzedawca[1]\n", + "nip = sprzedawca[-1].replace('NIP: ', '')\n", + "\n", + "print(nip)\n", + "print(sprzedawca_nazwa)\n", + "print(razem)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-01-27 20:29:38,916 [MainThread ] [WARNI] Failed to see startup log message; retrying...\n", + "2023-01-27 20:29:43,923 [MainThread ] [WARNI] Failed to see startup log message; retrying...\n", + "2023-01-27 20:29:48,927 [MainThread ] [WARNI] Failed to see startup log message; retrying...\n", + "2023-01-27 20:29:53,933 [MainThread ] [ERROR] Tika startup log message not received after 3 tries.\n", + "2023-01-27 20:29:53,936 [MainThread ] [ERROR] Failed to receive startup confirmation from startServer.\n" + ] + }, + { + "ename": "RuntimeError", + "evalue": "Unable to start Tika server.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn [10], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[39m# import parser object from tike\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mtika\u001b[39;00m \u001b[39mimport\u001b[39;00m parser\n\u001b[0;32m----> 4\u001b[0m parsed_pdf \u001b[39m=\u001b[39m parser\u001b[39m.\u001b[39;49mfrom_file(\u001b[39m\"\u001b[39;49m\u001b[39mAFV 1_05_2021.pdf\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n\u001b[1;32m 6\u001b[0m \u001b[39m# ['metadata'] attribute returns\u001b[39;00m\n\u001b[1;32m 7\u001b[0m \u001b[39m# key-value pairs of meta-data\u001b[39;00m\n\u001b[1;32m 8\u001b[0m \u001b[39mprint\u001b[39m(parsed_pdf[\u001b[39m'\u001b[39m\u001b[39mmetadata\u001b[39m\u001b[39m'\u001b[39m])\n", + "File \u001b[0;32m/opt/homebrew/lib/python3.10/site-packages/tika/parser.py:40\u001b[0m, in \u001b[0;36mfrom_file\u001b[0;34m(filename, serverEndpoint, service, xmlContent, headers, config_path, requestOptions, raw_response)\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[39m'''\u001b[39;00m\n\u001b[1;32m 25\u001b[0m \u001b[39mParses a file for metadata and content\u001b[39;00m\n\u001b[1;32m 26\u001b[0m \u001b[39m:param filename: path to file which needs to be parsed or binary file using open(path,'rb')\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 37\u001b[0m \u001b[39m 'content' has a str value and metadata has a dict type value.\u001b[39;00m\n\u001b[1;32m 38\u001b[0m \u001b[39m'''\u001b[39;00m\n\u001b[1;32m 39\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m xmlContent:\n\u001b[0;32m---> 40\u001b[0m output \u001b[39m=\u001b[39m parse1(service, filename, serverEndpoint, headers\u001b[39m=\u001b[39;49mheaders, config_path\u001b[39m=\u001b[39;49mconfig_path, requestOptions\u001b[39m=\u001b[39;49mrequestOptions)\n\u001b[1;32m 41\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 42\u001b[0m output \u001b[39m=\u001b[39m parse1(service, filename, serverEndpoint, services\u001b[39m=\u001b[39m{\u001b[39m'\u001b[39m\u001b[39mmeta\u001b[39m\u001b[39m'\u001b[39m: \u001b[39m'\u001b[39m\u001b[39m/meta\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mtext\u001b[39m\u001b[39m'\u001b[39m: \u001b[39m'\u001b[39m\u001b[39m/tika\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mall\u001b[39m\u001b[39m'\u001b[39m: \u001b[39m'\u001b[39m\u001b[39m/rmeta/xml\u001b[39m\u001b[39m'\u001b[39m},\n\u001b[1;32m 43\u001b[0m headers\u001b[39m=\u001b[39mheaders, config_path\u001b[39m=\u001b[39mconfig_path, requestOptions\u001b[39m=\u001b[39mrequestOptions)\n", + "File \u001b[0;32m/opt/homebrew/lib/python3.10/site-packages/tika/tika.py:337\u001b[0m, in \u001b[0;36mparse1\u001b[0;34m(option, urlOrPath, serverEndpoint, verbose, tikaServerJar, responseMimeType, services, rawResponse, headers, config_path, requestOptions)\u001b[0m\n\u001b[1;32m 335\u001b[0m headers\u001b[39m.\u001b[39mupdate({\u001b[39m'\u001b[39m\u001b[39mAccept\u001b[39m\u001b[39m'\u001b[39m: responseMimeType, \u001b[39m'\u001b[39m\u001b[39mContent-Disposition\u001b[39m\u001b[39m'\u001b[39m: make_content_disposition_header(path\u001b[39m.\u001b[39mencode(\u001b[39m'\u001b[39m\u001b[39mutf-8\u001b[39m\u001b[39m'\u001b[39m) \u001b[39mif\u001b[39;00m \u001b[39mtype\u001b[39m(path) \u001b[39mis\u001b[39;00m unicode_string \u001b[39melse\u001b[39;00m path)})\n\u001b[1;32m 336\u001b[0m \u001b[39mwith\u001b[39;00m urlOrPath \u001b[39mif\u001b[39;00m _is_file_object(urlOrPath) \u001b[39melse\u001b[39;00m \u001b[39mopen\u001b[39m(path, \u001b[39m'\u001b[39m\u001b[39mrb\u001b[39m\u001b[39m'\u001b[39m) \u001b[39mas\u001b[39;00m f:\n\u001b[0;32m--> 337\u001b[0m status, response \u001b[39m=\u001b[39m callServer(\u001b[39m'\u001b[39;49m\u001b[39mput\u001b[39;49m\u001b[39m'\u001b[39;49m, serverEndpoint, service, f,\n\u001b[1;32m 338\u001b[0m headers, verbose, tikaServerJar, config_path\u001b[39m=\u001b[39;49mconfig_path,\n\u001b[1;32m 339\u001b[0m rawResponse\u001b[39m=\u001b[39;49mrawResponse, requestOptions\u001b[39m=\u001b[39;49mrequestOptions)\n\u001b[1;32m 341\u001b[0m \u001b[39mif\u001b[39;00m file_type \u001b[39m==\u001b[39m \u001b[39m'\u001b[39m\u001b[39mremote\u001b[39m\u001b[39m'\u001b[39m: os\u001b[39m.\u001b[39munlink(path)\n\u001b[1;32m 342\u001b[0m \u001b[39mreturn\u001b[39;00m (status, response)\n", + "File \u001b[0;32m/opt/homebrew/lib/python3.10/site-packages/tika/tika.py:532\u001b[0m, in \u001b[0;36mcallServer\u001b[0;34m(verb, serverEndpoint, service, data, headers, verbose, tikaServerJar, httpVerbs, classpath, rawResponse, config_path, requestOptions)\u001b[0m\n\u001b[1;32m 530\u001b[0m \u001b[39mglobal\u001b[39;00m TikaClientOnly\n\u001b[1;32m 531\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m TikaClientOnly:\n\u001b[0;32m--> 532\u001b[0m serverEndpoint \u001b[39m=\u001b[39m checkTikaServer(scheme, serverHost, port, tikaServerJar, classpath, config_path)\n\u001b[1;32m 534\u001b[0m serviceUrl \u001b[39m=\u001b[39m serverEndpoint \u001b[39m+\u001b[39m service\n\u001b[1;32m 535\u001b[0m \u001b[39mif\u001b[39;00m verb \u001b[39mnot\u001b[39;00m \u001b[39min\u001b[39;00m httpVerbs:\n", + "File \u001b[0;32m/opt/homebrew/lib/python3.10/site-packages/tika/tika.py:602\u001b[0m, in \u001b[0;36mcheckTikaServer\u001b[0;34m(scheme, serverHost, port, tikaServerJar, classpath, config_path)\u001b[0m\n\u001b[1;32m 600\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m status:\n\u001b[1;32m 601\u001b[0m log\u001b[39m.\u001b[39merror(\u001b[39m\"\u001b[39m\u001b[39mFailed to receive startup confirmation from startServer.\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m--> 602\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mUnable to start Tika server.\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 603\u001b[0m \u001b[39mreturn\u001b[39;00m serverEndpoint\n", + "\u001b[0;31mRuntimeError\u001b[0m: Unable to start Tika server." + ] + } + ], + "source": [ + "# import parser object from tike\n", + "from tika import parser\n", + "\n", + "parsed_pdf = parser.from_file(\"AFV 1_05_2021.pdf\")\n", + "\n", + "# ['metadata'] attribute returns\n", + "# key-value pairs of meta-data\n", + "print(parsed_pdf['metadata'])\n", + "\n", + "# \n", + "print(type(parsed_pdf['metadata']))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# import parser object from tike\n", + "from tika import parser\n", + "\n", + "# opening pdf file\n", + "parsed_pdf = parser.from_file(\"AFV 1_05_2021.pdf\")\n", + "\n", + "# saving content of pdf\n", + "# you can also bring text only, by parsed_pdf['text']\n", + "# parsed_pdf['content'] returns string\n", + "data = parsed_pdf['content']\n", + "\n", + "# Printing of content\n", + "print(data)\n", + "\n", + "# \n", + "print(type(data))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "import tika\n", + "from tika import parser\n", + "parsed = parser.from_file('AFV 1_05_2021.pdf')\n", + "print(parsed[\"metadata\"])\n", + "print(parsed[\"content\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "curl -X POST 54.89.242.59:80/invoice -F \"file=@AFV 1_05_2021.pdf\"\n", + "\n", + "curl ec2-54-89-242-59.compute-1.amazonaws.com\n", + "\n", + "ec2-54-89-242-59.compute-1.amazonaws.com " + ] + }, + { + "cell_type": "code", + "execution_count": 121, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "xd\n" + ] + } + ], + "source": [ + "WEBSERVICE_URL = \"100.24.236.135:80\"\n", + "\n", + "import requests\n", + "import glob\n", + "import sys\n", + "\n", + "print(\"xd\")\n", + "\n", + "txtfiles = []\n", + "\n", + "for file in glob.glob(\"./5_train/*.pdf\"):\n", + " print(f\"Testowanie pliku {file}\")\n", + " files = {'file': open(file, 'rb')}\n", + " response =requests.post(WEBSERVICE_URL + \"/invoice\", files=files)\n", + " try:\n", + " data = response.json()\n", + " if \"vat_id\" in data and \"address\" in data and \"total\" in data and \\\n", + " not data[\"vat_id\"] and not data[\"address\"] and isinstance(data[\"total\"], float):\n", + " print(f'OK: {data[\"vat_id\"]}\\t{data[\"address\"]}\\t{data[\"total\"]}')\n", + " else:\n", + " print(\"NIEPOPRAWNA ODPOWIEDŹ\")\n", + " except:\n", + " print(\"Błędna odpowiedź z serwera (%s):\"%(sys.exc_info()[0]), file=sys.stderr)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}