init
This commit is contained in:
commit
98dc92beea
190
.gitignore
vendored
Normal file
190
.gitignore
vendored
Normal file
@ -0,0 +1,190 @@
|
||||
# Created by .ignore support plugin (hsz.mobi)
|
||||
# MY IGNORES
|
||||
model-en2pl
|
||||
|
||||
|
||||
|
||||
### Python template
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
env/
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*,cover
|
||||
.hypothesis/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
target/
|
||||
|
||||
# IPython Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# pyenv
|
||||
.python-version
|
||||
|
||||
# celery beat schedule file
|
||||
celerybeat-schedule
|
||||
|
||||
# dotenv
|
||||
.env
|
||||
|
||||
# virtualenv
|
||||
venv/
|
||||
ENV/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
### VirtualEnv template
|
||||
# Virtualenv
|
||||
# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
|
||||
[Bb]in
|
||||
[Ii]nclude
|
||||
[Ll]ib
|
||||
[Ll]ib64
|
||||
[Ll]ocal
|
||||
[Ss]cripts
|
||||
pyvenv.cfg
|
||||
.venv
|
||||
pip-selfcheck.json
|
||||
|
||||
### JetBrains template
|
||||
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
|
||||
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
|
||||
|
||||
# User-specific stuff
|
||||
.idea/**/workspace.xml
|
||||
.idea/**/tasks.xml
|
||||
.idea/**/usage.statistics.xml
|
||||
.idea/**/dictionaries
|
||||
.idea/**/shelf
|
||||
|
||||
# AWS User-specific
|
||||
.idea/**/aws.xml
|
||||
|
||||
# Generated files
|
||||
.idea/**/contentModel.xml
|
||||
|
||||
# Sensitive or high-churn files
|
||||
.idea/**/dataSources/
|
||||
.idea/**/dataSources.ids
|
||||
.idea/**/dataSources.local.xml
|
||||
.idea/**/sqlDataSources.xml
|
||||
.idea/**/dynamic.xml
|
||||
.idea/**/uiDesigner.xml
|
||||
.idea/**/dbnavigator.xml
|
||||
|
||||
# Gradle
|
||||
.idea/**/gradle.xml
|
||||
.idea/**/libraries
|
||||
|
||||
# Gradle and Maven with auto-import
|
||||
# When using Gradle or Maven with auto-import, you should exclude module files,
|
||||
# since they will be recreated, and may cause churn. Uncomment if using
|
||||
# auto-import.
|
||||
# .idea/artifacts
|
||||
# .idea/compiler.xml
|
||||
# .idea/jarRepositories.xml
|
||||
# .idea/modules.xml
|
||||
# .idea/*.iml
|
||||
# .idea/modules
|
||||
# *.iml
|
||||
# *.ipr
|
||||
|
||||
# CMake
|
||||
cmake-build-*/
|
||||
|
||||
# Mongo Explorer plugin
|
||||
.idea/**/mongoSettings.xml
|
||||
|
||||
# File-based project format
|
||||
*.iws
|
||||
|
||||
# IntelliJ
|
||||
out/
|
||||
|
||||
# mpeltonen/sbt-idea plugin
|
||||
.idea_modules/
|
||||
|
||||
# JIRA plugin
|
||||
atlassian-ide-plugin.xml
|
||||
|
||||
# Cursive Clojure plugin
|
||||
.idea/replstate.xml
|
||||
|
||||
# SonarLint plugin
|
||||
.idea/sonarlint/
|
||||
|
||||
# Crashlytics plugin (for Android Studio and IntelliJ)
|
||||
com_crashlytics_export_strings.xml
|
||||
crashlytics.properties
|
||||
crashlytics-build.properties
|
||||
fabric.properties
|
||||
|
||||
# Editor-based Rest Client
|
||||
.idea/httpRequests
|
||||
|
||||
# Android studio 3.1+ serialized cache file
|
||||
.idea/caches/build_file_checksums.ser
|
||||
|
||||
# idea folder, uncomment if you don't need it
|
||||
.idea
|
8
.idea/.gitignore
vendored
Normal file
8
.idea/.gitignore
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
# Default ignored files
|
||||
/shelf/
|
||||
/workspace.xml
|
||||
# Editor-based HTTP Client requests
|
||||
/httpRequests/
|
||||
# Datasource local storage ignored files
|
||||
/dataSources/
|
||||
/dataSources.local.xml
|
22
.idea/inspectionProfiles/Project_Default.xml
Normal file
22
.idea/inspectionProfiles/Project_Default.xml
Normal file
@ -0,0 +1,22 @@
|
||||
<component name="InspectionProjectProfileManager">
|
||||
<profile version="1.0">
|
||||
<option name="myName" value="Project Default" />
|
||||
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
|
||||
<option name="ignoredPackages">
|
||||
<value>
|
||||
<list size="9">
|
||||
<item index="0" class="java.lang.String" itemvalue="Werkzeug" />
|
||||
<item index="1" class="java.lang.String" itemvalue="aniso8601" />
|
||||
<item index="2" class="java.lang.String" itemvalue="MarkupSafe" />
|
||||
<item index="3" class="java.lang.String" itemvalue="pytz" />
|
||||
<item index="4" class="java.lang.String" itemvalue="itsdangerous" />
|
||||
<item index="5" class="java.lang.String" itemvalue="click" />
|
||||
<item index="6" class="java.lang.String" itemvalue="Jinja2" />
|
||||
<item index="7" class="java.lang.String" itemvalue="Flask-RESTful" />
|
||||
<item index="8" class="java.lang.String" itemvalue="Flask" />
|
||||
</list>
|
||||
</value>
|
||||
</option>
|
||||
</inspection_tool>
|
||||
</profile>
|
||||
</component>
|
6
.idea/misc.xml
Normal file
6
.idea/misc.xml
Normal file
@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectRootManager" version="2" languageLevel="JDK_17" project-jdk-name="Python 3.8 (pbr-private)" project-jdk-type="Python SDK">
|
||||
<output url="file://$PROJECT_DIR$/out" />
|
||||
</component>
|
||||
</project>
|
8
.idea/modules.xml
Normal file
8
.idea/modules.xml
Normal file
@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/pbr-private.iml" filepath="$PROJECT_DIR$/.idea/pbr-private.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
11
.idea/pbr-private.iml
Normal file
11
.idea/pbr-private.iml
Normal file
@ -0,0 +1,11 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="JAVA_MODULE" version="4">
|
||||
<component name="NewModuleRootManager" inherit-compiler-output="true">
|
||||
<exclude-output />
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<excludeFolder url="file://$MODULE_DIR$/venv" />
|
||||
</content>
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
7
.idea/vcs.xml
Normal file
7
.idea/vcs.xml
Normal file
@ -0,0 +1,7 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="" vcs="Git" />
|
||||
<mapping directory="$PROJECT_DIR$/mbart-large-50-one-to-many-mmt" vcs="Git" />
|
||||
</component>
|
||||
</project>
|
22
Dockerfile
Normal file
22
Dockerfile
Normal file
@ -0,0 +1,22 @@
|
||||
FROM ubuntu:18.04

# Absolute path: a relative WORKDIR is resolved against the previous WORKDIR,
# which makes the final location depend on the base image.
WORKDIR /app

# System packages and Python dependencies are installed BEFORE the
# application sources are copied, so editing app code does not invalidate
# the (large, slow) dependency layers.
RUN apt-get update \
    && apt-get install -y python3-pip python3-dev \
    && cd /usr/local/bin \
    && ln -s /usr/bin/python3 python \
    && pip3 install --upgrade pip \
    && rm -rf /var/lib/apt/lists/*

RUN pip3 install --no-cache-dir torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
RUN pip3 install --no-cache-dir protobuf==3.17.3 transformers nltk sentencepiece
RUN pip3 install --no-cache-dir flask_restful werkzeug

# Fetch the sentence-tokenizer data at build time so it is already present
# when the first request arrives.
RUN python3 -c "import nltk; nltk.download('punkt')"

COPY model-pl2en model-pl2en
COPY resources resources
COPY translate.py .
COPY app.py .
COPY in in

# out/ is git-ignored, so it does not exist in a fresh checkout and
# `COPY out out` would fail the build; create the directory instead.
RUN mkdir -p out

CMD ["python3", "-m", "flask", "run", "--host=0.0.0.0"]
|
20
app.py
Normal file
20
app.py
Normal file
@ -0,0 +1,20 @@
|
||||
#!flask/bin/python
"""Flask entry point for the Polish -> English translation service.

Loads the mBART model and tokenizer once at import time and hands them to
the ``Video`` resource, which is served at ``/api/video``.
"""
import os

from flask import Flask
from flask_restful import Api
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast

from resources.video import Video

# Directory holding the fine-tuned model and its tokenizer files.
MODEL_DIR = "model-pl2en"

model = MBartForConditionalGeneration.from_pretrained(MODEL_DIR)
tokenizer = MBart50TokenizerFast.from_pretrained(MODEL_DIR, src_lang="pl_PL")

app = Flask(__name__)
api = Api(app)

# The resource receives the shared model/tokenizer through its constructor
# kwargs, so they are loaded once per process instead of once per request.
api.add_resource(
    Video,
    '/api/video',
    resource_class_kwargs={'model': model, 'tokenizer': tokenizer},
)

if __name__ == '__main__':
    # Debug mode enables the interactive Werkzeug debugger (arbitrary code
    # execution for anyone who can reach the port) and the reloader, which
    # loads the model a second time. Since the server binds to 0.0.0.0 it
    # must be off unless explicitly requested via FLASK_DEBUG.
    debug = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes')
    app.run(debug=debug, host='0.0.0.0')
|
1
in/1654710055_pl.txt
Normal file
1
in/1654710055_pl.txt
Normal file
@ -0,0 +1 @@
|
||||
przepis na ciasto bananowe jest zaskakująco prosty.
|
1
in/1654710117_pl.txt
Normal file
1
in/1654710117_pl.txt
Normal file
@ -0,0 +1 @@
|
||||
przepis na ciasto bananowe jest zaskakująco prosty.
|
1
in/1654710131_pl.txt
Normal file
1
in/1654710131_pl.txt
Normal file
@ -0,0 +1 @@
|
||||
przepis na ciasto bananowe jest zaskakująco prosty.
|
2
in/1654710175_pl.txt
Normal file
2
in/1654710175_pl.txt
Normal file
@ -0,0 +1,2 @@
|
||||
przepis na ciasto bananowe jest zaskakująco prosty.
|
||||
przepis na ciasto bananowe jest zaskakująco prosty i skuteczny.
|
3
readme.md
Normal file
3
readme.md
Normal file
@ -0,0 +1,3 @@
|
||||
# PBR TransFix translator
|
||||
|
||||
A Dockerized service exposing a POST endpoint (`/api/video`) that receives a text file in Polish and returns its English translation.
|
56
resources/video.py
Normal file
56
resources/video.py
Normal file
@ -0,0 +1,56 @@
|
||||
from flask import send_file
|
||||
from flask_restful import Resource, reqparse
|
||||
import werkzeug
|
||||
import time
|
||||
import io
|
||||
import itertools
|
||||
import nltk
|
||||
from nltk import tokenize
|
||||
|
||||
|
||||
class Video(Resource):
    """REST resource that translates an uploaded Polish text file to English.

    The translation model and its tokenizer are injected as keyword
    arguments (``model`` and ``tokenizer``) when the resource is constructed.
    """

    def __init__(self, **kwargs):
        """Store the injected model/tokenizer and set up the upload parser.

        Args:
            **kwargs: must contain ``model`` and ``tokenizer``.

        Raises:
            KeyError: if ``model`` or ``tokenizer`` is missing.
        """
        super().__init__()
        self.model = kwargs['model']
        self.tokenizer = kwargs['tokenizer']
        self.parser = reqparse.RequestParser()
        # The upload is expected as a multipart form field named "file".
        self.parser.add_argument(
            'file',
            required=True,
            type=werkzeug.datastructures.FileStorage,
            location='files',
        )

    def post(self):
        """Translate the uploaded file and send the result as an attachment.

        Returns:
            The translated file via ``send_file`` on success; on failure a
            ``({'file_storage_result': 'fail', 'error': <message>}, 500)``
            tuple.
        """
        # Parsed outside the try block so that a missing/invalid upload
        # propagates as the parser's own 400 response instead of being
        # swallowed by the generic handler below.
        text_file = self.parser.parse_args().file

        # NOTE(review): the id has one-second resolution, so two requests
        # arriving within the same second share the same file names.
        request_id = int(time.time())
        text_path = "in/" + str(request_id) + '_pl.txt'

        try:
            text_file.save(text_path)
            self.run_on_video(text_path, request_id)
            path_file = "out/" + str(request_id) + '_en.txt'
            return send_file(path_file, as_attachment=True, conditional=True)
        except Exception as e:
            print(e)
            # The exception object itself is not JSON-serializable; return
            # its message, and signal the failure with a 500 status.
            return {'file_storage_result': 'fail', 'error': str(e)}, 500

    def run_on_video(self, file_path, request_id):
        """Translate the text in ``file_path`` sentence by sentence.

        The result is written to ``out/<request_id>_en.txt`` with every
        translated sentence followed by a single space.

        Args:
            file_path: path of the UTF-8 encoded Polish input file.
            request_id: identifier used to name the output file.
        """
        # Only download the tokenizer data when it is not installed yet,
        # rather than contacting the NLTK server on every request.
        try:
            nltk.data.find('tokenizers/punkt')
        except LookupError:
            nltk.download('punkt')

        with io.open(file_path, 'r', encoding='utf8') as f:
            lines = f.readlines()

        # sent_tokenize defaults to the English model; the input is Polish.
        sentences = tokenize.sent_tokenize(' '.join(lines), language='polish')

        # Loop-invariant: id of the token that forces English output.
        target_lang_id = self.tokenizer.lang_code_to_id["en_XX"]

        translations = []
        for sentence in sentences:
            model_inputs = self.tokenizer(sentence, return_tensors="pt")
            generated_tokens = self.model.generate(
                **model_inputs,
                forced_bos_token_id=target_lang_id
            )
            # batch_decode returns a list of strings; flatten as we go.
            translations.extend(
                self.tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
            )

        with io.open('out/' + str(request_id) + '_en.txt', 'w', encoding='utf8') as f:
            f.writelines(line + ' ' for line in translations)
|
Loading…
Reference in New Issue
Block a user