57 lines
1.9 KiB
Python
57 lines
1.9 KiB
Python
from flask import send_file
|
|
from flask_restful import Resource, reqparse
|
|
import werkzeug
|
|
import time
|
|
import io
|
|
import itertools
|
|
import nltk
|
|
from nltk import tokenize
|
|
|
|
|
|
class Video(Resource):
    """REST resource that translates an uploaded text file.

    Despite its name, the resource works on plain-text uploads: the file is
    saved under ``in/``, split into sentences, every sentence is run through
    the injected model, and the joined result is written under ``out/`` and
    sent back to the client as an attachment.
    """

    def __init__(self, **kwargs):
        """Store the injected model/tokenizer and declare the upload argument.

        Args:
            **kwargs: must contain ``model`` and ``tokenizer``.

        Raises:
            KeyError: if ``model`` or ``tokenizer`` is not supplied.
        """
        super().__init__()
        self.parser = reqparse.RequestParser()
        self.model = kwargs['model']
        self.tokenizer = kwargs['tokenizer']
        self.parser.add_argument(
            'file',
            required=True,
            type=werkzeug.datastructures.FileStorage,
            location='files',
        )

    @staticmethod
    def _output_path(request_id):
        """Return the path of the translated file for *request_id*."""
        return 'out/' + str(request_id) + '_en.txt'

    def post(self):
        """Handle an upload: save it, translate it, and return the result.

        Returns:
            The ``send_file`` response for the translated file on success.
            On any failure, a dict with ``file_storage_result`` set to
            ``'fail'`` and ``error`` set to the exception message.
        """
        try:
            text_file = self.parser.parse_args().file
            # NOTE(review): ids have one-second resolution, so two uploads
            # arriving within the same second share the same in/out paths.
            request_id = int(time.time())
            text_path = "in/" + str(request_id) + '_pl.txt'
            text_file.save(text_path)
            self.run_on_video(text_path, request_id)
            return send_file(
                self._output_path(request_id),
                as_attachment=True,
                conditional=True,
            )
        except Exception as e:
            # Top-level request boundary: report the failure to the client
            # instead of letting it propagate.
            print(e)
            # The exception object itself is not JSON serializable; returning
            # it would replace the real error with a serialization TypeError.
            return {'file_storage_result': 'fail', 'error': str(e)}

    def run_on_video(self, file_path, request_id):
        """Translate the text in *file_path* and write it to the output file.

        The text is split into sentences with NLTK, each sentence is passed
        to ``self.model.generate`` with the ``en_XX`` language id forced as
        the first generated token, and the decoded outputs are written,
        each followed by a single space, to ``out/<request_id>_en.txt``.

        Args:
            file_path: path of the UTF-8 text file to translate.
            request_id: identifier used to build the output file name.
        """
        # Only fetch the sentence tokenizer data when it is not installed yet,
        # rather than contacting the download server on every request.
        try:
            nltk.data.find('tokenizers/punkt')
        except LookupError:
            nltk.download('punkt')

        with io.open(file_path, 'r', encoding='utf8') as f:
            text = ' '.join(f.readlines())

        translations = []
        for sentence in tokenize.sent_tokenize(text):
            model_inputs = self.tokenizer(sentence, return_tensors="pt")
            generated_tokens = self.model.generate(
                **model_inputs,
                forced_bos_token_id=self.tokenizer.lang_code_to_id["en_XX"]
            )
            # batch_decode returns a list of strings; flatten as we go.
            translations.extend(
                self.tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
            )

        with io.open(self._output_path(request_id), 'w', encoding='utf8') as f:
            f.write(''.join(line + ' ' for line in translations))
|