from flask import send_file
from flask_restful import Resource, reqparse
import werkzeug
import time
import io
import itertools
import nltk
from nltk import tokenize


class Video(Resource):
    """REST resource: accept an uploaded Polish text file, translate it
    sentence-by-sentence to English with the injected seq2seq model, and
    return the translated file as an attachment.

    Expects ``model`` and ``tokenizer`` to be supplied via
    ``Api.add_resource(..., resource_class_kwargs=...)``.
    """

    def __init__(self, **kwargs):
        super().__init__()
        self.parser = reqparse.RequestParser()
        self.model = kwargs['model']          # seq2seq translation model (e.g. mBART)
        self.tokenizer = kwargs['tokenizer']  # matching tokenizer; must expose lang_code_to_id
        self.parser.add_argument(
            'file',
            required=True,
            type=werkzeug.datastructures.FileStorage,
            location='files',
        )
        # Fetch the NLTK sentence-tokenizer data once at construction time
        # instead of re-running the download check on every request.
        nltk.download('punkt', quiet=True)

    def post(self):
        """Handle an upload: save it under in/, translate it, send back the out/ file.

        Returns:
            The translated file as an attachment on success, or a JSON error
            payload with HTTP status 500 on failure.
        """
        try:
            text_file = self.parser.parse_args().file
            request_id = int(time.time())  # timestamp doubles as a request id
            text_path = f"in/{request_id}_pl.txt"
            text_file.save(text_path)
            self.run_on_video(text_path, request_id)
            path_file = f"out/{request_id}_en.txt"
            return send_file(path_file, as_attachment=True, conditional=True)
        except Exception as e:
            print(e)
            # BUG FIX: the raw exception object is not JSON-serializable, so
            # the original error response itself crashed; return its string
            # form instead, and signal the failure with HTTP 500.
            return {'file_storage_result': 'fail', 'error': str(e)}, 500

    def run_on_video(self, file_path, request_id):
        """Translate the text in *file_path* and write ``out/<request_id>_en.txt``.

        The input is split into sentences with NLTK, each sentence is
        translated independently through the model, and the translations are
        written to the output file separated by spaces.
        """
        with io.open(file_path, 'r', encoding='utf8') as f:
            text = ' '.join(f.readlines())
        sentences = tokenize.sent_tokenize(text)

        translated = []
        for sentence in sentences:
            model_inputs = self.tokenizer(sentence, return_tensors="pt")
            generated_tokens = self.model.generate(
                **model_inputs,
                # Force English as the decoder's target language.
                forced_bos_token_id=self.tokenizer.lang_code_to_id["en_XX"],
            )
            translated.append(
                self.tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
            )

        # batch_decode returns a list per sentence; flatten lazily before writing.
        with io.open(f"out/{request_id}_en.txt", 'w', encoding='utf8') as f:
            for line in itertools.chain.from_iterable(translated):
                f.write(line + ' ')