diff --git a/.gitignore b/.gitignore index 308a5a1..61db740 100644 --- a/.gitignore +++ b/.gitignore @@ -143,3 +143,4 @@ cython_debug/ .vscode db.sqlite3 +translation/files/ \ No newline at end of file diff --git a/backend/translation/views.py b/backend/translation/views.py index 1c67850..65d41af 100644 --- a/backend/translation/views.py +++ b/backend/translation/views.py @@ -1,5 +1,7 @@ +from tracemalloc import start from rest_framework.views import APIView from rest_framework.response import Response +from rest_framework.parsers import MultiPartParser from websocket import create_connection from django.conf import settings import subprocess @@ -7,15 +9,31 @@ from .serializers import TranslationSerializer from rest_framework import status from pathlib import Path import regex as re +import uuid +import os BASE_DIR = Path(__file__).resolve().parent -from rest_framework.decorators import api_view, renderer_classes +from rest_framework.decorators import api_view, renderer_classes, parser_classes from .renderers import MyXMLRenderer +def get_context(line, start_index, end_index): + + start_index = start_index - 20 + end_index = end_index + 20 + + if start_index >=0 and end_index <= len(line) -1: + return '...' + line[start_index:end_index] + '...' + else: + if start_index < 0 and end_index <= len(line) -1: + return line[0:end_index] + '...' + elif start_index >= 0 and end_index > len(line) -1: + return '...'+line[start_index:len(line)-1] + else: + return line + def diff_text(original, corrected): lines = corrected.splitlines(1) - original_lines = original.splitlines(1) output = [] for idx, line in enumerate(lines): groups_found = re.findall('\[-([^\[]*?)\+}', line) @@ -25,42 +43,61 @@ def diff_text(original, corrected): removed = re.findall('^(.*?)\-]', group) added = re.findall('{\+(.*?)$', group) - output.append({'id': 'grammar-error', 'type': 'grammar', 'correction': added[0], 'context': original_lines[idx], 'msg': f"Zamiana '{removed[0]}' na '{added[0]}'"}) + start_position = re.search('\[-([^\[]*?)\+}', line).start() + end_position = re.search('\[-([^\[]*?)\+}', line).end() + + + output.append({'id': 'grammar-error', 'type': 'grammar', 'correction': added[0], 'context': f'{get_context(lines[idx], start_position, end_position)}', 'msg': f"Zamiana '{removed[0]}' na '{added[0]}'"}) removed = re.findall('\[\-(.*?)\-]', new_line) added = re.findall('{\+(.*?)\+}', new_line) if removed is list: for remove in removed: - output.append({'id': 'grammar-error', 'type': 'grammar', 'correction': "", 'context': original_lines[idx], 'msg': f"Usunięcie '{remove}"}) + output.append({'id': 'grammar-error', 'type': 'grammar', 'correction': "", 'context': f'{lines[idx]}', 'msg': f"Usunięcie '{remove}"}) if added is list: for add in added: - output.append({'id': 'grammar-error', 'type': 'grammar', 'correction': "", 'context': original_lines[idx], 'msg': f"Dodanie '{add}"}) + output.append({'id': 'grammar-error', 'type': 'grammar', 'correction': "", 'context': f'{lines[idx]}', 'msg': f"Dodanie '{add}"}) return output @api_view(['POST']) +@parser_classes([MultiPartParser]) @renderer_classes([MyXMLRenderer]) -def xml_translate(request): - serializer = TranslationSerializer(data=request.data) +def xml_translate(request, format=None): - if serializer.is_valid(): - # Encode to BPE. - proc = subprocess.Popen(f'echo "{request.data["text"]}" | sh {BASE_DIR}/preprocess_text.sh', stdout=subprocess.PIPE, shell=True) - output, err = proc.communicate() - ws = create_connection(settings.TRANSLATION_WEBSOCKET) - text = output.decode('utf-8') - ws.send(text) - result = ws.recv() - # Decode from BPE. - sec_proc = subprocess.Popen(f'echo "{result.rstrip()}" | sh {BASE_DIR}/postprocess_text.sh', stdout=subprocess.PIPE, shell=True) - sec_output, err = sec_proc.communicate() - # Decode from BPE. - third_proc = subprocess.Popen(f'git diff $(echo "{request.data["text"]}" | git hash-object -w --stdin) $(echo "{sec_output.decode("utf-8").rstrip()}" | git hash-object -w --stdin) --word-diff | tail -n +6', stdout=subprocess.PIPE, shell=True) - third_output, err = third_proc.communicate() - marked_errors = third_output.decode('utf-8').rstrip() - return Response(diff_text(request.data["text"], marked_errors), status=status.HTTP_200_OK) - else: - return Response("Bad request", status=status.HTTP_400_BAD_REQUEST) + uploaded_file = request.FILES['file'] + + original_text = str(uploaded_file.read().decode('utf-8')) + # Save raw txt file + raw_filename = uuid.uuid1() + with open(f'./translation/files/{raw_filename}.txt', 'w') as file: + file.write(original_text) + + proc = subprocess.Popen(f'cat {BASE_DIR}/files/{raw_filename}.txt | sh {BASE_DIR}/preprocess_text.sh', stdout=subprocess.PIPE, shell=True) + output, err = proc.communicate() + ws = create_connection(settings.TRANSLATION_WEBSOCKET) + text = output.decode('utf-8') + ws.send(text) + result = ws.recv() + result_filename = uuid.uuid1() + with open(f'./translation/files/{result_filename}.txt', 'w') as file: + file.write(result) + + sec_proc = subprocess.Popen(f'cat {BASE_DIR}/files/{result_filename}.txt | sh {BASE_DIR}/postprocess_text.sh', stdout=subprocess.PIPE, shell=True) + sec_output, err = sec_proc.communicate() + sec_output_filename = uuid.uuid1() + with open(f'./translation/files/{sec_output_filename}.txt', 'w') as file: + file.write(sec_output.decode("utf-8")) + + third_proc = subprocess.Popen(f'git diff $(cat {BASE_DIR}/files/{raw_filename}.txt | git hash-object -w --stdin) $(cat {BASE_DIR}/files/{sec_output_filename}.txt | git hash-object -w --stdin) --word-diff | tail -n +6', stdout=subprocess.PIPE, shell=True) + third_output, err = third_proc.communicate() + marked_errors = third_output.decode('utf-8').rstrip() + + os.remove(f"./translation/files/{raw_filename}.txt") + os.remove(f"./translation/files/{result_filename}.txt") + os.remove(f"./translation/files/{sec_output_filename}.txt") + + return Response(diff_text(original_text, marked_errors), status=status.HTTP_200_OK) class TranslationAPIView(APIView): def post(self, request):