"""API views that send text through the correction pipeline and report the edits."""
from pathlib import Path
import subprocess

from django.conf import settings
import regex as re
from rest_framework import status
from rest_framework.decorators import api_view, renderer_classes
from rest_framework.response import Response
from rest_framework.views import APIView
from websocket import create_connection

from .renderers import MyXMLRenderer
from .serializers import TranslationSerializer

BASE_DIR = Path(__file__).resolve().parent

# Markers produced by ``git diff --word-diff``: ``[-removed-]`` and ``{+added+}``.
# A span that opens with ``[-`` and closes with ``+}`` without crossing another
# ``[`` is treated as a replacement of one text by another.
_REPLACEMENT_RE = re.compile(r'\[-([^\[]*?)\+}')
_REPLACED_OLD_RE = re.compile(r'^(.*?)\-]')
_REPLACED_NEW_RE = re.compile(r'{\+(.*?)$')
_REMOVAL_RE = re.compile(r'\[\-(.*?)\-]')
_ADDITION_RE = re.compile(r'{\+(.*?)\+}')

# ``git diff`` prints five header lines (diff/index/---/+++/@@) before the body.
_DIFF_HEADER_LINES = 5


def _grammar_error(correction, context, msg):
    """Build one error entry in the shape returned by ``diff_text``."""
    return {
        'id': 'grammar-error',
        'type': 'grammar',
        'correction': correction,
        'context': context,
        'msg': msg,
    }


def diff_text(original, corrected):
    """Turn word-diff output into a list of grammar-error entries.

    Args:
        original: The text as submitted by the client.
        corrected: Word-diff output containing ``[-old-]`` / ``{+new+}``
            markers, compared line by line against ``original``.

    Returns:
        A list of dicts with the keys ``id``, ``type``, ``correction``,
        ``context`` and ``msg``: one entry per replacement, standalone
        removal and standalone addition found in ``corrected``.
    """
    original_lines = original.splitlines(True)
    output = []
    for idx, line in enumerate(corrected.splitlines(True)):
        # NOTE(review): the context lookup assumes diff line N corresponds to
        # original line N; fall back to an empty context instead of raising
        # IndexError when the diff has more lines than the original.
        context = original_lines[idx] if idx < len(original_lines) else ''

        for group in _REPLACEMENT_RE.findall(line):
            removed = _REPLACED_OLD_RE.findall(group)
            added = _REPLACED_NEW_RE.findall(group)
            if not removed or not added:
                # Malformed span (e.g. a literal "+}" in the text): skip it
                # rather than crash on removed[0] / added[0].
                continue
            output.append(_grammar_error(
                added[0], context,
                f"Zamiana '{removed[0]}' na '{added[0]}'",
            ))

        # Whatever is left after dropping replacements is a pure removal or
        # a pure addition. ``findall`` always returns a list, so iterate
        # directly (the former ``is list`` check was never true).
        remainder = _REPLACEMENT_RE.sub('', line)
        for remove in _REMOVAL_RE.findall(remainder):
            output.append(_grammar_error("", context, f"Usunięcie '{remove}'"))
        for add in _ADDITION_RE.findall(remainder):
            output.append(_grammar_error("", context, f"Dodanie '{add}'"))
    return output


def _pipe_through(argv, text):
    """Run ``argv`` without a shell, feeding ``text`` plus a newline on stdin.

    The trailing newline mirrors what ``echo`` used to append. Passing the
    text on stdin keeps untrusted input away from any shell parser. The exit
    status is deliberately not checked, matching the earlier best-effort
    behaviour. Returns the raw stdout bytes.
    """
    completed = subprocess.run(
        argv,
        input=f'{text}\n'.encode('utf-8'),
        stdout=subprocess.PIPE,
    )
    return completed.stdout


def _run_script(script_name, text):
    """Feed ``text`` to a shell script located next to this module."""
    return _pipe_through(['sh', str(BASE_DIR / script_name)], text)


def _git_store_blob(text):
    """Write ``text`` to the git object store and return the blob hash."""
    stdout = _pipe_through(['git', 'hash-object', '-w', '--stdin'], text)
    return stdout.decode('utf-8').strip()


def _word_diff(original, corrected):
    """Return the ``git diff --word-diff`` body between two texts."""
    old_blob = _git_store_blob(original)
    new_blob = _git_store_blob(corrected)
    completed = subprocess.run(
        ['git', 'diff', '--word-diff', old_blob, new_blob],
        stdout=subprocess.PIPE,
    )
    # Equivalent of ``tail -n +6``: drop the diff header lines.
    body = b'\n'.join(completed.stdout.split(b'\n')[_DIFF_HEADER_LINES:])
    return body.decode('utf-8').rstrip()


def _correct_text(text):
    """Run ``text`` through the correction pipeline.

    Steps: ``preprocess_text.sh`` (BPE encoding, per the original comments),
    the service behind ``settings.TRANSLATION_WEBSOCKET``,
    ``postprocess_text.sh`` (BPE decoding), then a word diff of the input
    against the result.

    Returns:
        A ``(corrected_text, marked_errors)`` tuple of strings.
    """
    encoded = _run_script('preprocess_text.sh', text).decode('utf-8')

    ws = create_connection(settings.TRANSLATION_WEBSOCKET)
    try:
        ws.send(encoded)
        result = ws.recv()
    finally:
        # Always release the socket, even when send/recv fails.
        ws.close()
    if isinstance(result, bytes):
        result = result.decode('utf-8')

    decoded = _run_script('postprocess_text.sh', result.rstrip())
    corrected = decoded.decode('utf-8').rstrip()
    return corrected, _word_diff(text, corrected)


@api_view(['POST'])
@renderer_classes([MyXMLRenderer])
def xml_translate(request):
    """Correct the posted ``text`` and respond with the list of detected errors.

    Responds with 400 "Bad request" when ``TranslationSerializer`` rejects
    the payload; otherwise with the ``diff_text`` entries, rendered by
    ``MyXMLRenderer``.
    """
    serializer = TranslationSerializer(data=request.data)
    if not serializer.is_valid():
        return Response("Bad request", status=status.HTTP_400_BAD_REQUEST)

    text = request.data["text"]
    _, marked_errors = _correct_text(text)
    return Response(diff_text(text, marked_errors), status=status.HTTP_200_OK)


class TranslationAPIView(APIView):
    """Endpoint returning the corrected text together with the raw word diff."""

    def post(self, request):
        """Correct the posted ``text``.

        Responds with 400 "Bad request" when ``TranslationSerializer``
        rejects the payload; otherwise with ``corrected_text`` and
        ``errors`` (the word-diff output with its markers intact).
        """
        serializer = TranslationSerializer(data=request.data)
        if not serializer.is_valid():
            return Response("Bad request", status=status.HTTP_400_BAD_REQUEST)

        corrected, marked_errors = _correct_text(request.data["text"])
        return Response(
            {'corrected_text': corrected, "errors": marked_errors},
            status=status.HTTP_200_OK,
        )