grammatical-error-correctio.../backend/translation/views.py
Wojciech Jarmosz b586f3c7ed GEC-backend
2022-06-01 10:19:32 +02:00

87 lines
4.2 KiB
Python

from rest_framework.views import APIView
from rest_framework.response import Response
from websocket import create_connection
from django.conf import settings
import subprocess
from .serializers import TranslationSerializer
from rest_framework import status
from pathlib import Path
import regex as re
# Directory containing this module; the preprocess/postprocess shell scripts
# invoked by the views below are looked up relative to it.
BASE_DIR = Path(__file__).resolve().parent
from rest_framework.decorators import api_view, renderer_classes
from .renderers import MyXMLRenderer
def diff_text(original: str, corrected: str) -> list:
    """Turn word-diff markup into a list of grammar-error records.

    ``corrected`` is scanned line by line for ``[-old-]`` (deletion) and
    ``{+new+}`` (insertion) markers. Three kinds of records are produced,
    in this order for every line:

    * replacement -- a ``[-old-]`` ... ``{+new+}`` span with no ``[`` in
      between, reported as ``Zamiana 'old' na 'new'`` with ``correction``
      set to ``new``;
    * deletion -- any remaining ``[-old-]``, reported as ``Usunięcie 'old'``;
    * insertion -- any remaining ``{+new+}``, reported as ``Dodanie 'new'``.

    Args:
        original: The text before correction. Line ``i`` of it (line ending
            kept) becomes the ``context`` of records found on line ``i`` of
            ``corrected``.
        corrected: The marked-up text.

    Returns:
        A list of dicts with the keys ``id``, ``type``, ``correction``,
        ``context`` and ``msg``. ``context`` is an empty string when
        ``corrected`` has more lines than ``original``.
    """
    # Everything from "[-" up to the next "+}" provided no other "[" occurs
    # in between; the captured text looks like "old-]{+new".
    replacement_re = re.compile(r'\[-([^\[]*?)\+}')
    old_half_re = re.compile(r'^(.*?)\-]')
    new_half_re = re.compile(r'{\+(.*?)$')
    removal_re = re.compile(r'\[\-(.*?)\-]')
    addition_re = re.compile(r'{\+(.*?)\+}')

    original_lines = original.splitlines(keepends=True)
    output = []

    for idx, line in enumerate(corrected.splitlines(keepends=True)):
        # The marked-up text may be longer than the original; fall back to
        # an empty context instead of raising IndexError.
        context = original_lines[idx] if idx < len(original_lines) else ""

        for group in replacement_re.findall(line):
            removed = old_half_re.findall(group)
            added = new_half_re.findall(group)
            if not removed or not added:
                # Malformed span (e.g. "[-abc+}"): nothing sensible to report.
                continue
            output.append({
                'id': 'grammar-error',
                'type': 'grammar',
                'correction': added[0],
                'context': context,
                'msg': f"Zamiana '{removed[0]}' na '{added[0]}'",
            })

        # Strip the replacement spans so their halves are not reported again
        # as stand-alone deletions or insertions.
        remainder = replacement_re.sub('', line)

        for remove in removal_re.findall(remainder):
            output.append({
                'id': 'grammar-error',
                'type': 'grammar',
                'correction': "",
                'context': context,
                'msg': f"Usunięcie '{remove}'",
            })
        for add in addition_re.findall(remainder):
            output.append({
                'id': 'grammar-error',
                'type': 'grammar',
                'correction': "",
                'context': context,
                'msg': f"Dodanie '{add}'",
            })

    return output
def _pipe_text(argv, text):
    """Run ``argv`` with ``text`` plus a newline on stdin; return decoded stdout.

    The command is started without a shell, so ``text`` is passed as data
    only and is never interpreted as shell syntax. The exit status is not
    checked: a failing command simply yields whatever it wrote to stdout.
    """
    completed = subprocess.run(
        argv,
        input=f"{text}\n".encode("utf-8"),
        stdout=subprocess.PIPE,
    )
    return completed.stdout.decode("utf-8")


def _word_diff(before, after):
    """Return ``git diff --word-diff`` markup between two texts.

    Both texts are stored as git blobs with ``git hash-object -w`` and the
    blobs are diffed. The first five lines of git's output (presumably the
    diff header and first hunk marker) are dropped and trailing whitespace
    is stripped. An empty string is returned when either blob could not be
    created.
    """
    hash_cmd = ["git", "hash-object", "-w", "--stdin"]
    before_blob = _pipe_text(hash_cmd, before).strip()
    after_blob = _pipe_text(hash_cmd, after).strip()
    if not before_blob or not after_blob:
        return ""

    completed = subprocess.run(
        ["git", "diff", "--word-diff", before_blob, after_blob],
        stdout=subprocess.PIPE,
    )
    diff_lines = completed.stdout.decode("utf-8").split("\n")
    return "\n".join(diff_lines[5:]).rstrip()


def _correct_text(text):
    """Run ``text`` through the correction pipeline.

    Steps: ``preprocess_text.sh`` -> websocket at
    ``settings.TRANSLATION_WEBSOCKET`` -> ``postprocess_text.sh`` ->
    word diff against the input.

    Returns:
        A ``(corrected_text, marked_errors)`` tuple of strings.
    """
    # Encode to BPE.
    encoded = _pipe_text(["sh", str(BASE_DIR / "preprocess_text.sh")], text)

    ws = create_connection(settings.TRANSLATION_WEBSOCKET)
    try:
        ws.send(encoded)
        result = ws.recv()
    finally:
        # Always release the connection, even if send/recv raises.
        ws.close()

    # Decode from BPE.
    corrected = _pipe_text(
        ["sh", str(BASE_DIR / "postprocess_text.sh")], result.rstrip()
    ).rstrip()

    # Mark the differences between the input and the corrected text.
    return corrected, _word_diff(text, corrected)


@api_view(['POST'])
@renderer_classes([MyXMLRenderer])
def xml_translate(request):
    """Correct the posted ``text`` and respond with a list of error records.

    Responds with the output of ``diff_text`` (rendered by
    ``MyXMLRenderer``) and HTTP 200, or ``"Bad request"`` with HTTP 400
    when ``TranslationSerializer`` rejects the payload.
    """
    serializer = TranslationSerializer(data=request.data)
    if not serializer.is_valid():
        return Response("Bad request", status=status.HTTP_400_BAD_REQUEST)

    text = request.data["text"]
    _, marked_errors = _correct_text(text)
    return Response(diff_text(text, marked_errors), status=status.HTTP_200_OK)


class TranslationAPIView(APIView):
    """Endpoint returning the corrected text together with word-diff markup."""

    def post(self, request):
        """Correct the posted ``text``.

        Responds with ``{'corrected_text': ..., 'errors': ...}`` and
        HTTP 200, where ``errors`` is the raw word-diff markup, or with
        ``"Bad request"`` and HTTP 400 when ``TranslationSerializer``
        rejects the payload.
        """
        serializer = TranslationSerializer(data=request.data)
        if not serializer.is_valid():
            return Response("Bad request", status=status.HTTP_400_BAD_REQUEST)

        corrected, marked_errors = _correct_text(request.data["text"])
        return Response(
            {'corrected_text': corrected, "errors": marked_errors},
            status=status.HTTP_200_OK,
        )