Add file handling for backend
This commit is contained in:
parent
b586f3c7ed
commit
e674b385e3
1
.gitignore
vendored
1
.gitignore
vendored
@ -143,3 +143,4 @@ cython_debug/
|
|||||||
.vscode
|
.vscode
|
||||||
|
|
||||||
db.sqlite3
|
db.sqlite3
|
||||||
|
translation/files/
|
@ -1,5 +1,7 @@
|
|||||||
|
from tracemalloc import start
|
||||||
from rest_framework.views import APIView
|
from rest_framework.views import APIView
|
||||||
from rest_framework.response import Response
|
from rest_framework.response import Response
|
||||||
|
from rest_framework.parsers import MultiPartParser
|
||||||
from websocket import create_connection
|
from websocket import create_connection
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
import subprocess
|
import subprocess
|
||||||
@ -7,15 +9,31 @@ from .serializers import TranslationSerializer
|
|||||||
from rest_framework import status
|
from rest_framework import status
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import regex as re
|
import regex as re
|
||||||
|
import uuid
|
||||||
|
import os
|
||||||
|
|
||||||
BASE_DIR = Path(__file__).resolve().parent
|
BASE_DIR = Path(__file__).resolve().parent
|
||||||
|
|
||||||
from rest_framework.decorators import api_view, renderer_classes
|
from rest_framework.decorators import api_view, renderer_classes, parser_classes
|
||||||
from .renderers import MyXMLRenderer
|
from .renderers import MyXMLRenderer
|
||||||
|
|
||||||
|
def get_context(line, start_index, end_index, width=20):
    """Return the text surrounding a match in *line*, marking cuts with ``...``.

    Args:
        line: The line that contains the match. A trailing line ending (as
            kept by ``splitlines(True)``) is not treated as content.
        start_index: Offset in *line* where the match begins.
        end_index: Offset in *line* just past the end of the match.
        width: Number of context characters kept on each side of the match.

    Returns:
        The match together with up to *width* characters on either side.
        ``'...'`` is added only on a side where text was actually cut off,
        so a window that already reaches the start or end of the line is
        returned without a marker on that side.
    """
    # Work on the content only, so every branch treats the line ending the
    # same way and the last real character of an unterminated line is kept.
    content = line.rstrip('\r\n')

    # Clamp the window to the content instead of special-casing each overflow.
    window_start = max(start_index - width, 0)
    window_end = min(end_index + width, len(content))

    prefix = '...' if window_start > 0 else ''
    suffix = '...' if window_end < len(content) else ''
    return prefix + content[window_start:window_end] + suffix
|
||||||
|
|
||||||
def diff_text(original, corrected):
|
def diff_text(original, corrected):
|
||||||
lines = corrected.splitlines(1)
|
lines = corrected.splitlines(1)
|
||||||
original_lines = original.splitlines(1)
|
|
||||||
output = []
|
output = []
|
||||||
for idx, line in enumerate(lines):
|
for idx, line in enumerate(lines):
|
||||||
groups_found = re.findall('\[-([^\[]*?)\+}', line)
|
groups_found = re.findall('\[-([^\[]*?)\+}', line)
|
||||||
@ -25,42 +43,61 @@ def diff_text(original, corrected):
|
|||||||
removed = re.findall('^(.*?)\-]', group)
|
removed = re.findall('^(.*?)\-]', group)
|
||||||
added = re.findall('{\+(.*?)$', group)
|
added = re.findall('{\+(.*?)$', group)
|
||||||
|
|
||||||
output.append({'id': 'grammar-error', 'type': 'grammar', 'correction': added[0], 'context': original_lines[idx], 'msg': f"Zamiana '{removed[0]}' na '{added[0]}'"})
|
start_position = re.search('\[-([^\[]*?)\+}', line).start()
|
||||||
|
end_position = re.search('\[-([^\[]*?)\+}', line).end()
|
||||||
|
|
||||||
|
|
||||||
|
output.append({'id': 'grammar-error', 'type': 'grammar', 'correction': added[0], 'context': f'{get_context(lines[idx], start_position, end_position)}', 'msg': f"Zamiana '{removed[0]}' na '{added[0]}'"})
|
||||||
|
|
||||||
removed = re.findall('\[\-(.*?)\-]', new_line)
|
removed = re.findall('\[\-(.*?)\-]', new_line)
|
||||||
added = re.findall('{\+(.*?)\+}', new_line)
|
added = re.findall('{\+(.*?)\+}', new_line)
|
||||||
if removed is list:
|
if removed is list:
|
||||||
for remove in removed:
|
for remove in removed:
|
||||||
output.append({'id': 'grammar-error', 'type': 'grammar', 'correction': "", 'context': original_lines[idx], 'msg': f"Usunięcie '{remove}"})
|
output.append({'id': 'grammar-error', 'type': 'grammar', 'correction': "", 'context': f'{lines[idx]}', 'msg': f"Usunięcie '{remove}"})
|
||||||
if added is list:
|
if added is list:
|
||||||
for add in added:
|
for add in added:
|
||||||
output.append({'id': 'grammar-error', 'type': 'grammar', 'correction': "", 'context': original_lines[idx], 'msg': f"Dodanie '{add}"})
|
output.append({'id': 'grammar-error', 'type': 'grammar', 'correction': "", 'context': f'{lines[idx]}', 'msg': f"Dodanie '{add}"})
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
|
||||||
def _pipe_through(command, payload=None):
    """Run *command* without a shell, feed *payload* to its stdin, return its stdout bytes."""
    completed = subprocess.run(command, input=payload, stdout=subprocess.PIPE)
    return completed.stdout


@api_view(['POST'])
@parser_classes([MultiPartParser])
@renderer_classes([MyXMLRenderer])
def xml_translate(request, format=None):
    """Correct an uploaded UTF-8 text file and report the changes.

    The multipart field ``file`` is read, passed through
    ``preprocess_text.sh``, sent to the websocket configured as
    ``settings.TRANSLATION_WEBSOCKET``, and the reply is passed through
    ``postprocess_text.sh``. The original and the corrected text are then
    compared with ``git diff --word-diff`` and the marked-up diff is handed
    to ``diff_text``.

    Args:
        request: The DRF request; must carry the upload under ``file``.
        format: Optional format suffix supplied by DRF routing; unused here.

    Returns:
        A 200 ``Response`` with the list produced by ``diff_text``, or a 400
        ``Response`` when the upload is missing or is not valid UTF-8.
    """
    uploaded_file = request.FILES.get('file')
    if uploaded_file is None:
        return Response("Bad request", status=status.HTTP_400_BAD_REQUEST)
    try:
        original_bytes = uploaded_file.read()
        original_text = original_bytes.decode('utf-8')
    except UnicodeDecodeError:
        return Response("Bad request", status=status.HTTP_400_BAD_REQUEST)

    # Everything below streams data over stdin instead of writing temporary
    # files: nothing is left behind when a step fails, and nothing depends on
    # the process working directory matching BASE_DIR.

    # Encode to BPE.
    encoded = _pipe_through(['sh', str(BASE_DIR / 'preprocess_text.sh')], original_bytes)

    ws = create_connection(settings.TRANSLATION_WEBSOCKET)
    try:
        ws.send(encoded.decode('utf-8'))
        result = ws.recv()
    finally:
        # Close the connection even when send/recv raises.
        ws.close()
    if isinstance(result, str):
        result = result.encode('utf-8')

    # Decode from BPE.
    corrected_bytes = _pipe_through(['sh', str(BASE_DIR / 'postprocess_text.sh')], result)

    # Store both texts as git blobs so that git can word-diff them.
    hash_command = ['git', 'hash-object', '-w', '--stdin']
    original_blob = _pipe_through(hash_command, original_bytes).decode('ascii').strip()
    corrected_blob = _pipe_through(hash_command, corrected_bytes).decode('ascii').strip()
    word_diff = _pipe_through(['git', 'diff', '--word-diff', original_blob, corrected_blob]).decode('utf-8')

    # Skip the five header lines of the diff (what `tail -n +6` did).
    diff_parts = word_diff.split('\n', 5)
    marked_errors = diff_parts[5].rstrip() if len(diff_parts) > 5 else ''

    return Response(diff_text(original_text, marked_errors), status=status.HTTP_200_OK)
|
||||||
|
|
||||||
class TranslationAPIView(APIView):
|
class TranslationAPIView(APIView):
|
||||||
def post(self, request):
|
def post(self, request):
|
||||||
|
Loading…
Reference in New Issue
Block a user