diff --git a/backend/webapp/prototype/filehandler/views.py b/backend/webapp/prototype/filehandler/views.py index c48679b..42942dc 100644 --- a/backend/webapp/prototype/filehandler/views.py +++ b/backend/webapp/prototype/filehandler/views.py @@ -20,6 +20,7 @@ def model_form_upload(request): if form.is_valid(): try: data = parseData(request.FILES['file']) + print(data) form.save() return JsonResponse(data, safe=False) except: diff --git a/backend/webapp/prototype/filehandler/xmlParser.py b/backend/webapp/prototype/filehandler/xmlParser.py index 7fdee29..7597140 100644 --- a/backend/webapp/prototype/filehandler/xmlParser.py +++ b/backend/webapp/prototype/filehandler/xmlParser.py @@ -2,7 +2,10 @@ import argparse from bs4 import BeautifulSoup +from postmarkup import render_bbcode import json +import html +import re import tempfile def parseData(file): @@ -30,17 +33,19 @@ def parseData(file): for d in soup.forum.find_all('discussion'): posts = [] for p in d.find_all('post'): + post_soup = BeautifulSoup(html.unescape(str(p.message)), "lxml") + paragraphs = [render_bbcode(x.text) for x in post_soup.find_all('p')] posts.append({ 'id': p.get('id'), 'parent': p.find('parent').text, 'author': p.userid.text, - 'message': p.message.get_text() + 'message': [x for x in paragraphs if x] }) - out['discussions'].append({ - 'id': d.get('id'), - 'title': d.find('name').text, - 'first_post': d.firstpost.text, - 'posts': posts - }) + out['discussions'].append({ + 'id': d.get('id'), + 'title': d.find('name').text, + 'first_post': d.firstpost.text, + 'posts': posts + }) fd.close() return(out)