Implemented parser

This commit is contained in:
Marcin Armacki 2020-06-13 10:29:24 +02:00
parent 40675036d5
commit aec4e7aa35
2 changed files with 13 additions and 7 deletions

View File

@@ -20,6 +20,7 @@ def model_form_upload(request):
if form.is_valid():
try:
data = parseData(request.FILES['file'])
print(data)
form.save()
return JsonResponse(data, safe=False)
except:

View File

@@ -2,7 +2,10 @@
import argparse
from bs4 import BeautifulSoup
from postmarkup import render_bbcode
import json
import html
import re
import tempfile
def parseData(file):
@@ -30,17 +33,19 @@ def parseData(file):
for d in soup.forum.find_all('discussion'):
posts = []
for p in d.find_all('post'):
post_soup = BeautifulSoup(html.unescape(str(p.message)), "lxml")
paragraphs = [render_bbcode(x.text) for x in post_soup.find_all('p')]
posts.append({
'id': p.get('id'),
'parent': p.find('parent').text,
'author': p.userid.text,
'message': p.message.get_text()
'message': [x for x in paragraphs if x]
})
out['discussions'].append({
'id': d.get('id'),
'title': d.find('name').text,
'first_post': d.firstpost.text,
'posts': posts
})
out['discussions'].append({
'id': d.get('id'),
'title': d.find('name').text,
'first_post': d.firstpost.text,
'posts': posts
})
fd.close()
return(out)