Implemented parser
This commit is contained in:
parent
40675036d5
commit
aec4e7aa35
@ -20,6 +20,7 @@ def model_form_upload(request):
|
||||
if form.is_valid():
|
||||
try:
|
||||
data = parseData(request.FILES['file'])
|
||||
print(data)
|
||||
form.save()
|
||||
return JsonResponse(data, safe=False)
|
||||
except:
|
||||
|
@ -2,7 +2,10 @@
|
||||
|
||||
import argparse
|
||||
from bs4 import BeautifulSoup
|
||||
from postmarkup import render_bbcode
|
||||
import json
|
||||
import html
|
||||
import re
|
||||
import tempfile
|
||||
|
||||
def parseData(file):
|
||||
@ -30,11 +33,13 @@ def parseData(file):
|
||||
for d in soup.forum.find_all('discussion'):
|
||||
posts = []
|
||||
for p in d.find_all('post'):
|
||||
post_soup = BeautifulSoup(html.unescape(str(p.message)), "lxml")
|
||||
paragraphs = [render_bbcode(x.text) for x in post_soup.find_all('p')]
|
||||
posts.append({
|
||||
'id': p.get('id'),
|
||||
'parent': p.find('parent').text,
|
||||
'author': p.userid.text,
|
||||
'message': p.message.get_text()
|
||||
'message': [x for x in paragraphs if x]
|
||||
})
|
||||
out['discussions'].append({
|
||||
'id': d.get('id'),
|
||||
|
Loading…
Reference in New Issue
Block a user