xml parser added

This commit is contained in:
Karolina Boczoń 2020-04-23 18:05:51 +00:00
parent 11bb40bc13
commit 5d1d29bb00

32
backend/xmlParser.py Normal file
View File

@ -0,0 +1,32 @@
#!/usr/bin/env python3
import argparse
from bs4 import BeautifulSoup
import json
# Command-line interface: one positional argument naming the forum XML file.
parser = argparse.ArgumentParser(description='Process some xml files.')
parser.add_argument('filename', help='xml forum file')
args = parser.parse_args()

# Parse the XML. The file is opened in binary mode so that the parser can
# detect the document's own encoding (e.g. from the XML declaration) instead
# of having the bytes pre-decoded with the platform's default text encoding.
with open(args.filename, 'rb') as forum:
    soup = BeautifulSoup(forum, "xml")

# Exit with a clear usage error (rather than an AttributeError on None) when
# the document contains no <forum> element at all.
if soup.forum is None:
    parser.error('no <forum> element found in ' + args.filename)

# Build the JSON-serialisable structure:
#   {'id', 'name', 'discussions': [{'id', 'title', 'posts': [
#       {'id', 'author', 'message'}, ...]}, ...]}
out = {}
out['id'] = soup.forum.get('id')
out['name'] = soup.forum.find('name').text
out['discussions'] = []
for d in soup.forum.find_all('discussion'):
    out['discussions'].append({
        'id': d.get('id'),
        'title': d.find('name').text,
        'posts': [
            {'id': p.get('id'), 'author': p.userid.text,
             'message': p.message.get_text()} for p in d.find_all('post')]
    })

# ensure_ascii=False writes non-ASCII characters verbatim, so the output file
# is opened explicitly as UTF-8; the platform default encoding might not be
# able to represent them.
with open('parsed.json', 'w', encoding='utf-8') as outfile:
    json.dump(out, outfile, ensure_ascii=False, indent=2)