xml parser added
This commit is contained in:
parent
11bb40bc13
commit
5d1d29bb00
32
backend/xmlParser.py
Normal file
32
backend/xmlParser.py
Normal file
@ -0,0 +1,32 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
from bs4 import BeautifulSoup
|
||||
import json
|
||||
|
||||
|
||||
# Command-line interface: a single positional argument naming the XML forum
# export to convert.
parser = argparse.ArgumentParser(description='Process some xml files.')
parser.add_argument('filename', help='xml forum file')
args = parser.parse_args()

# Parse the XML. The file is opened in binary mode so the parser can detect
# the document's encoding itself (e.g. from the XML declaration) instead of
# the bytes being pre-decoded with the platform's locale default.
with open(args.filename, 'rb') as forum:
    soup = BeautifulSoup(forum, "xml")

# Fail with a readable CLI error rather than an AttributeError on None when
# the document has no <forum> element.
if soup.forum is None:
    parser.error('{}: no <forum> element found'.format(args.filename))

# Assemble the JSON structure:
#   {id, name, discussions: [{id, title, posts: [{id, author, message}]}]}
# Child <name> elements are looked up with find('name') because the `.name`
# attribute of a tag is the tag's own name, not a child element.
forum_tag = soup.forum
out = {
    'id': forum_tag.get('id'),
    'name': forum_tag.find('name').text,
    'discussions': [],
}
for discussion in forum_tag.find_all('discussion'):
    posts = [
        {
            'id': post.get('id'),
            'author': post.userid.text,
            'message': post.message.get_text(),
        }
        for post in discussion.find_all('post')
    ]
    out['discussions'].append({
        'id': discussion.get('id'),
        'title': discussion.find('name').text,
        'posts': posts,
    })

# ensure_ascii=False writes non-ASCII characters verbatim, so the output file
# must be opened as UTF-8 explicitly; relying on the locale default can raise
# UnicodeEncodeError or produce JSON that is not valid UTF-8.
with open('parsed.json', 'w', encoding='utf-8') as outfile:
    json.dump(out, outfile, ensure_ascii=False, indent=2)
|
Loading…
Reference in New Issue
Block a user