#!/usr/bin/env python3
"""Convert a forum XML file into ``parsed.json``.

Usage: pass the path of the XML file as the single positional argument.

The input is expected to contain a ``<forum id="...">`` element with a
``<name>`` child and any number of ``<discussion id="...">`` elements.  Each
discussion carries ``<name>``, ``<firstpost>`` and ``<post id="...">``
elements; each post carries ``<parent>``, ``<userid>`` and ``<message>``.

The result is written to ``parsed.json`` in the current working directory as
UTF-8, pretty-printed JSON with this shape::

    {"id": ..., "name": ..., "discussions": [
        {"id": ..., "title": ..., "first_post": ..., "posts": [
            {"id": ..., "parent": ..., "author": ..., "message": ...}
        ]}
    ]}
"""
import argparse
import json

from bs4 import BeautifulSoup


def _strip_markup(markup):
    """Return the plain text of a message body with all tags removed."""
    # Message bodies are presumably HTML fragments (verify against real
    # exports).  A fragment with several top-level elements, or with bare
    # text, is not a well-formed XML document, so the "xml" parser can drop
    # content; the lenient HTML parser keeps all of it.
    return BeautifulSoup(markup, "html.parser").get_text()


def _post_to_dict(post):
    """Build the JSON-ready dict for one ``<post>`` element."""
    return {
        'id': post.get('id'),
        # ``post.parent`` would be the enclosing tag in the parse tree, so the
        # <parent> child element has to be looked up with find().
        'parent': post.find('parent').text,
        'author': post.userid.text,
        'message': _strip_markup(post.message.get_text()),
    }


def _discussion_to_dict(discussion):
    """Build the JSON-ready dict for one ``<discussion>`` element."""
    return {
        'id': discussion.get('id'),
        # ``discussion.name`` is the tag's own name ("discussion"), so the
        # <name> child element has to be looked up with find().
        'title': discussion.find('name').text,
        'first_post': discussion.firstpost.text,
        'posts': [_post_to_dict(post) for post in discussion.find_all('post')],
    }


def _forum_to_dict(soup):
    """Build the JSON-ready dict for the ``<forum>`` element of *soup*."""
    forum = soup.forum
    return {
        'id': forum.get('id'),
        'name': forum.find('name').text,
        'discussions': [
            _discussion_to_dict(discussion)
            for discussion in forum.find_all('discussion')
        ],
    }


def main():
    """Parse the command line, read the XML file and write ``parsed.json``."""
    # arguments
    parser = argparse.ArgumentParser(description='Process some xml files.')
    parser.add_argument('filename', help='xml forum file')
    args = parser.parse_args()

    # make a soup
    # Binary mode lets the parser detect the encoding from the document
    # itself instead of decoding with the locale's default encoding.
    with open(args.filename, 'rb') as forum:
        soup = BeautifulSoup(forum, "xml")

    # put json together
    out = _forum_to_dict(soup)

    # ensure_ascii=False writes non-ASCII characters verbatim, so the file
    # must be opened as UTF-8 explicitly rather than with the locale default.
    with open('parsed.json', 'w', encoding='utf-8') as outfile:
        json.dump(out, outfile, ensure_ascii=False, indent=2)


if __name__ == '__main__':
    main()