From 5d1d29bb00349fe3fa21fd218c14c5cd2d54b7f0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Karolina=20Boczo=C5=84?=
Date: Thu, 23 Apr 2020 18:05:51 +0000
Subject: [PATCH] xml parser added

---
Review note: this patch was edited by hand after review; the blob id on
the "index" line below predates those edits.

 backend/xmlParser.py | 66 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 backend/xmlParser.py

diff --git a/backend/xmlParser.py b/backend/xmlParser.py
new file mode 100644
index 0000000..276e5fb
--- /dev/null
+++ b/backend/xmlParser.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+"""Convert a forum XML export into ``parsed.json``.
+
+Reads the XML file named on the command line, extracts the forum id and
+name plus every discussion and its posts, and writes the result as
+UTF-8 JSON to ``parsed.json`` in the current working directory.
+"""
+
+import argparse
+import json
+
+from bs4 import BeautifulSoup
+
+
+def child_text(tag, child_name):
+    """Return the text of the first ``child_name`` descendant, or None."""
+    child = tag.find(child_name)
+    return child.get_text() if child is not None else None
+
+
+def forum_to_dict(forum):
+    """Build the JSON-serialisable dictionary for a ``<forum>`` element."""
+    return {
+        'id': forum.get('id'),
+        'name': child_text(forum, 'name'),
+        'discussions': [
+            {
+                'id': d.get('id'),
+                'title': child_text(d, 'name'),
+                'posts': [
+                    {
+                        'id': p.get('id'),
+                        'author': child_text(p, 'userid'),
+                        'message': child_text(p, 'message'),
+                    }
+                    for p in d.find_all('post')
+                ],
+            }
+            for d in forum.find_all('discussion')
+        ],
+    }
+
+
+def main():
+    """Parse the command line, convert the forum file and write the JSON."""
+    parser = argparse.ArgumentParser(description='Process some xml files.')
+    parser.add_argument('filename', help='xml forum file')
+    args = parser.parse_args()
+
+    # Binary mode lets BeautifulSoup detect the document encoding itself
+    # instead of the file being decoded with the platform locale first.
+    with open(args.filename, 'rb') as forum_file:
+        soup = BeautifulSoup(forum_file, 'xml')
+
+    if soup.forum is None:
+        parser.error('no <forum> element found in {}'.format(args.filename))
+
+    # ensure_ascii=False emits raw non-ASCII characters, so the output
+    # encoding must be fixed rather than left to the locale.
+    with open('parsed.json', 'w', encoding='utf-8') as outfile:
+        json.dump(forum_to_dict(soup.forum), outfile,
+                  ensure_ascii=False, indent=2)
+
+
+if __name__ == '__main__':
+    main()