remove HTML tags from messages
This commit is contained in:
parent
ecb4b1148b
commit
1735d9ddb8
@ -20,17 +20,20 @@ out['id'] = soup.forum.get('id')
|
||||
out['name'] = soup.forum.find('name').text
|
||||
out['discussions'] = []
|
||||
for d in soup.forum.find_all('discussion'):
|
||||
posts = []
|
||||
for p in d.find_all('post'):
|
||||
message_soup = BeautifulSoup(p.message.get_text(), "xml")
|
||||
posts.append({
|
||||
'id': p.get('id'),
|
||||
'parent': p.find('parent').text,
|
||||
'author': p.userid.text,
|
||||
'message': message_soup.get_text()
|
||||
})
|
||||
out['discussions'].append({
|
||||
'id': d.get('id'),
|
||||
'title': d.find('name').text,
|
||||
'first_post': d.firstpost.text,
|
||||
'posts': [
|
||||
{
|
||||
'id': p.get('id'),
|
||||
'parent': p.find('parent').text,
|
||||
'author': p.userid.text,
|
||||
'message': p.message.get_text()
|
||||
} for p in d.find_all('post')]
|
||||
'posts': posts
|
||||
})
|
||||
|
||||
with open('parsed.json', 'w') as outfile:
|
||||
|
Loading…
Reference in New Issue
Block a user