Add raw data viewer.

This commit is contained in:
Dawid Jurkiewicz 2018-03-30 22:04:14 +02:00
parent 88c55891f4
commit 56f704630e

28
parishwebsites/view_raw_data.py Executable file
View File

@ -0,0 +1,28 @@
#!/usr/bin/env python3
import jsonlines
import sys
import html2text
import pprint
def convert_html_to_text(parish, text_maker):
    """Replace the HTML stored under ``parish['content']`` with plain text.

    Args:
        parish: Mapping for one record; its ``'content'`` entry, when
            present and not ``None``, is passed to ``text_maker.handle``.
        text_maker: Object exposing ``handle(html)`` that returns the
            converted text (``main`` passes an ``html2text.HTML2Text``).

    Returns:
        The same ``parish`` object, modified in place.
    """
    html = parish.get('content')
    if html is None:
        # Nothing to convert: hand the record back untouched instead of
        # failing on a missing key or a null value.
        return parish
    parish['content'] = text_maker.handle(html)
    return parish
def main():
    """Pretty-print every record of a JSON Lines file with HTML stripped.

    The input path is taken from the first command-line argument. Each
    record is run through ``convert_html_to_text`` and written to stdout
    with ``pprint``. When no path is given, a usage line is printed to
    stderr and the process exits with status 2.
    """
    if len(sys.argv) < 2:
        # Fail with a readable message rather than an IndexError traceback.
        print('usage: {} RAW_DATA.jsonl'.format(sys.argv[0]), file=sys.stderr)
        sys.exit(2)

    text_maker = html2text.HTML2Text()
    # Links and images are dropped from the output; other html2text
    # options (e.g. wrap_links, strong_mark) stay at their defaults.
    text_maker.ignore_links = True
    text_maker.ignore_images = True

    with jsonlines.open(sys.argv[1]) as reader:
        for parish in reader:
            pprint.pprint(convert_html_to_text(parish, text_maker))
# Run the viewer only when executed as a script, not when imported.
if __name__ == "__main__":
    main()