siulkilulki
382666c563
Small changes to annotator.py (to be deleted in near future) Add utils/iterator Add redis to enviroment.yml Rename, adapt and move rule based extractor. Adapt find_hours. Yapify webapp app (probably nothing more) Rename buttons in index.html
45 lines
1.1 KiB
Python
Executable File
45 lines
1.1 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
import jsonlines
|
|
from extractor.find_hours import hours_iterator
|
|
from parishwebsites.parish2text import Parish2Text
|
|
import os
|
|
import random
|
|
|
|
# Module-wide converter instance; process_parish_file() runs every page
# record through its convert() method.
parish2text = Parish2Text()

# NOTE(review): not referenced anywhere in the code visible here; presumably
# a context-window size -- confirm before relying on or removing it.
CONTEXT = 100
|
|
|
|
|
|
def process_parish_page(parish_page):
    """Print each colored utterance found in a page, pausing after every one.

    The 'content' entry is popped from ``parish_page`` (so the mapping is
    mutated) and handed to ``hours_iterator`` with ``color=True``. For every
    ``(utterance, utterance_colored)`` pair it yields, the colored variant is
    printed and an ipdb breakpoint is entered.
    """
    page_text = parish_page.pop('content')
    matches = hours_iterator(page_text, color=True)
    for _plain, highlighted in matches:
        print(highlighted)
        # Interactive stop after each printed utterance; the import stays
        # inside the loop so ipdb is only needed when something matches.
        from ipdb import set_trace
        set_trace()
|
|
|
|
|
|
def process_parish_file(parish_reader):
    """Convert and process every page record produced by ``parish_reader``.

    Each record is passed through ``parish2text.convert``; an ipdb breakpoint
    is entered right after the conversion, and the converted page is then
    forwarded to ``process_parish_page``.
    """
    for raw_page in parish_reader:
        converted = parish2text.convert(raw_page)
        # Interactive stop before the converted page is processed.
        from ipdb import set_trace
        set_trace()
        process_parish_page(converted)
|
|
|
|
|
|
def process_directory(directory):
    """Walk ``directory`` and process every non-empty file found in it.

    Within each visited folder the file names are shuffled in place, so the
    processing order is random. Files whose size is zero are skipped; every
    other file is opened with ``jsonlines.open`` and the resulting reader is
    passed to ``process_parish_file``.
    """
    for folder, _subdirs, names in os.walk(directory):
        random.shuffle(names)
        for name in names:
            path = os.path.join(folder, name)
            # Empty files carry no records; skip them before opening.
            if os.path.getsize(path) <= 0:
                continue
            with jsonlines.open(path) as parish_reader:
                process_parish_file(parish_reader)
|
|
|
|
|
|
def main():
    """Run the directory processing over the fixed parish data folder."""
    data_dir = './parishwebsites/data'
    process_directory(data_dir)
|
|
|
|
|
|
# Script entry point: only run main() when executed directly, not on import.
if __name__ == '__main__':
    main()
|