49 lines
1.6 KiB
Python
49 lines
1.6 KiB
Python
from mongo.helpers import get_mongo_collection
|
|
import argparse
|
|
import logging
|
|
from glob import glob
|
|
from os import path
|
|
import xml.etree.ElementTree as ET
|
|
from bson.objectid import ObjectId
|
|
from datetime import datetime
|
|
|
|
|
|
def main(args):
    """Store the words recognised in XML files as transcripts in MongoDB.

    For every ``*.xml`` file directly inside ``args.input`` the text of all
    ``Word`` elements is joined with single spaces and written to the
    ``pcssTextRecoArm2.transcript`` field of the ``moviesMeta`` document
    whose ``_id`` is the file name up to its first dot. The field
    ``pcssTextRecoArm2.transcripted`` receives the local time of the update
    formatted as ``%Y-%m-%d %H:%M:%S``.

    A file that cannot be parsed, or an update that raises, is logged and
    skipped so that one bad input does not abort the rest of the batch.

    Args:
        args: Parsed command-line namespace providing ``input`` (directory
            holding the XML files) and ``loglevel`` (name of a ``logging``
            level; a missing or unknown name falls back to DEBUG).
    """
    input_path = args.input

    # ``loglevel`` is None when the option is omitted; treat that like an
    # unknown name instead of crashing on ``None.upper()``.
    level_name = (args.loglevel or "").upper()
    numeric_level = getattr(logging, level_name, logging.DEBUG)
    if not isinstance(numeric_level, int):
        # e.g. "BASIC_FORMAT" is an upper-case logging attribute but not a level.
        numeric_level = logging.DEBUG
    logging.basicConfig(format='%(asctime)s [%(levelname)s] - %(message)s', level=numeric_level)

    # NOTE(review): credentials are hard-coded in source; consider loading the
    # URI from configuration or the environment instead.
    uri = "mongodb://speechRecoUser:speech!reco@localhost/archSpeechReco"
    db_name = "archSpeechReco"
    col_name = "moviesMeta"

    # presumably a pymongo collection -- verify against mongo.helpers
    col = get_mongo_collection(col_name, db_name, uri)

    logging.info("let's start")

    for xml_file in glob(f'{input_path}/*.xml'):
        # The document id is the file name up to its first dot.
        doc_id = path.basename(xml_file).split('.')[0]

        try:
            root = ET.parse(xml_file).getroot()
        except (ET.ParseError, OSError) as e:
            logging.error("cannot parse %s: %s", xml_file, e)
            continue

        # Elements without text have ``text is None``; skip them so that
        # str.join does not raise TypeError.
        transcript = " ".join(
            word.text for word in root.iter('Word') if word.text is not None
        )

        try:
            now = datetime.now()
            result = col.update_one(
                {"_id": ObjectId(doc_id)},
                {"$set": {
                    "pcssTextRecoArm2.transcript": transcript,
                    "pcssTextRecoArm2.transcripted": now.strftime("%Y-%m-%d %H:%M:%S"),
                }},
            )
        except Exception as e:
            # Per-file boundary: an invalid ObjectId or a database error must
            # not stop the remaining files from being processed.
            logging.error("mongo update failed for %s: %s", doc_id, e)
        else:
            # update_one succeeds even when the filter matches nothing, so do
            # not report OK in that case. getattr keeps this safe should the
            # helper return something other than a pymongo result object.
            if getattr(result, "matched_count", None) == 0:
                logging.warning("mongo update matched no document for %s", doc_id)
            else:
                logging.info("mongo update OK %s", doc_id)

        logging.debug("%s: %s", doc_id, transcript)

    logging.info("program finished")
|
|
|
|
|
|
if __name__ == '__main__':
    # Command-line entry point: parse the options and hand them to main().
    parser = argparse.ArgumentParser(description='PCSS reco')
    # Required: without it the glob pattern would silently become "None/*.xml".
    parser.add_argument("--input", required=True, help="input path for XML files")
    # Defaulted: main() calls .upper() on this value, so it must not be None.
    parser.add_argument("--loglevel", default="INFO",
                        help="log level: DEBUG INFO WARNING ERROR (default: INFO)")
    args = parser.parse_args()
    main(args)
|