"""Scan a directory of YouTube ``.description`` files and log their contents.

For every ``*.description`` file in the input directory the script derives a
video id from the file name, builds the matching YouTube watch URL, reads the
description text and logs both at DEBUG level. A handle to the ``moviesMeta``
MongoDB collection is obtained, but nothing visible here writes to it yet.
"""
import argparse
import logging
from glob import glob
from os import path

from mongo.helpers import get_mongo_collection

# Level used when --loglevel is missing or does not name a logging level.
# Matches the numeric fallback (10) the script has always used.
_DEFAULT_LOG_LEVEL = logging.DEBUG


def main(args):
    """Log the YouTube URL and description text for each description file.

    Args:
        args: Parsed command-line namespace. ``args.input`` is the directory
            searched (non-recursively) for ``*.description`` files and
            ``args.loglevel`` is a logging level name such as ``"INFO"``;
            it may be ``None``, in which case DEBUG is used.
    """
    # args.loglevel is None when the flag is omitted; calling .upper() on it
    # used to crash the script before any work was done.
    level_name = args.loglevel or "DEBUG"
    numeric_level = getattr(logging, level_name.upper(), _DEFAULT_LOG_LEVEL)
    if not isinstance(numeric_level, int):
        # Some upper-case attributes of the logging module are not levels
        # (e.g. BASIC_FORMAT is a string); never hand those to basicConfig.
        numeric_level = _DEFAULT_LOG_LEVEL
    logging.basicConfig(
        format='%(asctime)s [%(levelname)s] - %(message)s',
        level=numeric_level,
    )

    input_path = args.input

    # NOTE(review): the connection string embeds a username and password.
    # Consider moving it to configuration or an environment variable.
    uri = "mongodb://speechRecoUser:speech!reco@localhost/archSpeechReco"
    db_name = "archSpeechReco"
    col_name = "moviesMeta"
    # NOTE(review): `col` is not used below. The call is kept because the
    # helper is defined elsewhere and may have side effects (presumably it
    # opens the connection) -- confirm before removing.
    col = get_mongo_collection(col_name, db_name, uri)

    logging.info("let's start")
    logging.info(input_path)

    for descr_path in glob(path.join(input_path, '*.description')):
        # The video id is everything before the first dot of the file name.
        video_id = path.basename(descr_path).split('.')[0]
        video_url = f'https://www.youtube.com/watch?v={video_id}'
        logging.debug('YT URL: %s', video_url)

        # Explicit encoding so the result does not depend on the platform
        # locale. Assumes the description files are UTF-8 -- TODO confirm.
        with open(descr_path, encoding='utf-8') as f:
            video_descr = f.read()
        logging.debug('Desc: %s', video_descr)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='YouTube description to mongo')
    # Required: without it the script would glob the literal path "None".
    parser.add_argument("--input", required=True,
                        help="input path for .description files")
    parser.add_argument("--loglevel",
                        help="log level: DEBUG INFO WARNING ERROR")
    args = parser.parse_args()
    main(args)