"""Load YouTube ``.description``/``.title`` file pairs into the moviesMeta Mongo collection."""
import argparse
import logging
import re
from datetime import datetime
from glob import glob
from os import path

from mongo.helpers import get_mongo_collection

# NOTE(review): credentials are hard-coded in the connection URI; consider
# moving them to configuration or the environment.
MONGO_URI = "mongodb://speechRecoUser:speech!reco@localhost/archSpeechReco"
MONGO_DB_NAME = "archSpeechReco"
MONGO_COLLECTION_NAME = "moviesMeta"

# Compiled once instead of on every loop iteration. The leading greedy ``.*``
# is kept on purpose: with ``match`` it selects the LAST candidate on the
# first line of the title, which is what the script has always done.
_FULL_DATE_RE = re.compile(r".*(\d\d\.\d\d\.19\d\d).*")
_YEAR_RE = re.compile(r".*(19\d\d).*")


def _resolve_log_level(loglevel):
    """Translate a level name such as ``"info"`` into a logging level number.

    Falls back to ``logging.DEBUG`` when the name is missing (``None``),
    unknown, or resolves to a ``logging`` attribute that is not a level.
    """
    level = getattr(logging, (loglevel or "").upper(), logging.DEBUG)
    return level if isinstance(level, int) else logging.DEBUG


def _extract_video_date(title):
    """Return the date found in *title*.

    Returns ``YYYY-MM-DD`` for a valid ``dd.mm.19yy`` date, otherwise a bare
    ``19yy`` year, otherwise the placeholder ``"brak daty"``.
    """
    m = _FULL_DATE_RE.match(title)
    if m is not None:
        try:
            return datetime.strptime(m[1], '%d.%m.%Y').strftime('%Y-%m-%d')
        except ValueError:
            # Looks like a date but is not a real one (e.g. 31.02.1975);
            # fall back to the year-only pattern instead of crashing.
            pass
    m = _YEAR_RE.match(title)
    return m[1] if m is not None else "brak daty"


def _read_text(file_path):
    """Return the whole content of *file_path*, decoded as UTF-8."""
    with open(file_path, encoding="utf-8") as f:
        return f.read()


def main(args):
    """Insert one metadata document per ``*.description`` file into Mongo.

    Args:
        args: parsed command-line arguments providing ``input`` (directory
            holding the ``.description`` and sibling ``.title`` files),
            ``source`` (stored under the ``source`` key) and ``loglevel``
            (logging level name; DEBUG is used when missing or unknown).

    Files that cannot be read and inserts that fail are logged and skipped,
    so one bad entry does not abort the rest of the batch.
    """
    input_path = args.input
    input_source = args.source

    logging.basicConfig(format='%(asctime)s [%(levelname)s] - %(message)s',
                        level=_resolve_log_level(args.loglevel))

    col = get_mongo_collection(MONGO_COLLECTION_NAME, MONGO_DB_NAME, MONGO_URI)

    logging.info("let's start")
    logging.info(input_path)

    if input_path is None:
        # Without this guard the glob below would look into a directory
        # literally named "None".
        logging.error('no --input path given, nothing to do')
        return

    for file in glob(f'{input_path}/*.description'):
        # Swap only the extension; a plain str.replace would also rewrite any
        # "description" occurring in a directory or file name.
        file_title = path.splitext(file)[0] + '.title'
        video_id = path.basename(file).split('.')[0]
        video_url = f'https://www.youtube.com/watch?v={video_id}'
        logging.debug('YT URL: %s', video_url)

        try:
            video_descr = _read_text(file)
            video_title = _read_text(file_title)
        except (OSError, UnicodeError):
            logging.exception('cannot read description/title for %s, skipping', file)
            continue
        logging.debug('Desc: %s', video_descr)
        logging.debug('Title: %s', video_title)

        # NOTE(review): the date is only logged, never stored in the document
        # below - confirm whether it was meant to be persisted.
        video_date = _extract_video_date(video_title)
        logging.debug('Video Date: %s', video_date)

        # One timestamp per record so both upload dates always agree.
        upload_date = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        to_mongo = {
            'title': video_title,
            'url': video_url,
            'source': input_source,
            'gcsMp4': {
                'location': f'mp4/{video_id}.mp4',
                'uploadDate': upload_date,
            },
            'gcsWav': {
                'location': f'wave/{video_id}.wav',
                'uploadDate': upload_date,
            },
        }

        try:
            col.insert_one(to_mongo)
        except Exception:
            # Best effort: keep processing the remaining files, but record the
            # traceback and let KeyboardInterrupt/SystemExit propagate.
            logging.exception('mongo update failed')
        else:
            logging.info('mongo insert OK')
            logging.debug('inserted: %s', to_mongo)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='YouTube description to mongo')
    parser.add_argument("--input", help="input path for .description files")
    parser.add_argument("--source", help="source of media files [dtv, sonda, kronikiprl]")
    parser.add_argument("--loglevel", help="log level: DEBUG INFO WARNING ERROR")
    args = parser.parse_args()

    main(args)