This commit is contained in:
Wojciech Smolak 2020-08-19 20:10:27 +02:00
parent bdc7d98c3f
commit c1133260b5
2 changed files with 37 additions and 1 deletions

View File

@ -43,6 +43,6 @@ def main(args):
if __name__ == '__main__':
    # Command-line entry point: parse the options and hand them to main().
    parser = argparse.ArgumentParser(description='PCSS reco')
    parser.add_argument("--input", help="input path for XML files")
    # Register --loglevel exactly once: adding the same option string a
    # second time makes argparse raise a conflicting-option error.
    parser.add_argument("--loglevel", help="log level: INFO WARNING ERROR")
    args = parser.parse_args()
    main(args)

36
src/yt_toMongo.py Normal file
View File

@ -0,0 +1,36 @@
from mongo.helpers import get_mongo_collection
import argparse
import logging
from glob import glob
from os import path
def main(args):
    """Log the YouTube URL and description text for each ``*.description`` file.

    Configures logging, opens the ``moviesMeta`` collection of the
    ``archSpeechReco`` database, then walks ``<input>/*.description``.
    For each file, the part of the file name before the first dot is used
    as the video id, and the derived watch URL and the file's text are
    logged at DEBUG level.

    Args:
        args: Parsed command-line namespace with two attributes:
            ``loglevel`` (name of a ``logging`` level, may be ``None``) and
            ``input`` (directory containing the ``.description`` files).
    """
    # --loglevel is optional on the command line, so it can arrive as None;
    # fall back to DEBUG, the same level already used for unknown names.
    loglevel = args.loglevel or "DEBUG"
    input_path = args.input

    numeric_level = getattr(logging, loglevel.upper(), logging.DEBUG)
    if not isinstance(numeric_level, int):
        # The name matched a logging attribute that is not a level constant.
        numeric_level = logging.DEBUG
    logging.basicConfig(format='%(asctime)s [%(levelname)s] - %(message)s', level=numeric_level)

    # NOTE(review): credentials are hard-coded in the connection URI;
    # consider loading them from the environment or a config file.
    uri = "mongodb://speechRecoUser:speech!reco@localhost/archSpeechReco"
    db_name = "archSpeechReco"
    col_name = "moviesMeta"
    # The collection handle is obtained here but not used yet in this function.
    col = get_mongo_collection(col_name, db_name, uri)

    logging.info("let's start")
    logging.info(input_path)

    for descr_path in glob(f'{input_path}/*.description'):
        # Everything before the first dot of the file name is the video id.
        video_id = path.basename(descr_path).split('.')[0]
        video_url = f'https://www.youtube.com/watch?v={video_id}'
        logging.debug('YT URL: %s', video_url)

        # Read with an explicit encoding instead of the locale default.
        # Assumes the description files are UTF-8 - TODO confirm with the
        # tool that produces them.
        with open(descr_path, encoding='utf-8') as f:
            video_descr = f.read()
        logging.debug('Desc: %s', video_descr)
if __name__ == '__main__':
    # Command-line entry point: parse the options and hand them to main().
    parser = argparse.ArgumentParser(description='YouTube description to mongo')
    parser.add_argument("--input", help="input path for .description files")
    # Give --loglevel a default so main() never receives None for it;
    # DEBUG matches the fallback main() applies to unrecognised level names.
    parser.add_argument(
        "--loglevel",
        default="DEBUG",
        help="log level: DEBUG INFO WARNING ERROR",
    )
    args = parser.parse_args()
    main(args)