From c1133260b50bb62f0c1858d85648e066b174ad76 Mon Sep 17 00:00:00 2001
From: Wojciech Smolak
Date: Wed, 19 Aug 2020 20:10:27 +0200
Subject: [PATCH] youtube

---
 src/pcss_reco.py  |  2 +-
 src/yt_toMongo.py | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+), 1 deletion(-)
 create mode 100644 src/yt_toMongo.py

diff --git a/src/pcss_reco.py b/src/pcss_reco.py
index 6dde15f4..3fb9f33f 100644
--- a/src/pcss_reco.py
+++ b/src/pcss_reco.py
@@ -43,6 +43,6 @@ def main(args):
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='PCSS reco')
     parser.add_argument("--input", help="input path for XML files")
-    parser.add_argument("--loglevel", help="log lever: INFO WARNING ERROR")
+    parser.add_argument("--loglevel", help="log level: INFO WARNING ERROR")
     args = parser.parse_args()
     main(args)
diff --git a/src/yt_toMongo.py b/src/yt_toMongo.py
new file mode 100644
index 00000000..f7af7f23
--- /dev/null
+++ b/src/yt_toMongo.py
@@ -0,0 +1,51 @@
+from mongo.helpers import get_mongo_collection
+import argparse
+import logging
+from glob import glob
+from os import path
+
+
+def main(args):
+    """Log the YouTube URL and description of every ``.description`` file.
+
+    Args:
+        args: Parsed command-line namespace. ``args.input`` is the directory
+            searched for ``*.description`` files; ``args.loglevel`` is a
+            logging level name, or ``None`` when the option is omitted.
+    """
+    # args.loglevel is None when --loglevel is omitted, so substitute DEBUG
+    # before calling .upper(); unknown level names also fall back to DEBUG.
+    numeric_level = getattr(logging, (args.loglevel or 'DEBUG').upper(), None)
+    if not isinstance(numeric_level, int):
+        numeric_level = logging.DEBUG
+    logging.basicConfig(format='%(asctime)s [%(levelname)s] - %(message)s', level=numeric_level)
+    input_path = args.input
+    # NOTE(review): credentials are hard-coded in this URI; consider loading
+    # them from configuration or the environment instead.
+    uri = "mongodb://speechRecoUser:speech!reco@localhost/archSpeechReco"
+    db_name = "archSpeechReco"
+    col_name = "moviesMeta"
+
+    # The collection handle is fetched but not used further in this function.
+    col = get_mongo_collection(col_name, db_name, uri)
+
+    logging.info("let's start")
+    logging.info(input_path)
+
+    for file in glob(f'{input_path}/*.description'):
+        # The file name up to its first dot is taken as the video id.
+        video_id = path.basename(file).split('.')[0]
+        video_url = f'https://www.youtube.com/watch?v={video_id}'
+        logging.debug('YT URL: %s', video_url)
+        # Decode as UTF-8 explicitly instead of the platform default.
+        with open(file, encoding='utf-8') as f:
+            video_descr = f.read()
+        logging.debug('Desc: %s', video_descr)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='YouTube description to mongo')
+    parser.add_argument("--input", help="input path for .description files")
+    parser.add_argument("--loglevel", help="log level: DEBUG INFO WARNING ERROR")
+    args = parser.parse_args()
+    main(args)