"""Run the same join twice -- plain and with a broadcast hint -- and print timestamps.

The script joins ``hrucinska.uam_offers`` with ``hrucinska.uam_categories`` on
``category_id == category_leaf``, counts the rows whose ``category_level2``
equals "RTV i AGD", and prints the wall-clock start/stop timestamp of each
variant so the two runs can be compared.
"""
import datetime
import logging
import time

from pyspark.sql import SparkSession
from pyspark.sql.functions import broadcast

TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'
TARGET_CATEGORY = "RTV i AGD"

spark = SparkSession \
    .builder \
    .appName("broadcast") \
    .enableHiveSupport() \
    .getOrCreate()

# Log through the JVM-side log4j logger so messages land in the Spark logs.
log4jLogger = spark._jvm.org.apache.log4j
logger = log4jLogger.LogManager.getLogger(__name__)


def _timestamp():
    """Return the current local time formatted with TIMESTAMP_FORMAT."""
    return datetime.datetime.now().strftime(TIMESTAMP_FORMAT)


def _count_target_category(joined):
    """Return the number of rows in *joined* whose category_level2 is TARGET_CATEGORY."""
    return joined.where(joined.category_level2 == TARGET_CATEGORY).count()


try:
    logger.info("SPARKAPP START")

    cat = spark.sql("select * from hrucinska.uam_categories")
    offers = spark.sql("select * from hrucinska.uam_offers")
    join_condition = cat.category_id == offers.category_leaf

    # Run 1: plain join, no broadcast hint.
    start1 = _timestamp()
    res = offers.join(cat, join_condition)
    print(_count_target_category(res))
    stop1 = _timestamp()

    # Run 2: same join with the categories table wrapped in broadcast().
    start2 = _timestamp()
    res = offers.join(broadcast(cat), join_condition)
    print(_count_target_category(res))
    stop2 = _timestamp()

    # Single-argument print() calls behave identically on Python 2 and 3;
    # "{0} {1}" reproduces the separator the old `print a, b` statement added.
    report = [
        ("czas rozpoczecia obliczen 1: ", start1),
        ("czas zakonczenia obliczen1: ", stop1),
        ("czas rozpoczecia obliczen 2: ", start2),
        ("czas zakonczenia obliczen 2: ", stop2),
    ]
    for label, value in report:
        print("{0} {1}".format(label, value))

    # NOTE(review): presumably keeps the application alive so the Spark UI can
    # still be inspected after the jobs finish -- confirm before removing.
    time.sleep(180)
except Exception as inst:
    # Report failures at ERROR level so they are not lost among INFO messages.
    # The exception is intentionally not re-raised, matching the original flow.
    logger.error("SPARKAPP ERROR {0}".format(inst))
finally:
    logger.info("SPARKAPP STOP")