46 lines
1.4 KiB
Python
46 lines
1.4 KiB
Python
|
from pyspark.sql import SparkSession
|
||
|
from pyspark.sql.functions import broadcast
|
||
|
import logging
|
||
|
import datetime
|
||
|
import time
|
||
|
|
||
|
# Create (or reuse, if one already exists) the Spark session for this
# application, named "broadcast", with Hive support enabled so that the
# Hive tables queried further down can be resolved through spark.sql().
spark = (
    SparkSession.builder
    .appName("broadcast")
    .enableHiveSupport()
    .getOrCreate()
)
|
||
|
|
||
|
# Log through the JVM-side log4j package, reached via the session's JVM
# gateway, rather than through Python's own logging module.
log4jLogger = spark._jvm.org.apache.log4j
_log_manager = log4jLogger.LogManager
# The logger is named after this module (`__name__`).
logger = _log_manager.getLogger(__name__)
|
||
|
|
||
|
def _timestamp():
    """Return the current local time as a 'YYYY-MM-DD HH:MM:SS' string."""
    return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')


# Benchmark script body: run the same offers/categories join twice -- once as
# a plain join and once with an explicit broadcast hint on the categories
# table -- and print the wall-clock start/stop time of each run.
try:
    logger.info("SPARKAPP START")

    start1 = _timestamp()

    cat = spark.sql("select * from hrucinska.uam_categories")
    offers = spark.sql("select * from hrucinska.uam_offers")

    # Run 1: plain join; count() is the action that actually executes it.
    # NOTE(review): if the categories table is below Spark's automatic
    # broadcast threshold, this run may already be a broadcast join --
    # confirm against the cluster configuration before comparing timings.
    res = offers.join(cat, cat.category_id == offers.category_leaf)
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print(res.where(res.category_level2 == "RTV i AGD").count())

    stop1 = _timestamp()
    start2 = _timestamp()

    # Run 2: same join and filter, with the categories side wrapped in
    # broadcast() to request a broadcast join.
    res = offers.join(broadcast(cat), cat.category_id == offers.category_leaf)
    print(res.where(res.category_level2 == "RTV i AGD").count())

    stop2 = _timestamp()

    # The labels are Polish: "czas rozpoczecia obliczen" = "computation start
    # time", "czas zakonczenia obliczen" = "computation end time".
    # "%s %s" reproduces the output of the former Python 2 statement
    # `print label, value` (items separated by one space) byte-for-byte.
    print("%s %s" % ("czas rozpoczecia obliczen 1: ", start1))
    print("%s %s" % ("czas zakonczenia obliczen1: ", stop1))

    print("%s %s" % ("czas rozpoczecia obliczen 2: ", start2))
    print("%s %s" % ("czas zakonczenia obliczen 2: ", stop2))

    # NOTE(review): presumably keeps the application alive for three minutes
    # so the Spark UI can be inspected after both runs -- confirm.
    time.sleep(180)

except Exception as inst:
    # Report failures at ERROR level so they are not lost among INFO
    # messages. The exception is deliberately not re-raised, so the script
    # still terminates normally after logging, as it did before.
    logger.error("SPARKAPP ERROR {0}".format(inst))
finally:
    logger.info("SPARKAPP STOP")
|