0
0
Fork 0
cwiczenia/configuration.py

36 lines
1016 B
Python

from pyspark.sql import SparkSession
import logging
import datetime
import time
# Spark job: join offers with their categories and count the offers that
# fall under the "RTV i AGD" level-2 category, reporting wall-clock
# start/stop times of the computation on stdout.

# Build (or reuse) a Hive-enabled Spark session for this application.
spark = (
    SparkSession.builder
    .appName("configuration_test")
    .enableHiveSupport()
    .getOrCreate()
)

# Use the JVM-side log4j logger reached through the session's py4j gateway.
log4jLogger = spark._jvm.org.apache.log4j
logger = log4jLogger.LogManager.getLogger(__name__)

# Format used for the start/stop timestamps printed below.
_TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'

try:
    logger.info("SPARKAPP START")
    start = datetime.datetime.now().strftime(_TIMESTAMP_FORMAT)

    cat = spark.sql("select * from hrucinska.uam_categories")
    offers = spark.sql("select * from hrucinska.uam_offers")

    # Inner join: each offer is matched to the category row of its leaf category.
    res = offers.join(cat, cat.category_id == offers.category_leaf)

    # count() is the action that actually triggers the Spark computation.
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(res.where(res.category_level2 == "RTV i AGD").count())

    stop = datetime.datetime.now().strftime(_TIMESTAMP_FORMAT)

    # The labels are Polish for "computation start time" / "computation end
    # time". The two spaces before the value reproduce the output of the
    # former Python 2 statement `print "label: ", value` byte-for-byte.
    print("czas rozpoczecia obliczen 1:  {0}".format(start))
    print("czas zakonczenia obliczen1:  {0}".format(stop))

    # NOTE(review): presumably keeps the application alive for three minutes
    # so it can be inspected while still running - confirm before removing.
    time.sleep(180)
except Exception as inst:
    # Report failures at ERROR level so they are not lost among INFO messages.
    # The exception is intentionally not re-raised, matching prior behaviour.
    logger.error("SPARKAPP ERROR {0}".format(inst))
finally:
    logger.info("SPARKAPP STOP")