38 lines
1.9 KiB
Python
38 lines
1.9 KiB
Python
|
import sys
|
||
|
from awsglue.transforms import *
|
||
|
from awsglue.utils import getResolvedOptions
|
||
|
from pyspark.context import SparkContext
|
||
|
from awsglue.context import GlueContext
|
||
|
from awsglue.job import Job
|
||
|
from awsglue import DynamicFrame
|
||
|
|
||
|
def sparkSqlQuery(glueContext, query, mapping, transformation_ctx) -> DynamicFrame:
    """Run a Spark SQL query over DynamicFrames and wrap the result.

    Args:
        glueContext: GlueContext whose Spark session executes the query; it
            is also passed to ``DynamicFrame.fromDF`` for the result.
        query: Spark SQL text. It can refer to the aliases in ``mapping``
            as table names.
        mapping: Dict of ``alias -> DynamicFrame``. Each frame is converted
            with ``toDF()`` and registered as a temporary view under its
            alias, replacing any existing view with that name.
        transformation_ctx: Passed through to ``DynamicFrame.fromDF``.

    Returns:
        A DynamicFrame built from the query's result DataFrame.
    """
    # Use the session owned by the context we were handed instead of relying
    # on a module-level `spark` global that happens to exist at call time.
    session = glueContext.spark_session

    for alias, frame in mapping.items():
        frame.toDF().createOrReplaceTempView(alias)

    return DynamicFrame.fromDF(session.sql(query), glueContext, transformation_ctx)
|
||
|
# Job bootstrap: resolve arguments, then build the Spark/Glue contexts.
# JOB_NAME is the only argument this script asks getResolvedOptions for.
args = getResolvedOptions(sys.argv, ["JOB_NAME"])

# `spark` is the SparkSession exposed by the GlueContext built on `sc`.
sc = SparkContext()
glueContext = GlueContext(sc)
spark = glueContext.spark_session

# Initialise the job under the resolved name, handing it the full arg dict.
job = Job(glueContext)
job.init(args["JOB_NAME"], args)
|
||
|
|
||
|
# Source node (AWS Glue Data Catalog): load the `processed_stockdata` table
# as a DynamicFrame.
AWSGlueDataCatalog_node1717182213254 = glueContext.create_dynamic_frame.from_catalog(
    database="datalake_processed_878695318857_ek_1201695",
    table_name="processed_stockdata",
    transformation_ctx="AWSGlueDataCatalog_node1717182213254",
)
|
||
|
|
||
|
# Script generated for node SQL Query
# Aggregates per (symbol, year, month, day, type): summed `amount` and
# `dollar_amount`, each rounded to 2 decimals, plus the row count.
#
# The query reads from `myDataSource2`, the temp-view alias that the mapping
# below registers for the catalog DynamicFrame. Querying the catalog table by
# its qualified name instead would ignore the frame loaded by the source node
# and would only resolve if the Glue catalog is configured as Spark's metastore.
SqlQuery3688 = '''
select ROUND(SUM(amount), 2) as total_volume,
       ROUND(SUM(dollar_amount), 2) as total_dollars,
       COUNT(*) as total_cnt_of_transactions,
       type, symbol, year, month, day
from myDataSource2
group by symbol, year, month, day, type;
'''
SQLQuery_node1717182234219 = sparkSqlQuery(
    glueContext,
    query=SqlQuery3688,
    mapping={"myDataSource2": AWSGlueDataCatalog_node1717182213254},
    transformation_ctx="SQLQuery_node1717182234219",
)
|
||
|
|
||
|
# Sink node (AWS Glue Data Catalog): write the aggregated frame to the
# `agg_stockdata` table, partitioned by symbol/year/month/day. The
# enableUpdateCatalog / updateBehavior options ask Glue to update the catalog
# as part of the write (see the AWS Glue docs for their exact semantics).
AWSGlueDataCatalog_node1717182241035 = glueContext.write_dynamic_frame.from_catalog(
    frame=SQLQuery_node1717182234219,
    database="datalake_processed_878695318857_ek_1201695",
    table_name="agg_stockdata",
    additional_options={
        "enableUpdateCatalog": True,
        "updateBehavior": "UPDATE_IN_DATABASE",
        "partitionKeys": ["symbol", "year", "month", "day"],
    },
    transformation_ctx="AWSGlueDataCatalog_node1717182241035",
)
|
||
|
|
||
|
# Finalize the job that job.init() started. Kept as the last statement so it
# is reached only if every step above ran without raising.
# NOTE(review): per AWS Glue docs this call is also what persists job-bookmark
# state when bookmarks are enabled — confirm against the job configuration.
job.commit()
|