"""AWS Glue job: aggregate ``processed_stockdata`` into ``agg_stockdata``.

Reads the ``processed_stockdata`` table through the Glue Data Catalog, sums
``amount`` and ``dollar_amount`` and counts rows per
(symbol, year, month, day, type), and writes the result to the
``agg_stockdata`` catalog table partitioned by symbol/year/month/day.
"""
import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
from awsglue import DynamicFrame


def sparkSqlQuery(glueContext, query, mapping, transformation_ctx) -> DynamicFrame:
    """Run a Spark SQL query over a set of DynamicFrames.

    Args:
        glueContext: The job's GlueContext; its Spark session executes the query.
        query: Spark SQL text. It should refer to the input frames by the
            alias names used as keys of ``mapping``.
        mapping: Dict of ``alias -> DynamicFrame``. Each frame is converted to
            a DataFrame and registered as a temporary view named ``alias``.
        transformation_ctx: Transformation context passed to
            ``DynamicFrame.fromDF`` for the result.

    Returns:
        The query result wrapped as a DynamicFrame.
    """
    for alias, frame in mapping.items():
        frame.toDF().createOrReplaceTempView(alias)
    # Use the session owned by the supplied context instead of relying on a
    # module-level ``spark`` global being defined before the first call.
    result = glueContext.spark_session.sql(query)
    return DynamicFrame.fromDF(result, glueContext, transformation_ctx)


args = getResolvedOptions(sys.argv, ['JOB_NAME'])
sc = SparkContext()
glueContext = GlueContext(sc)
spark = glueContext.spark_session
job = Job(glueContext)
job.init(args['JOB_NAME'], args)

# Script generated for node AWS Glue Data Catalog
AWSGlueDataCatalog_node1717182213254 = glueContext.create_dynamic_frame.from_catalog(
    database="datalake_processed_878695318857_ek_1201695",
    table_name="processed_stockdata",
    transformation_ctx="AWSGlueDataCatalog_node1717182213254",
)

# Script generated for node SQL Query
# The query reads from ``myDataSource2``, the temp view that sparkSqlQuery
# registers for the frame loaded above. Previously it named the catalog table
# directly, which ignored the loaded frame and only resolved when the job's
# Spark session could see the Glue Data Catalog as its metastore.
# No trailing ';' -- it is not needed by spark.sql and may be rejected by
# older Spark SQL parsers (NOTE(review): confirm against the job's Glue version).
SqlQuery3688 = '''
select
    ROUND(SUM(amount), 2) as total_volume,
    ROUND(SUM(dollar_amount), 2) as total_dollars,
    COUNT(*) as total_cnt_of_transactions,
    type,
    symbol,
    year,
    month,
    day
from myDataSource2
group by symbol, year, month, day, type
'''
SQLQuery_node1717182234219 = sparkSqlQuery(
    glueContext,
    query=SqlQuery3688,
    mapping={"myDataSource2": AWSGlueDataCatalog_node1717182213254},
    transformation_ctx="SQLQuery_node1717182234219",
)

# Script generated for node AWS Glue Data Catalog
AWSGlueDataCatalog_node1717182241035 = glueContext.write_dynamic_frame.from_catalog(
    frame=SQLQuery_node1717182234219,
    database="datalake_processed_878695318857_ek_1201695",
    table_name="agg_stockdata",
    additional_options={
        "enableUpdateCatalog": True,
        "updateBehavior": "UPDATE_IN_DATABASE",
        "partitionKeys": ["symbol", "year", "month", "day"],
    },
    transformation_ctx="AWSGlueDataCatalog_node1717182241035",
)

job.commit()