Added new terraform components

This commit is contained in:
EC2 Default User 2024-05-12 09:12:29 +00:00
parent 28d1f19042
commit 9c95137819
5 changed files with 101 additions and 25 deletions

View File

@ -1,22 +1,25 @@
import urllib.parse
import awswrangler as wr
import pandas as pd
import boto3
client_ssm = boto3.client('ssm')
def etl_function(event, context):
processed_zone_prefix = "processed-zone"
record = event["Records"][0]
bucket = record["s3"]["bucket"]["name"]
src_bucket = record["s3"]["bucket"]["name"]
target_bucket = client_ssm.get_parameter(Name='s3_processed_bucket_name')['Parameter']['Value']
key = urllib.parse.unquote(record["s3"]["object"]["key"])
event_prefix = key.split('/')[1]
full_src_path = 's3://{bucket}/{key}'.format(bucket=bucket, key=key)
full_src_path = 's3://{src_bucket}/{key}'.format(src_bucket=src_bucket, key=key)
print(f'Processing key = {full_src_path}')
df = wr.s3.read_json(path=full_src_path, lines=True)
filename = key.split('/')[-1][-36:]
dest_prefix = f"s3://{bucket}/{processed_zone_prefix}/{event_prefix}"
dest_prefix = f"s3://{target_bucket}/{processed_zone_prefix}/{event_prefix}"
df['transaction_date'] = pd.to_datetime(df['transaction_ts'], unit='s')
df['year'] = df['transaction_date'].dt.year

View File

@ -0,0 +1,5 @@
# Publishes the name of the processed-zone S3 bucket to SSM Parameter Store
# under the key "s3_processed_bucket_name". The ETL Lambda reads this key with
# ssm.get_parameter and uses the value as the bucket of its destination path.
resource "aws_ssm_parameter" "s3_processed" {
  type  = "String"
  name  = "s3_processed_bucket_name"
  value = aws_s3_bucket.processed_bucket.bucket
}

View File

@ -1,7 +1,7 @@
{
"version": 4,
"terraform_version": "1.8.1",
"serial": 145,
"serial": 212,
"lineage": "a77aaaba-b4f8-6adb-0387-8f0b98d722c2",
"outputs": {},
"resources": [],

View File

@ -1,7 +1,7 @@
{
"version": 4,
"terraform_version": "1.8.1",
"serial": 130,
"serial": 196,
"lineage": "a77aaaba-b4f8-6adb-0387-8f0b98d722c2",
"outputs": {},
"resources": [
@ -43,7 +43,7 @@
"id": "development",
"name": "development",
"state": "ENABLED",
"tags": null,
"tags": {},
"tags_all": {}
},
"sensitive_attributes": [],
@ -82,8 +82,8 @@
"id": "534534002841:datalake_processed_534534002841_ab_1201680",
"location_uri": "",
"name": "datalake_processed_534534002841_ab_1201680",
"parameters": null,
"tags": null,
"parameters": {},
"tags": {},
"tags_all": {},
"target_database": []
},
@ -120,8 +120,8 @@
"id": "534534002841:datalake_raw_534534002841_ab_1201680",
"location_uri": "",
"name": "datalake_raw_534534002841_ab_1201680",
"parameters": null,
"tags": null,
"parameters": {},
"tags": {},
"tags_all": {},
"target_database": []
},
@ -141,7 +141,7 @@
"attributes": {
"arn": "arn:aws:glue:us-east-1:534534002841:crawler/gc-raw-534534002841-ab-1201680",
"catalog_target": [],
"classifiers": null,
"classifiers": [],
"configuration": "",
"database_name": "datalake_raw_534534002841_ab_1201680",
"delta_target": [],
@ -175,7 +175,7 @@
"connection_name": "",
"dlq_event_queue_arn": "",
"event_queue_arn": "",
"exclusions": null,
"exclusions": [],
"path": "s3://datalake-raw-534534002841-ab-1201680/raw-zone/stockdata/",
"sample_size": 0
}
@ -275,7 +275,7 @@
],
"snowflake_configuration": [],
"splunk_configuration": [],
"tags": null,
"tags": {},
"tags_all": {},
"timeouts": null,
"version_id": "1"
@ -365,9 +365,9 @@
"image_uri": "",
"invoke_arn": "arn:aws:apigateway:us-east-1:lambda:path/2015-03-31/functions/arn:aws:lambda:us-east-1:534534002841:function:etl-post-processing-534534002841-ab-1201680/invocations",
"kms_key_arn": "",
"last_modified": "2024-05-11T10:53:43.594+0000",
"last_modified": "2024-05-12T07:30:33.751+0000",
"layers": [
"arn:aws:lambda:us-east-1:534534002841:layer:aws_wrangler_534534002841_ab_1201680:2"
"arn:aws:lambda:us-east-1:534534002841:layer:aws_wrangler_534534002841_ab_1201680:4"
],
"logging_config": [
{
@ -396,7 +396,7 @@
"snap_start": [],
"source_code_hash": "DYklWA51/+hutwYtHutJg59rV7DY0LEgfp+ne8wgiSo=",
"source_code_size": 884,
"tags": null,
"tags": {},
"tags_all": {},
"timeout": 300,
"timeouts": null,
@ -425,15 +425,15 @@
{
"schema_version": 0,
"attributes": {
"arn": "arn:aws:lambda:us-east-1:534534002841:layer:aws_wrangler_534534002841_ab_1201680:2",
"compatible_architectures": null,
"arn": "arn:aws:lambda:us-east-1:534534002841:layer:aws_wrangler_534534002841_ab_1201680:4",
"compatible_architectures": [],
"compatible_runtimes": [
"python3.8"
],
"created_date": "2024-05-11T10:53:43.233+0000",
"created_date": "2024-05-12T07:30:26.214+0000",
"description": "",
"filename": "../lambda/awswrangler-layer-2.7.0-py3.8.zip",
"id": "arn:aws:lambda:us-east-1:534534002841:layer:aws_wrangler_534534002841_ab_1201680:2",
"id": "arn:aws:lambda:us-east-1:534534002841:layer:aws_wrangler_534534002841_ab_1201680:4",
"layer_arn": "arn:aws:lambda:us-east-1:534534002841:layer:aws_wrangler_534534002841_ab_1201680",
"layer_name": "aws_wrangler_534534002841_ab_1201680",
"license_info": "",
@ -445,7 +445,7 @@
"skip_destroy": false,
"source_code_hash": "C0YX/4auMnBs4J9JCDy1f7uc2GLF0vU7ppQgzffQiN4=",
"source_code_size": 43879070,
"version": "2"
"version": "4"
},
"sensitive_attributes": [],
"private": "bnVsbA=="
@ -514,7 +514,25 @@
],
"hosted_zone_id": "Z3AQBSTGFYJSTF",
"id": "athena-results-534534002841-ab-1201680",
"lifecycle_rule": [],
"lifecycle_rule": [
{
"abort_incomplete_multipart_upload_days": 0,
"enabled": true,
"expiration": [
{
"date": "",
"days": 1,
"expired_object_delete_marker": false
}
],
"id": "standard-expiration",
"noncurrent_version_expiration": [],
"noncurrent_version_transition": [],
"prefix": "",
"tags": {},
"transition": []
}
],
"logging": [],
"object_lock_configuration": [],
"object_lock_enabled": false,
@ -778,7 +796,17 @@
"bucket": "datalake-raw-534534002841-ab-1201680",
"eventbridge": false,
"id": "datalake-raw-534534002841-ab-1201680",
"lambda_function": [],
"lambda_function": [
{
"events": [
"s3:ObjectCreated:*"
],
"filter_prefix": "raw-zone/",
"filter_suffix": "",
"id": "tf-s3-lambda-20240512073033855700000001",
"lambda_function_arn": "arn:aws:lambda:us-east-1:534534002841:function:etl-post-processing-534534002841-ab-1201680"
}
],
"queue": [],
"topic": []
},
@ -792,6 +820,46 @@
]
}
]
},
{
"mode": "managed",
"type": "aws_ssm_parameter",
"name": "s3_processed",
"provider": "provider[\"registry.terraform.io/hashicorp/aws\"]",
"instances": [
{
"schema_version": 0,
"attributes": {
"allowed_pattern": "",
"arn": "arn:aws:ssm:us-east-1:534534002841:parameter/s3_processed_bucket_name",
"data_type": "text",
"description": "",
"id": "s3_processed_bucket_name",
"insecure_value": null,
"key_id": "",
"name": "s3_processed_bucket_name",
"overwrite": null,
"tags": {},
"tags_all": {},
"tier": "Standard",
"type": "String",
"value": "datalake-processed-534534002841-ab-1201680",
"version": 3
},
"sensitive_attributes": [
[
{
"type": "get_attr",
"value": "value"
}
]
],
"private": "bnVsbA==",
"dependencies": [
"aws_s3_bucket.processed_bucket"
]
}
]
}
],
"check_results": null