commit 8949ebc07f491267f71164bd1008bab72641f91b
Author: Wojciech Borowski-Dobrowolski
Date:   Sun May 12 07:31:59 2024 +0000

    Initial state

diff --git a/athena.tf b/athena.tf
new file mode 100644
index 0000000..4abd327
--- /dev/null
+++ b/athena.tf
@@ -0,0 +1,39 @@
+# Bucket that stores Athena query results for this student's environment.
+resource "aws_s3_bucket" "athena_results" {
+  bucket        = "athena-results-${var.account_number}-${var.student_initials}-${var.student_index_no}"
+  force_destroy = true
+  tags          = local.common_tags
+}
+
+# Expire every object in the results bucket after one day.
+resource "aws_s3_bucket_lifecycle_configuration" "athena_results_lifecycle" {
+  bucket = aws_s3_bucket.athena_results.id
+
+  rule {
+    id     = "standard-expiration"
+    status = "Enabled"
+
+    # An empty filter applies the rule to all objects. It is declared explicitly
+    # because newer 5.x AWS provider releases warn when a rule sets neither
+    # filter nor prefix.
+    filter {}
+
+    expiration {
+      days = 1
+    }
+  }
+}
+
+# Workgroup whose query results are written under output/ in the results
+# bucket; the workgroup configuration is enforced for its queries.
+resource "aws_athena_workgroup" "athena_workgroup" {
+  name          = "development"
+  force_destroy = true
+
+  configuration {
+    enforce_workgroup_configuration = true
+
+    result_configuration {
+      output_location = "s3://${aws_s3_bucket.athena_results.bucket}/output/"
+    }
+  }
+}
diff --git a/glue_catalog_database.tf b/glue_catalog_database.tf
new file mode 100644
index 0000000..2e5ba21
--- /dev/null
+++ b/glue_catalog_database.tf
@@ -0,0 +1,22 @@
+# Glue Data Catalog databases for the raw and processed data lake zones.
+resource "aws_glue_catalog_database" "datalake_db_raw_zone" {
+  name = "datalake_raw_${var.account_number}_${var.student_initials}_${var.student_index_no}"
+}
+
+resource "aws_glue_catalog_database" "datalake_db_processed_zone" {
+  name = "datalake_processed_${var.account_number}_${var.student_initials}_${var.student_index_no}"
+}
+
+# Crawler that catalogs raw-zone/stockdata/ in the raw bucket into the raw
+# database, prefixing the tables it creates with "crawler_".
+resource "aws_glue_crawler" "glue_crawler_raw_zone" {
+  name          = "gc-raw-${var.account_number}-${var.student_initials}-${var.student_index_no}"
+  database_name = aws_glue_catalog_database.datalake_db_raw_zone.name
+  role          = var.lab_role_arn
+  table_prefix  = "crawler_"
+  tags          = local.common_tags
+
+  s3_target {
+    path = "s3://${aws_s3_bucket.raw_bucket.bucket}/raw-zone/stockdata/"
+  }
+}
diff --git a/kinesis_ds.tf b/kinesis_ds.tf
new file mode 100644
index 0000000..e5fbc6f
--- /dev/null
+++ b/kinesis_ds.tf
@@ -0,0 +1,14 @@
+# Single-shard stream that the Firehose delivery stream reads from.
+resource "aws_kinesis_stream" "cryptostock_stream" {
+  name                      = "cryptostock-${var.account_number}-${var.student_initials}-${var.student_index_no}"
+  shard_count               = 1
+  enforce_consumer_deletion = true
+  tags                      = local.common_tags
+
+  shard_level_metrics = [
+    "IncomingBytes",
+    "OutgoingBytes",
+    "IncomingRecords",
+    "OutgoingRecords",
+  ]
+}
diff --git a/kinesis_fh.tf b/kinesis_fh.tf
new file mode 100644
index 0000000..296ada2
--- /dev/null
+++ b/kinesis_fh.tf
@@ -0,0 +1,23 @@
+# Firehose stream that delivers records from the Kinesis stream to the raw
+# bucket under year=/month=/day=/hour= prefixes.
+resource "aws_kinesis_firehose_delivery_stream" "stock_delivery_stream" {
+  name        = "firehose-${var.account_number}-${var.student_initials}-${var.student_index_no}"
+  destination = "extended_s3"
+
+  kinesis_source_configuration {
+    kinesis_stream_arn = aws_kinesis_stream.cryptostock_stream.arn
+    role_arn           = var.lab_role_arn
+  }
+
+  extended_s3_configuration {
+    role_arn           = var.lab_role_arn
+    bucket_arn         = aws_s3_bucket.raw_bucket.arn
+    buffering_size     = 1
+    buffering_interval = 60
+
+    # "!{...}" is Firehose prefix syntax, not Terraform interpolation, so both
+    # values are plain string literals.
+    prefix              = "raw-zone/stockdata/year=!{timestamp:yyyy}/month=!{timestamp:MM}/day=!{timestamp:dd}/hour=!{timestamp:HH}/"
+    error_output_prefix = "raw-zone/stockdata_errors/!{firehose:error-output-type}/year=!{timestamp:yyyy}/month=!{timestamp:MM}/day=!{timestamp:dd}/hour=!{timestamp:HH}/"
+  }
+}
diff --git a/lambda.tf b/lambda.tf
new file mode 100644
index 0000000..3d0ce37
--- /dev/null
+++ b/lambda.tf
@@ -0,0 +1,47 @@
+# Lambda layer published from the local awswrangler 2.7.0 archive (py3.8 build).
+resource "aws_lambda_layer_version" "aws_wrangler" {
+  filename            = "../lambda/awswrangler-layer-2.7.0-py3.8.zip"
+  layer_name          = "aws_wrangler_${var.account_number}_${var.student_initials}_${var.student_index_no}"
+  source_code_hash    = filebase64sha256("../lambda/awswrangler-layer-2.7.0-py3.8.zip")
+  compatible_runtimes = ["python3.8"]
+}
+
+# ETL function; it loads the awswrangler layer defined above.
+# NOTE(review): the python3.8 runtime is tied to the py3.8 layer archive, so
+# upgrade both together - confirm current AWS Lambda runtime support.
+resource "aws_lambda_function" "etl_post_processing" {
+  function_name    = "etl-post-processing-${var.account_number}-${var.student_initials}-${var.student_index_no}"
+  filename         = "lambda_definition.1.zip"
+  source_code_hash = filebase64sha256("lambda_definition.1.zip")
+  handler          = "lambda_definition.etl_function"
+  runtime          = "python3.8"
+  role             = var.lab_role_arn
+  timeout          = 300
+  memory_size      = 512
+  layers           = [aws_lambda_layer_version.aws_wrangler.arn]
+}
+
+# Allow S3 to invoke the ETL function on behalf of the raw bucket.
+resource "aws_lambda_permission" "allow_bucket" {
+  statement_id  = "AllowExecutionFromS3Bucket"
+  action        = "lambda:InvokeFunction"
+  function_name = aws_lambda_function.etl_post_processing.arn
+  principal     = "s3.amazonaws.com"
+  source_arn    = aws_s3_bucket.raw_bucket.arn
+}
+
+# Invoke the ETL function for every object created under raw-zone/.
+# NOTE(review): this prefix also matches raw-zone/stockdata_errors/, where
+# Firehose writes its error output - confirm the function should see those.
+resource "aws_s3_bucket_notification" "trigger_etl_lambda" {
+  bucket = aws_s3_bucket.raw_bucket.id
+
+  lambda_function {
+    lambda_function_arn = aws_lambda_function.etl_post_processing.arn
+    events              = ["s3:ObjectCreated:*"]
+    filter_prefix       = "raw-zone/"
+  }
+
+  # Ensure the invoke permission is created before the notification.
+  depends_on = [aws_lambda_permission.allow_bucket]
+}
diff --git a/main.tf b/main.tf
new file mode 100644
index 0000000..cdce654
--- /dev/null
+++ b/main.tf
@@ -0,0 +1,9 @@
+# Tags shared by the resources in this configuration. var.environment
+# defaults to "dev", so the environment tag is "DEV" unless overridden.
+locals {
+  common_tags = {
+    purpose     = "UAM Cloud Data Processing"
+    environment = upper(var.environment)
+    owner       = var.student_full_name
+  }
+}
diff --git a/provider.tf b/provider.tf
new file mode 100644
index 0000000..ec26d52
--- /dev/null
+++ b/provider.tf
@@ -0,0 +1,14 @@
+terraform {
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = "~> 5.0"
+    }
+  }
+}
+
+# Uses the "default" AWS profile and the region given by var.region.
+provider "aws" {
+  profile = "default"
+  region  = var.region
+}
diff --git a/s3.tf b/s3.tf
new file mode 100644
index 0000000..8b443a8
--- /dev/null
+++ b/s3.tf
@@ -0,0 +1,13 @@
+# Data lake buckets for the raw and processed zones. force_destroy allows
+# deleting them while they still contain objects.
+resource "aws_s3_bucket" "raw_bucket" {
+  bucket        = "datalake-raw-${var.account_number}-${var.student_initials}-${var.student_index_no}"
+  force_destroy = true
+  tags          = local.common_tags
+}
+
+resource "aws_s3_bucket" "processed_bucket" {
+  bucket        = "datalake-processed-${var.account_number}-${var.student_initials}-${var.student_index_no}"
+  force_destroy = true
+  tags          = local.common_tags
+}
diff --git a/variables.tf b/variables.tf
new file mode 100644
index 0000000..9f65695
--- /dev/null
+++ b/variables.tf
@@ -0,0 +1,38 @@
+variable "account_number" {
+  # Declared as a string so leading zeros survive interpolation into resource
+  # names (presumably an AWS account ID, which is a 12-digit identifier).
+  description = "Account number"
+  type        = string
+}
+
+variable "region" {
+  description = "Region name - must be NVirginia us-east-1"
+  type        = string
+  default     = "us-east-1"
+}
+
+variable "environment" {
+  description = "Environment name"
+  type        = string
+  default     = "dev"
+}
+
+variable "student_initials" {
+  description = "letters of first and last names"
+  type        = string
+}
+
+variable "student_full_name" {
+  description = "Student's full name"
+  type        = string
+}
+
+variable "student_index_no" {
+  description = "Index no"
+  type        = string
+}
+
+variable "lab_role_arn" {
+  description = "the role we use for all labs, dont use a single role for everything! it is an anti-pattern!!!!"
+  type        = string
+}