Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sandbox cloudwatch logs to S3 #6379

Draft
wants to merge 8 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ updates:
- "terraform/auth0/ministryofjustice-data-platform-development"
- "terraform/aws/analytical-platform-data-engineering-production/10ds"
- "terraform/aws/analytical-platform-data-engineering-sandbox-a/airflow-create-a-pipeline"
- "terraform/aws/analytical-platform-data-engineering-sandbox-a/cloudtrail-athena-events"
- "terraform/aws/analytical-platform-data-production/airflow"
- "terraform/aws/analytical-platform-data-production/artifact-repos"
- "terraform/aws/analytical-platform-data-production/athena"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
builds

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# CloudWatch log group that receives the processed Athena events.
# Built with the log-group submodule of terraform-aws-modules/cloudwatch,
# pinned to an exact release.
module "cloudtrail_athena_events_log_group" {
#checkov:skip=CKV_TF_1:Module registry does not support commit hashes for versions

source = "terraform-aws-modules/cloudwatch/aws//modules/log-group"
version = "5.6.1"

name = "cloudtrail-athena-events"
# Log events are kept for 400 days.
retention_in_days = 400
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Subscription filter on the "cloudtrail" log group that delivers matching
# events to the event-processor Lambda function.
module "cloudtrail_athena_events_subscription_filter" {
#checkov:skip=CKV_TF_1:Module registry does not support commit hashes for versions

source = "terraform-aws-modules/cloudwatch/aws//modules/log-subscription-filter"
version = "5.6.1"

name = "cloudtrail-athena-events"
# NOTE(review): the "cloudtrail" log group is referenced by name only and is
# not created in this configuration - presumably it already exists in the
# target account; confirm.
log_group_name = "cloudtrail"
# Only events whose eventName is StartQueryExecution are forwarded.
filter_pattern = "{ ($.eventName = \"StartQueryExecution\") }"
destination_arn = module.cloudtrail_athena_event_processor_function.lambda_function_arn
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Identity of the caller as seen through the "session" provider alias;
# its ARN feeds data.aws_iam_session_context.session.
data "aws_caller_identity" "session" {
provider = aws.session
}

# Session context derived from the caller ARN obtained through the "session"
# provider alias. Its issuer_arn is inspected by the
# "analytical-platform-management-production" provider to decide whether a
# role needs to be assumed.
data "aws_iam_session_context" "session" {
provider = aws.session

arn = data.aws_caller_identity.session.arn
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Lambda function that receives the subscription-filter deliveries and writes
# each event to the cloudtrail-athena-events log group. Packaged from the
# local src/cloudtrail-athena-event-processor directory and run on the
# python3.12 runtime with a 120 second timeout.
#trivy:ignore:avd-aws-0066:X-Ray is not required for this service
module "cloudtrail_athena_event_processor_function" {
#checkov:skip=CKV_TF_1:Module registry does not support commit hashes for versions

source = "terraform-aws-modules/lambda/aws"
version = "7.17.0"

function_name = "cloudtrail-athena-event-processor"
description = "Processes incoming CloudTrail events and forwards them to CloudWatch Logs"
handler = "main.lambda_handler"
runtime = "python3.12"
timeout = 120

source_path = "${path.module}/src/cloudtrail-athena-event-processor"
# NOTE(review): presumably stops package timestamp changes alone from
# triggering a redeploy - confirm against the module documentation.
trigger_on_package_timestamp = false

# Allows the CloudWatch Logs service principal to invoke the function.
# NOTE(review): no source_arn is set, so the permission is not limited to a
# specific log group - confirm this is intended.
allowed_triggers = {
"logs" = {
principal = "logs.amazonaws.com"
}
}
# NOTE(review): presumably attaches the triggers to the unqualified function
# rather than the current published version - confirm against the module
# documentation.
create_current_version_allowed_triggers = false

# Read by the handler at import time via os.environ.
environment_variables = {
CLOUDWATCH_LOG_GROUP_NAME = module.cloudtrail_athena_events_log_group.cloudwatch_log_group_name
}

# Grants the function permission to create log streams and put log events
# under the destination log group only.
attach_policy_statements = true
policy_statements = {
logs_access = {
sid = "AllowCloudWatchLogs"
effect = "Allow"
actions = [
"logs:CreateLogStream",
"logs:PutLogEvents"
]
resources = ["${module.cloudtrail_athena_events_log_group.cloudwatch_log_group_arn}:*"]
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import base64
import datetime
import gzip
import json
import os
from io import BytesIO

import boto3

# Destination log group name. The variable is required: a missing value raises
# KeyError at import time, so a misconfigured function fails on cold start.
CLOUDWATCH_LOG_GROUP_NAME = os.environ["CLOUDWATCH_LOG_GROUP_NAME"]

# Daily log stream name in YYYY-MM-DD form, pinned to UTC so it does not depend
# on the process's local timezone.
# NOTE: this is evaluated once, when the module is imported; a long-lived
# execution environment keeps the date it was started on.
CLOUDWATCH_LOG_STREAM_NAME = datetime.datetime.now(
    datetime.timezone.utc
).strftime("%Y-%m-%d")

# Created at module scope so the client is reused across invocations.
logs_client = boto3.client("logs")


def lambda_handler(event, context):  # pylint: disable=unused-argument
    """Forward CloudWatch Logs subscription events to the destination log group.

    The payload under ``event["awslogs"]["data"]`` is base64-decoded,
    gunzipped and parsed as JSON. Each entry in its ``logEvents`` list has its
    ``message`` parsed as JSON, re-serialised, and written with its original
    ``timestamp`` to a per-day log stream in ``CLOUDWATCH_LOG_GROUP_NAME``.

    Args:
        event: Invocation payload; must contain ``event["awslogs"]["data"]``.
        context: Lambda context object (unused).

    Returns:
        None.

    Raises:
        KeyError: If the payload or a log event lacks an expected key.
        json.JSONDecodeError: If the payload or a log event message is not
            valid JSON.
    """
    # Decode and decompress the subscription payload, then parse it.
    compressed_data = base64.b64decode(event["awslogs"]["data"])
    with gzip.GzipFile(fileobj=BytesIO(compressed_data)) as gzipfile:
        parsed_event = json.loads(gzipfile.read())

    log_events = parsed_event["logEvents"]
    if not log_events:
        # Nothing to forward, so do not create a stream either.
        return

    # Resolve the stream name on every invocation: a module-level value is
    # computed only at cold start and would go stale once a warm execution
    # environment runs past midnight.
    log_stream_name = datetime.datetime.now(datetime.timezone.utc).strftime(
        "%Y-%m-%d"
    )

    # Ensure the stream exists once per invocation rather than once per event.
    try:
        logs_client.create_log_stream(
            logGroupName=CLOUDWATCH_LOG_GROUP_NAME,
            logStreamName=log_stream_name,
        )
    except logs_client.exceptions.ResourceAlreadyExistsException:
        # Expected on every invocation after the first one of the day.
        pass

    for log_event in log_events:
        print(f"Processing log event: {log_event['id']}")

        timestamp = log_event["timestamp"]
        # Round-trip through json so the stored message is normalised JSON.
        message = json.loads(log_event["message"])

        try:
            logs_client.put_log_events(
                logGroupName=CLOUDWATCH_LOG_GROUP_NAME,
                logStreamName=log_stream_name,
                logEvents=[
                    {"timestamp": timestamp, "message": json.dumps(message)}
                ],
            )
        except Exception as e:
            # Best effort: report the failure and carry on with the remaining
            # events instead of failing the whole batch.
            print(f"Failed to put log events: {log_event['id']}")
            print(e)
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Terraform settings for this root module: remote state backend, provider
# pinning and the supported Terraform version range.
terraform {
# State is stored encrypted in S3, with a DynamoDB table configured for
# state locking.
backend "s3" {
acl = "private"
bucket = "global-tf-state-aqsvzyd5u9"
encrypt = true
key = "aws/analytical-platform-data-engineering-sandbox-a/cloudtrail-athena-events/terraform.tfstate"
region = "eu-west-2"
dynamodb_table = "global-tf-state-aqsvzyd5u9-locks"
}
required_providers {
# The AWS provider is pinned to an exact version.
aws = {
source = "hashicorp/aws"
version = "5.81.0"
}
}
required_version = "~> 1.5"
}

# Provider alias with no explicit region or role; used by the "session" data
# sources to look up the caller's own identity.
# NOTE(review): presumably relies on credentials and region from the
# environment - confirm.
provider "aws" {
alias = "session"
}

# Default provider: assumes the GlobalGitHubActionAdmin role in the
# analytical-platform-data-engineering-sandbox-a account (eu-west-2) and
# applies var.tags as default tags.
provider "aws" {
region = "eu-west-2"
assume_role {
role_arn = "arn:aws:iam::${var.account_ids["analytical-platform-data-engineering-sandbox-a"]}:role/GlobalGitHubActionAdmin"
}
default_tags {
tags = var.tags
}
}

# Provider alias for the analytical-platform-management-production account
# (eu-west-2), applying var.tags as default tags.
provider "aws" {
alias = "analytical-platform-management-production"
region = "eu-west-2"
assume_role {
# role_arn is null when the session issuer ARN contains
# "AdministratorAccess"; otherwise the GlobalGitHubActionAdmin role in the
# management production account is assumed.
role_arn = can(regex("AdministratorAccess", data.aws_iam_session_context.session.issuer_arn)) ? null : "arn:aws:iam::${var.account_ids["analytical-platform-management-production"]}:role/GlobalGitHubActionAdmin"
}
default_tags {
tags = var.tags
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Account name to AWS account ID map; the provider blocks look up these keys
# when building the role ARNs they assume.
account_ids = {
analytical-platform-data-engineering-sandbox-a = "684969100054"
analytical-platform-management-production = "042130406152"
}

# Tags passed to the providers' default_tags blocks via var.tags.
tags = {
business-unit = "Platforms"
application = "Analytical Platform"
component = "CloudTrail Athena Events"
environment = "sandbox-a"
is-production = "false"
owner = "analytical-platform:analytical-platform@digital.justice.gov.uk"
infrastructure-support = "analytical-platform:analytical-platform@digital.justice.gov.uk"
source-code = "github.com/ministryofjustice/analytical-platform/terraform/aws/analytical-platform-data-engineering-sandbox-a/cloudtrail-athena-events"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Indexed by account name in the provider blocks to build assume-role ARNs.
# No default is declared, so a value must be supplied.
variable "account_ids" {
type = map(string)
description = "Map of account names to account IDs"
}

# Consumed by the providers' default_tags blocks.
# No default is declared, so a value must be supplied.
variable "tags" {
type = map(string)
description = "Map of tags to apply to resources"
}
Loading