Skip to content

Commit 0e1ee8f

Browse files
authored
Draft for put landing page; identified TODOs (#34)
* Draft for put landing page; identified TODOs Issue: #25 * Completed tf surgery; Identify all TODOs in golang (#35) * Complete tf surgery; Identify all TODOs in golang For #25 * fix compile error; progress in metadata cronjob add query * Ready to test (#36) * Ready to test * Fix db field first char not lowercase Tracked by #25 (comment) * Fix permission of db index, S3 pull Tracked by #25 (comment) * All tests complete Tracked by #25 (comment)
1 parent eb61eef commit 0e1ee8f

File tree

23 files changed

+646
-329
lines changed

23 files changed

+646
-329
lines changed

.gitignore

+5-4
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
**credential**
22
**/builds/**
33

4-
lambda_golang/landing
5-
lambda_golang/stories
6-
lambda_golang/landing_metadata
7-
lambda_golang/story
4+
lambda_golang/*
5+
!lambda_golang/go.mod
6+
!lambda_golang/go.sum
7+
!lambda_golang/*/
8+
!lambda_golang/*/**
89
venv
910

1011
# Binaries for programs and plugins

cloud_environments/terraform.sh

+3-2
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,10 @@ set +o allexport
2222
if (
2323
cd $GOLANG_SRC_DIR && \
2424
go build ./cmd/landing && \
25-
go build ./cmd/landing_metadata && \
25+
go build ./cmd/landing_metadata_cronjob && \
2626
go build ./cmd/stories && \
2727
go build ./cmd/story && \
28+
go build ./cmd/stories_finalizer && \
2829
cd $PYTHON_SRC_DIR && python -m compileall layer src
2930
); then
3031
cd $DEPLOY_DIR
@@ -37,7 +38,7 @@ if (
3738
# https://github.com/terraform-aws-modules/terraform-aws-step-functions/issues/20
3839
# terraform "$@" \
3940
# -target=module.main.module.scraper_lambda \
40-
# -target=module.main.module.landing_parse_metadata_lambda
41+
# -target=module.main.module.landing_metadata_cronjob_lambda
4142

4243
terraform "$@"
4344
else

cloud_module/dynamodb/table.tf

+16-3
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
resource "aws_ssm_parameter" "media_table" {
22
name = "/app/media-literacy/table"
33
type = "String"
4-
value = aws_dynamodb_table.media_table.arn
4+
value = "${aws_dynamodb_table.media_table.arn},${aws_dynamodb_table.media_table.id}"
55
}
66

77
// https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/dynamodb_table#attributes-reference
88
resource "aws_dynamodb_table" "media_table" {
9-
name = "Mediatable"
9+
name = "${title(replace("${var.project_alias}_${var.environment_name}", "-", "_"))}"
1010
billing_mode = "PROVISIONED"
1111
read_capacity = 20
1212
write_capacity = 20
@@ -23,8 +23,12 @@ resource "aws_dynamodb_table" "media_table" {
2323
type = "S"
2424
}
2525

26+
attribute {
27+
name = "s3Key"
28+
type = "S"
29+
}
30+
2631
// other fields
27-
// S3 key
2832
// docType = {landing | story | landingMetadata | ...}
2933
// events
3034

@@ -58,6 +62,15 @@ resource "aws_dynamodb_table" "media_table" {
5862
non_key_attributes = ["s3Key"]
5963
}
6064

65+
global_secondary_index {
66+
name = "s3KeyIndex"
67+
hash_key = "s3Key"
68+
range_key = "createdAt"
69+
write_capacity = 10
70+
read_capacity = 10
71+
projection_type = "KEYS_ONLY"
72+
}
73+
6174
tags = {
6275
Project = local.project_name
6376
Environment = var.environment_name

cloud_module/pipeline/global_ssm.tf

+4
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,8 @@ data aws_ssm_parameter media_table {
99
locals {
1010
newssite_economy_tokens = split(",", data.aws_ssm_parameter.newssite_economy.value)
1111
newssite_economy_alias = local.newssite_economy_tokens[2]
12+
13+
_media_table_tokens = split(",", data.aws_ssm_parameter.media_table.value)
14+
media_table_arn = local._media_table_tokens[0]
15+
media_table_id = local._media_table_tokens[1]
1216
}

cloud_module/pipeline/lambda.tf

+9-1
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ module "step_function" {
5959
module "scraper_lambda" {
6060
source = "terraform-aws-modules/lambda/aws"
6161
create_function = true
62-
function_name = "${local.project_name}-scraper-lambda"
62+
function_name = "${local.project_name}-landing-lambda"
6363
description = "Lambda function for scraping"
6464
handler = "landing"
6565
runtime = "go1.x"
@@ -82,6 +82,13 @@ module "scraper_lambda" {
8282

8383
attach_policy_statements = true
8484
policy_statements = {
85+
allow_db_query = {
86+
effect = "Allow",
87+
actions = [
88+
"dynamodb:PutItem"
89+
],
90+
resources = [local.media_table_arn]
91+
}
8592
s3_archive_bucket = {
8693
effect = "Allow",
8794
actions = [
@@ -98,6 +105,7 @@ module "scraper_lambda" {
98105
S3_ARCHIVE_BUCKET = data.aws_s3_bucket.archive.id
99106

100107
NEWSSITE_ECONOMY = data.aws_ssm_parameter.newssite_economy.value
108+
DYNAMODB_TABLE_ID = local.media_table_id
101109
}
102110

103111
tags = {

cloud_module/pipeline/landing_s3_trigger.tf

-38
This file was deleted.

cloud_module/pipeline/stories_sqs.tf cloud_module/pipeline/s3_triggers.tf

+30-42
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,31 @@
1-
module "stories_queue" {
2-
source = "terraform-aws-modules/sqs/aws"
3-
version = ">= 2.0, < 3.0"
4-
5-
# SQS queue attributes: https://docs.aws.amazon.com/AWSSimpleQueueService/latest/APIReference/API_CreateQueue.html
6-
7-
# FIFO queue should append suffix .fifo
8-
name = "${local.project_name}-stories-queue"
9-
10-
delay_seconds = 0
11-
12-
# so we can use per-message delay
13-
fifo_queue = false
14-
15-
# FIFO queue only
16-
# content_based_deduplication = true
17-
18-
visibility_timeout_seconds = 3600
1+
resource "aws_s3_bucket_notification" "bucket_notification" {
2+
bucket = data.aws_s3_bucket.archive.id
3+
4+
lambda_function {
5+
lambda_function_arn = module.landing_metadata_s3_trigger_lambda.lambda_function_arn
6+
events = ["s3:ObjectCreated:*"]
7+
filter_prefix = "${local.newssite_economy_alias}/"
8+
filter_suffix = "/metadata.json"
9+
}
1910

20-
# enable long polling
21-
receive_wait_time_seconds = 10
11+
depends_on = [
12+
aws_lambda_permission.allow_bucket_trigger_by_landing_metadata
13+
]
14+
}
2215

23-
tags = {
24-
Project = local.project_name
25-
}
16+
resource "aws_lambda_permission" "allow_bucket_trigger_by_landing_metadata" {
17+
statement_id = "AllowExecutionFromS3Bucket"
18+
action = "lambda:InvokeFunction"
19+
function_name = module.landing_metadata_s3_trigger_lambda.lambda_function_arn
20+
principal = "s3.amazonaws.com"
21+
source_arn = data.aws_s3_bucket.archive.arn
2622
}
2723

28-
module "stories_queue_consumer_lambda" {
24+
module "landing_metadata_s3_trigger_lambda" {
2925
source = "terraform-aws-modules/lambda/aws"
3026

3127
create_function = true
32-
function_name = "${local.project_name}-fetch-stories"
28+
function_name = "${local.project_name}-stories-lambda"
3329
description = "Fetch ${local.project_name} stories; triggered by metadata.json creation"
3430
handler = "stories"
3531
runtime = "go1.x"
@@ -62,30 +58,20 @@ module "stories_queue_consumer_lambda" {
6258
}
6359
EOF
6460

65-
# event source mapping for long polling
66-
event_source_mapping = {
67-
sqs = {
68-
event_source_arn = module.stories_queue.this_sqs_queue_arn
69-
batch_size = 1
70-
}
71-
}
72-
allowed_triggers = {
73-
sqs = {
74-
principal = "sqs.amazonaws.com"
75-
source_arn = module.stories_queue.this_sqs_queue_arn
76-
}
77-
}
7861
attach_policy_statements = true
7962
policy_statements = {
80-
pull_sqs = {
63+
allow_db_put = {
8164
effect = "Allow",
82-
actions = ["sqs:ReceiveMessage", "sqs:DeleteMessage", "sqs:GetQueueAttributes"],
83-
resources = [module.stories_queue.this_sqs_queue_arn]
65+
actions = [
66+
"dynamodb:UpdateItem",
67+
],
68+
resources = [
69+
local.media_table_arn,
70+
]
8471
}
8572
s3_archive_bucket = {
8673
effect = "Allow",
8774
actions = [
88-
"s3:PutObject",
8975
"s3:GetObject"
9076
],
9177
resources = [
@@ -107,8 +93,10 @@ EOF
10793
SLACK_WEBHOOK_URL = var.slack_post_webhook_url
10894
LOGLEVEL = "DEBUG"
10995
ENV = local.environment
96+
DEBUG = "true"
11097

11198
S3_ARCHIVE_BUCKET = data.aws_s3_bucket.archive.id
99+
DYNAMODB_TABLE_ID = local.media_table_id
112100
SFN_ARN = module.batch_stories_sfn.state_machine_arn
113101
}
114102

cloud_module/pipeline/scheduler.tf

+87
Original file line numberDiff line numberDiff line change
@@ -60,3 +60,90 @@ data "aws_iam_policy_document" "scheduler" {
6060
}
6161
}
6262
}
63+
64+
65+
resource "aws_cloudwatch_event_rule" "landing_metadata_scheduler" {
66+
count = var.environment_name == "" ? 1 : 0
67+
68+
name = "${local.project_name}-schedule-start-metadata-for-landing"
69+
# schedule expression
70+
# https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html
71+
schedule_expression = "rate(1 hour)"
72+
description = "Every hour to give courtesy to the website"
73+
}
74+
75+
resource "aws_cloudwatch_event_target" "landing_metadata_scheduler_event_target" {
76+
count = var.environment_name == "" ? 1 : 0
77+
78+
target_id = "${local.project_name}-schedule-start-metadata-for-landing-event-target"
79+
rule = aws_cloudwatch_event_rule.landing_metadata_scheduler.0.name
80+
arn = module.landing_metadata_cronjob_lambda.lambda_function_arn
81+
}
82+
83+
module landing_metadata_cronjob_lambda {
84+
source = "terraform-aws-modules/lambda/aws"
85+
create_function = true
86+
function_name = "${local.project_name}-landing-metadata-cronjob-lambda"
87+
description = "Query landing pages in db; compute & archive their metadata"
88+
handler = "landing_metadata_cronjob"
89+
runtime = "go1.x"
90+
91+
source_path = [{
92+
path = "${var.repo_dir}/lambda_golang/"
93+
commands = ["${local.go_build_flags} go build ./cmd/landing_metadata_cronjob", ":zip"]
94+
patterns = ["landing_metadata_cronjob"]
95+
}]
96+
97+
timeout = 900
98+
cloudwatch_logs_retention_in_days = 7
99+
100+
publish = true
101+
102+
attach_policy_statements = true
103+
policy_statements = {
104+
allow_db_query = {
105+
effect = "Allow",
106+
actions = [
107+
"dynamodb:Query",
108+
"dynamodb:UpdateItem",
109+
],
110+
resources = [
111+
local.media_table_arn,
112+
"${local.media_table_arn}/index/metadataIndex"
113+
]
114+
}
115+
s3_archive_bucket = {
116+
effect = "Allow",
117+
actions = [
118+
"s3:GetObject",
119+
"s3:PutObject",
120+
],
121+
resources = [
122+
"${data.aws_s3_bucket.archive.arn}/*",
123+
]
124+
}
125+
# enable getting 404 instead of 403 in case of not found
126+
# https://stackoverflow.com/a/19808954/9814131
127+
s3_archive_bucket_check_404 = {
128+
effect = "Allow",
129+
actions = [
130+
"s3:ListBucket",
131+
],
132+
resources = [
133+
"${data.aws_s3_bucket.archive.arn}",
134+
]
135+
}
136+
}
137+
138+
environment_variables = {
139+
SLACK_WEBHOOK_URL = var.slack_post_webhook_url
140+
LOG_LEVEL = "DEBUG"
141+
DEBUG = "true"
142+
S3_ARCHIVE_BUCKET = data.aws_s3_bucket.archive.id
143+
DYNAMODB_TABLE_ID = local.media_table_id
144+
}
145+
146+
tags = {
147+
Project = local.project_name
148+
}
149+
}

cloud_module/pipeline/sfn_def/batch_stories_def.json

+6
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
"Parameters": {
1313
"story.$": "$$.Map.Item.Value",
1414
"newsSiteAlias.$": "$.newsSiteAlias",
15+
"landingPageUuid.$": "$.landingPageUuid",
1516
"landingPageTimeStamp.$": "$.landingPageTimeStamp"
1617
},
1718
"Iterator": {
@@ -32,6 +33,11 @@
3233
}
3334
}
3435
},
36+
"Next": "Stories-Finalizer"
37+
},
38+
"Stories-Finalizer": {
39+
"Type":"Task",
40+
"Resource": "${STORIES_FINALIZER_LAMBDA_ARN}",
3541
"End": true
3642
}
3743
}

0 commit comments

Comments
 (0)