Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 5 additions & 8 deletions main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -160,12 +160,11 @@ module "services" {
monitoring_telemetry = var.monitoring_telemetry

# Data stores
postgres_username = module.database.postgres_database_username
postgres_password = module.database.postgres_database_password
postgres_host = module.database.postgres_database_address
postgres_port = module.database.postgres_database_port
redis_host = module.redis.redis_endpoint
redis_port = module.redis.redis_port
postgres_database_secret_arn = module.database.postgres_database_secret_arn
postgres_host = module.database.postgres_database_address
postgres_port = module.database.postgres_database_port
redis_host = module.redis.redis_endpoint
redis_port = module.redis.redis_port

brainstore_enabled = var.enable_brainstore
brainstore_default = var.brainstore_default
Expand Down Expand Up @@ -379,5 +378,3 @@ module "brainstore" {
cache_file_size_writer = var.brainstore_cache_file_size_writer
locks_s3_path = var.brainstore_locks_s3_path
}


30 changes: 27 additions & 3 deletions modules/services/iam.tf
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@ resource "aws_iam_policy" "api_handler_lambda_policies" {
Action = ["lambda:InvokeFunction"]
Effect = "Allow"
Resource = aws_lambda_function.catchup_etl.arn
},
{
Sid = "ReadPostgresSecret"
Action = ["secretsmanager:GetSecretValue"]
Effect = "Allow"
Resource = var.postgres_database_secret_arn
}
]
Version = "2012-10-17"
Expand Down Expand Up @@ -70,6 +76,13 @@ resource "aws_iam_role_policy_attachment" "lambda_vpc_access" {
policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole"
}

resource "aws_kms_grant" "default_role_postgres_secret" {
name = "${var.deployment_name}-default-role-postgres-secret"
key_id = var.kms_key_arn
grantee_principal = aws_iam_role.default_role.arn
operations = ["Decrypt"]
}

resource "aws_iam_role_policy" "default_role_policy" {
name = "${var.deployment_name}-DefaultRolePolicy"
role = aws_iam_role.default_role.id
Expand Down Expand Up @@ -103,9 +116,20 @@ resource "aws_iam_role_policy" "default_role_policy" {
"arn:aws:logs:${data.aws_region.current.region}:${data.aws_caller_identity.current.account_id}:log-group:/braintrust/${var.deployment_name}/*",
]
},
{
Sid = "ReadPostgresSecret"
Action = ["secretsmanager:GetSecretValue"]
Effect = "Allow"
Resource = var.postgres_database_secret_arn
},
{
Sid = "UseKmsForPostgresSecret"
Action = [
"kms:Decrypt"
]
Effect = "Allow"
Resource = var.kms_key_arn
},
]
})
}



4 changes: 3 additions & 1 deletion modules/services/lambda-aiproxy.tf
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ resource "aws_lambda_function" "ai_proxy" {
# See https://github.com/tobilg/duckdb-nodejs-layer
layers = concat(
[local.duckdb_nodejs_arm64_layer_arn],
local.observability_enabled ? [local.datadog_node_layer_arn, local.datadog_extension_arm_layer_arn] : []
local.observability_enabled ? [local.datadog_node_layer_arn, local.datadog_extension_arm_layer_arn] : [],
[data.aws_lambda_layer_version.aws_params_secrets_arm64.arn],
[aws_lambda_layer_version.secrets_wrapper.arn],
)

logging_config {
Expand Down
10 changes: 8 additions & 2 deletions modules/services/lambda-apihandler.tf
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@ locals {
PRIMARY_ORG_NAME = var.primary_org_name
BRAINTRUST_DEPLOYMENT_NAME = var.deployment_name

PG_URL = local.postgres_url
PG_HOST = var.postgres_host
PG_PORT = var.postgres_port
DATABASE_SECRETS_ARN = var.postgres_database_secret_arn
AWS_LAMBDA_EXEC_WRAPPER = "/opt/bin/aws-sm-wrapper.sh"

REDIS_HOST = var.redis_host
REDIS_PORT = var.redis_port
RESPONSE_BUCKET = local.lambda_responses_bucket_id
Expand Down Expand Up @@ -81,7 +85,9 @@ resource "aws_lambda_function" "api_handler" {
# See https://github.com/tobilg/duckdb-nodejs-layer
layers = concat(
[local.duckdb_nodejs_arm64_layer_arn],
local.observability_enabled ? [local.datadog_node_layer_arn, local.datadog_extension_arm_layer_arn] : []
local.observability_enabled ? [local.datadog_node_layer_arn, local.datadog_extension_arm_layer_arn] : [],
[data.aws_lambda_layer_version.aws_params_secrets_arm64.arn],
[aws_lambda_layer_version.secrets_wrapper.arn],
)

ephemeral_storage {
Expand Down
10 changes: 8 additions & 2 deletions modules/services/lambda-automation-cron.tf
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ resource "aws_lambda_function" "automation_cron" {
# See https://github.com/tobilg/duckdb-nodejs-layer
layers = concat(
[local.duckdb_nodejs_arm64_layer_arn],
local.observability_enabled ? [local.datadog_node_layer_arn, local.datadog_extension_arm_layer_arn] : []
local.observability_enabled ? [local.datadog_node_layer_arn, local.datadog_extension_arm_layer_arn] : [],
[data.aws_lambda_layer_version.aws_params_secrets_arm64.arn],
[aws_lambda_layer_version.secrets_wrapper.arn],
)

ephemeral_storage {
Expand All @@ -31,10 +33,12 @@ resource "aws_lambda_function" "automation_cron" {
environment {
variables = merge({
ORG_NAME = var.braintrust_org_name
PG_URL = local.postgres_url
REDIS_HOST = var.redis_host
REDIS_PORT = var.redis_port
REDIS_URL = "redis://${var.redis_host}:${var.redis_port}"
PG_HOST = var.postgres_host
PG_PORT = var.postgres_port
DATABASE_SECRETS_ARN = var.postgres_database_secret_arn
BRAINSTORE_ENABLED = var.brainstore_enabled
BRAINSTORE_BACKFILL_HISTORICAL_BATCH_SIZE = var.brainstore_etl_batch_size
BRAINSTORE_BACKFILL_ENABLE_NONHISTORICAL = var.brainstore_default
Expand All @@ -43,6 +47,8 @@ resource "aws_lambda_function" "automation_cron" {
BRAINSTORE_REALTIME_WAL_BUCKET = local.brainstore_s3_bucket
FUNCTION_SECRET_KEY = var.function_tools_secret_key
CRON_OVERRIDE_SECRET_KEY = random_password.service_token_secret_key.result
AWS_LAMBDA_EXEC_WRAPPER = "/opt/bin/aws-sm-wrapper.sh"

},
var.extra_env_vars.AutomationCron,
local.observability_enabled ? merge(local.datadog_env_vars, {
Expand Down
11 changes: 9 additions & 2 deletions modules/services/lambda-billing-cron.tf
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,25 @@ resource "aws_lambda_function" "billing_cron" {
architectures = ["arm64"]
kms_key_arn = var.kms_key_arn

layers = local.observability_enabled ? [local.datadog_node_layer_arn, local.datadog_extension_arm_layer_arn] : []
layers = concat(
local.observability_enabled ? [local.datadog_node_layer_arn, local.datadog_extension_arm_layer_arn] : [],
[data.aws_lambda_layer_version.aws_params_secrets_arm64.arn],
[aws_lambda_layer_version.secrets_wrapper.arn],
)

environment {
variables = merge({
ORG_NAME = var.braintrust_org_name
PG_URL = local.postgres_url
REDIS_HOST = var.redis_host
REDIS_PORT = var.redis_port
PG_HOST = var.postgres_host
PG_PORT = var.postgres_port
DATABASE_SECRETS_ARN = var.postgres_database_secret_arn
CONTROL_PLANE_TELEMETRY = var.monitoring_telemetry
TELEMETRY_DISABLE_AGGREGATION = var.disable_billing_telemetry_aggregation
TELEMETRY_LOG_LEVEL = var.billing_telemetry_log_level
SERVICE_TOKEN_SECRET_KEY = var.function_tools_secret_key
AWS_LAMBDA_EXEC_WRAPPER = "/opt/bin/aws-sm-wrapper.sh"
},
var.extra_env_vars.BillingCron,
local.observability_enabled ? merge(local.datadog_env_vars, {
Expand Down
11 changes: 9 additions & 2 deletions modules/services/lambda-catchup-etl.tf
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,26 @@ resource "aws_lambda_function" "catchup_etl" {
architectures = ["arm64"]
kms_key_arn = var.kms_key_arn

layers = local.observability_enabled ? [local.datadog_node_layer_arn, local.datadog_extension_arm_layer_arn] : []
layers = concat(
local.observability_enabled ? [local.datadog_node_layer_arn, local.datadog_extension_arm_layer_arn] : [],
[data.aws_lambda_layer_version.aws_params_secrets_arm64.arn],
[aws_lambda_layer_version.secrets_wrapper.arn],
)

environment {
variables = merge({
ORG_NAME = var.braintrust_org_name
PG_URL = local.postgres_url
REDIS_HOST = var.redis_host
REDIS_PORT = var.redis_port
PG_HOST = var.postgres_host
PG_PORT = var.postgres_port
DATABASE_SECRETS_ARN = var.postgres_database_secret_arn
BRAINSTORE_ENABLED = var.brainstore_enabled
BRAINSTORE_URL = local.brainstore_url
BRAINSTORE_WRITER_URL = local.brainstore_writer_url
BRAINSTORE_REALTIME_WAL_BUCKET = local.brainstore_s3_bucket
BRAINSTORE_BACKFILL_HISTORICAL_BATCH_SIZE = var.brainstore_etl_batch_size
AWS_LAMBDA_EXEC_WRAPPER = "/opt/bin/aws-sm-wrapper.sh"
},
var.extra_env_vars.CatchupETL,
local.observability_enabled ? merge(local.datadog_env_vars, {
Expand Down
11 changes: 9 additions & 2 deletions modules/services/lambda-migrate-database.tf
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,11 @@ resource "aws_lambda_function" "migrate_database" {
publish = true
kms_key_arn = var.kms_key_arn

layers = local.observability_enabled ? [local.datadog_python_layer_arn, local.datadog_extension_layer_arn] : []
layers = concat(
local.observability_enabled ? [local.datadog_node_layer_arn, local.datadog_extension_arm_layer_arn] : [],
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Use Python/x86 Datadog layers for migration Lambda

When internal_observability_api_key is set, this Lambda now attaches Datadog-Node22-x plus the ARM extension even though migrate_database runs python3.13 on the default x86_64 architecture. In that mode the update can fail due to incompatible layer architecture, and even if it deploys, the Python Datadog handler path (datadog_lambda.handler.handler) is missing the expected Python layer, so migration invocations break in observability-enabled environments.

Useful? React with 👍 / 👎.

[data.aws_lambda_layer_version.aws_params_secrets_x86_64.arn],
[aws_lambda_layer_version.secrets_wrapper.arn],
)

logging_config {
log_format = local.observability_enabled ? "JSON" : "Text"
Expand All @@ -25,8 +29,11 @@ resource "aws_lambda_function" "migrate_database" {
environment {
variables = merge({
BRAINTRUST_RUN_DRAFT_MIGRATIONS = var.run_draft_migrations
PG_URL = local.postgres_url
INSERT_LOGS2 = "true"
PG_HOST = var.postgres_host
PG_PORT = var.postgres_port
DATABASE_SECRETS_ARN = var.postgres_database_secret_arn
AWS_LAMBDA_EXEC_WRAPPER = "/opt/bin/aws-sm-wrapper.sh"
},
var.extra_env_vars.MigrateDatabaseFunction,
local.observability_enabled ? merge(local.datadog_env_vars, {
Expand Down
10 changes: 8 additions & 2 deletions modules/services/lambda-quarantine-warmup.tf
Original file line number Diff line number Diff line change
Expand Up @@ -23,18 +23,24 @@ resource "aws_lambda_function" "quarantine_warmup" {
# See https://github.com/tobilg/duckdb-nodejs-layer
layers = concat(
[local.duckdb_nodejs_arm64_layer_arn],
local.observability_enabled ? [local.datadog_node_layer_arn, local.datadog_extension_arm_layer_arn] : []
local.observability_enabled ? [local.datadog_node_layer_arn, local.datadog_extension_arm_layer_arn] : [],
[data.aws_lambda_layer_version.aws_params_secrets_arm64.arn],
[aws_lambda_layer_version.secrets_wrapper.arn],
)

environment {
variables = merge({
ORG_NAME = var.braintrust_org_name
BRAINTRUST_DEPLOYMENT_NAME = var.deployment_name

PG_URL = local.postgres_url
REDIS_HOST = var.redis_host
REDIS_PORT = var.redis_port

PG_HOST = var.postgres_host
PG_PORT = var.postgres_port
DATABASE_SECRETS_ARN = var.postgres_database_secret_arn
AWS_LAMBDA_EXEC_WRAPPER = "/opt/bin/aws-sm-wrapper.sh"

QUARANTINE_INVOKE_ROLE = var.use_quarantine_vpc && var.quarantine_invoke_role_arn != null ? var.quarantine_invoke_role_arn : ""
QUARANTINE_FUNCTION_ROLE = var.use_quarantine_vpc && var.quarantine_function_role_arn != null ? var.quarantine_function_role_arn : ""
QUARANTINE_PRIVATE_SUBNET_1_ID = var.use_quarantine_vpc ? var.quarantine_vpc_private_subnets[0] : ""
Expand Down
87 changes: 87 additions & 0 deletions modules/services/lambda-secrets-extension.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# Region must be one of: us-east-1, us-east-2, us-west-2, eu-west-1, ca-central-1, ap-southeast-2
# ARNs: https://docs.aws.amazon.com/systems-manager/latest/userguide/ps-integration-lambda-extensions.html#ps-integration-lambda-extensions-add
locals {
secrets_ext_arns_arm64 = {
us-east-1 = {
arn = "arn:aws:lambda:us-east-1:177933569100:layer:AWS-Parameters-and-Secrets-Lambda-Extension-Arm64"
version = 61
}
us-east-2 = {
arn = "arn:aws:lambda:us-east-2:590474943231:layer:AWS-Parameters-and-Secrets-Lambda-Extension-Arm64"
version = 67
}
us-west-2 = {
arn = "arn:aws:lambda:us-west-2:345057560386:layer:AWS-Parameters-and-Secrets-Lambda-Extension-Arm64"
version = 61
}
Comment on lines +3 to +16
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can these be looked up? This isn't maintainable.

eu-west-1 = {
arn = "arn:aws:lambda:eu-west-1:015030872274:layer:AWS-Parameters-and-Secrets-Lambda-Extension-Arm64"
version = 63
}
ca-central-1 = {
arn = "arn:aws:lambda:ca-central-1:200266452380:layer:AWS-Parameters-and-Secrets-Lambda-Extension-Arm64"
version = 62
}
ap-southeast-2 = {
arn = "arn:aws:lambda:ap-southeast-2:665172237481:layer:AWS-Parameters-and-Secrets-Lambda-Extension-Arm64"
version = 63
}
}

secrets_ext_arns_x86_64 = {
us-east-1 = {
arn = "arn:aws:lambda:us-east-1:177933569100:layer:AWS-Parameters-and-Secrets-Lambda-Extension"
version = 67
}
us-east-2 = {
arn = "arn:aws:lambda:us-east-2:590474943231:layer:AWS-Parameters-and-Secrets-Lambda-Extension"
version = 73
}
us-west-2 = {
arn = "arn:aws:lambda:us-west-2:345057560386:layer:AWS-Parameters-and-Secrets-Lambda-Extension"
version = 67
}
eu-west-1 = {
arn = "arn:aws:lambda:eu-west-1:015030872274:layer:AWS-Parameters-and-Secrets-Lambda-Extension"
version = 63
}
ca-central-1 = {
arn = "arn:aws:lambda:ca-central-1:200266452380:layer:AWS-Parameters-and-Secrets-Lambda-Extension"
version = 70
}
ap-southeast-2 = {
arn = "arn:aws:lambda:ap-southeast-2:665172237481:layer:AWS-Parameters-and-Secrets-Lambda-Extension"
version = 63
}
}
}

# Looks up the pinned Parameters and Secrets Lambda Extension layer version
# (Arm64 variant) for the region this module is being deployed into.
data "aws_lambda_layer_version" "aws_params_secrets_arm64" {
  # Use the `region` attribute, matching how the rest of this module reads
  # data.aws_region.current.
  layer_name = local.secrets_ext_arns_arm64[data.aws_region.current.region].arn
  version    = local.secrets_ext_arns_arm64[data.aws_region.current.region].version

  lifecycle {
    # Preconditions are evaluated before the arguments above, so a region that
    # is missing from the pinned map fails with an actionable message instead
    # of an "invalid index" error.
    precondition {
      condition     = contains(keys(local.secrets_ext_arns_arm64), data.aws_region.current.region)
      error_message = "No Arm64 AWS Parameters and Secrets Lambda Extension layer is pinned for region ${data.aws_region.current.region}. Add an entry to local.secrets_ext_arns_arm64."
    }
  }
}

# Looks up the pinned Parameters and Secrets Lambda Extension layer version
# (x86_64 variant) for the region this module is being deployed into.
data "aws_lambda_layer_version" "aws_params_secrets_x86_64" {
  # Use the `region` attribute, matching how the rest of this module reads
  # data.aws_region.current.
  layer_name = local.secrets_ext_arns_x86_64[data.aws_region.current.region].arn
  version    = local.secrets_ext_arns_x86_64[data.aws_region.current.region].version

  lifecycle {
    # Preconditions are evaluated before the arguments above, so a region that
    # is missing from the pinned map fails with an actionable message instead
    # of an "invalid index" error.
    precondition {
      condition     = contains(keys(local.secrets_ext_arns_x86_64), data.aws_region.current.region)
      error_message = "No x86_64 AWS Parameters and Secrets Lambda Extension layer is pinned for region ${data.aws_region.current.region}. Add an entry to local.secrets_ext_arns_x86_64."
    }
  }
}

#-----------------------------------------------------
# TODO: relocate layer to `dist` and add to postbuild
#-----------------------------------------------------

# Zips the contents of the module's `secrets-wrapper` directory so it can be
# published as a Lambda layer.
data "archive_file" "secrets_wrapper_layer" {
  # Input: everything under <module>/secrets-wrapper.
  source_dir = "${path.module}/secrets-wrapper"

  # Output: a zip archive written under the module's .build directory.
  output_path = "${path.module}/.build/wrapper_layer.zip"
  type        = "zip"
}

# Publishes the zipped secrets wrapper as a Lambda layer. Functions in this
# module attach the layer and point AWS_LAMBDA_EXEC_WRAPPER at
# /opt/bin/aws-sm-wrapper.sh.
resource "aws_lambda_layer_version" "secrets_wrapper" {
  # Prefix with the deployment name, like other named resources in this module,
  # so that multiple deployments in one account/region do not publish versions
  # into the same layer.
  layer_name  = "${var.deployment_name}-secrets-env-wrapper"
  description = "Exec wrapper that fetches Secrets Manager secrets and injects them as environment variables."

  filename = data.archive_file.secrets_wrapper_layer.output_path
  # A new layer version is published whenever the archive contents change.
  source_code_hash = data.archive_file.secrets_wrapper_layer.output_base64sha256

  # Declared for both runtimes and both architectures, so one layer can be
  # attached to the Node.js and the Python functions alike.
  compatible_runtimes      = ["nodejs22.x", "python3.13"]
  compatible_architectures = ["arm64", "x86_64"]
}
10 changes: 5 additions & 5 deletions modules/services/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ locals {
observability_enabled = nonsensitive(var.internal_observability_api_key != null && var.internal_observability_api_key != "")
datadog_node_layer_arn = "arn:aws:lambda:${data.aws_region.current.region}:464622532012:layer:Datadog-Node22-x:131"
datadog_extension_arm_layer_arn = "arn:aws:lambda:${data.aws_region.current.region}:464622532012:layer:Datadog-Extension-ARM:90"
datadog_python_layer_arn = "arn:aws:lambda:${data.aws_region.current.region}:464622532012:layer:Datadog-Python313:118"
datadog_extension_layer_arn = "arn:aws:lambda:${data.aws_region.current.region}:464622532012:layer:Datadog-Extension:70"
nodejs_datadog_handler = "/opt/nodejs/node_modules/datadog-lambda-js/handler.handler"
python_datadog_handler = "datadog_lambda.handler.handler"
# datadog_python_layer_arn = "arn:aws:lambda:${data.aws_region.current.region}:464622532012:layer:Datadog-Python313:118"
# datadog_extension_layer_arn = "arn:aws:lambda:${data.aws_region.current.region}:464622532012:layer:Datadog-Extension:70"
nodejs_datadog_handler = "/opt/nodejs/node_modules/datadog-lambda-js/handler.handler"
python_datadog_handler = "datadog_lambda.handler.handler"
datadog_env_vars = {
DD_SITE = "${var.internal_observability_region}.datadoghq.com"
DD_API_KEY = var.internal_observability_api_key != null ? var.internal_observability_api_key : ""
Expand All @@ -36,7 +36,7 @@ locals {
lambda => trimspace(data.http.lambda_versions[lambda].response_body)
}

postgres_url = "postgres://${var.postgres_username}:${var.postgres_password}@${var.postgres_host}:${var.postgres_port}/postgres"
# postgres_url = "postgres://${var.postgres_username}:${var.postgres_password}@${var.postgres_host}:${var.postgres_port}/postgres"
using_brainstore_writer = var.brainstore_writer_hostname != null && var.brainstore_writer_hostname != ""
using_brainstore_fast_reader = var.brainstore_fast_reader_hostname != null && var.brainstore_fast_reader_hostname != ""
brainstore_url = var.brainstore_enabled ? "http://${var.brainstore_hostname}:${var.brainstore_port}" : ""
Expand Down
1 change: 0 additions & 1 deletion modules/services/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,3 @@ output "ai_proxy_url_ssm_arn" {
description = "The ARN of the SSM parameter containing the AI proxy URL"
value = aws_ssm_parameter.ai_proxy_url.arn
}

Loading
Loading