Skip to content

Commit 218eb4a

Browse files
vparfonov and claude
authored and committed
fix(cloudwatch): cap maxWrite at 1MB and validate configuration
The CloudWatch Logs PutLogEvents API has a 1MB (1,048,576 bytes) limit per request. Previously, ClusterLogForwarder allowed setting tuning.maxWrite to values exceeding this limit (e.g., 10M), causing batch upload failures with "Upload too large" errors and dropped events.

Implement two layers of protection:

1. Validation: Reject ClusterLogForwarder specs at admission time if the CloudWatch output's maxWrite exceeds 1MB, providing users with a clear error message. This follows the existing pattern used for the Azure Logs Ingestion output, which has the same limit.

2. Runtime capping: The generator caps batch.max_bytes at 1MB in the Vector configuration, and sets it to 1MB when no batch tuning is configured, ensuring safe operation regardless of whether tuning is configured.

Fixes: LOG-9448

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
1 parent b4d7692 commit 218eb4a

12 files changed

Lines changed: 128 additions & 2 deletions

internal/generator/vector/output/aws/cloudwatch/cloudwatch.go

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ import (
2222
const (
2323
groupNameField = "cw_group_name"
2424
templatedInternalGroupNameField = `{{ _internal.` + groupNameField + ` }}`
25+
26+
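// CloudwatchDefaultMaxBytes is the batch size, in bytes, used as both the default and the upper bound for the CloudWatch sink; the CloudWatch Logs PutLogEvents API accepts at most 1MB (1,048,576 bytes) per request.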
27+
CloudwatchDefaultMaxBytes = 1_048_576
2528
)
2629

2730
func New(id string, o *adapters.Output, inputs []string, secrets observability.Secrets, op utils.Options) (_ string, sink types.Sink, tfs api.Transforms) {
@@ -42,7 +45,14 @@ func New(id string, o *adapters.Output, inputs []string, secrets observability.S
4245
} else {
4346
s.Compression = sinks.CompressionType(o.GetTuning().Compression)
4447
}
45-
s.Batch = common.NewApiBatch(o)
48+
if batch := common.NewApiBatch(o); batch != nil {
49+
if batch.MaxBytes > CloudwatchDefaultMaxBytes {
50+
batch.MaxBytes = CloudwatchDefaultMaxBytes
51+
}
52+
s.Batch = batch
53+
} else {
54+
s.Batch = &sinks.Batch{MaxBytes: CloudwatchDefaultMaxBytes}
55+
}
4656
s.Buffer = common.NewApiBuffer(o)
4757
s.Request = common.NewApiRequest(o)
4858
s.TLS = tls.NewTls(o, secrets, op)

internal/generator/vector/output/aws/cloudwatch/files/cw_groupname_with_aws_credentials.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,5 +43,8 @@ except_fields = ["_internal"]
4343
credentials_file = "/var/run/ocp-collector/config/my-forwarder-aws-creds/credentials"
4444
profile = "output_cw"
4545

46+
[sinks.cw.batch]
47+
max_bytes = 1048576
48+
4649
[sinks.cw.healthcheck]
4750
enabled = false

internal/generator/vector/output/aws/cloudwatch/files/cw_key_auth_and_assume_role.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,5 +45,8 @@ secret_access_key = "SECRET[kubernetes_secret.vector-cw-secret/aws_secret_access
4545
assume_role = "SECRET[kubernetes_secret.secretwithcredentials/my_role_arn]"
4646
external_id = "unique-external-id"
4747

48+
[sinks.cw.batch]
49+
max_bytes = 1048576
50+
4851
[sinks.cw.healthcheck]
4952
enabled = false

internal/generator/vector/output/aws/cloudwatch/files/cw_with_groupname.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,5 +42,8 @@ except_fields = ["_internal"]
4242
access_key_id = "SECRET[kubernetes_secret.vector-cw-secret/aws_access_key_id]"
4343
secret_access_key = "SECRET[kubernetes_secret.vector-cw-secret/aws_secret_access_key]"
4444

45+
[sinks.cw.batch]
46+
max_bytes = 1048576
47+
4548
[sinks.cw.healthcheck]
4649
enabled = false

internal/generator/vector/output/aws/cloudwatch/files/cw_with_tls_and_default_mintls_ciphers.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@ except_fields = ["_internal"]
4343
min_tls_version = "VersionTLS12"
4444
ciphersuites = "TLS_AES_128_GCM_SHA256,TLS_AES_256_GCM_SHA384,TLS_CHACHA20_POLY1305_SHA256,ECDHE-ECDSA-AES128-GCM-SHA256,ECDHE-RSA-AES128-GCM-SHA256,ECDHE-ECDSA-AES256-GCM-SHA384,ECDHE-RSA-AES256-GCM-SHA384,ECDHE-ECDSA-CHACHA20-POLY1305,ECDHE-RSA-CHACHA20-POLY1305,DHE-RSA-AES128-GCM-SHA256,DHE-RSA-AES256-GCM-SHA384"
4545

46+
[sinks.cw.batch]
47+
max_bytes = 1048576
48+
4649
[sinks.cw.auth]
4750
access_key_id = "SECRET[kubernetes_secret.vector-cw-secret/aws_access_key_id]"
4851
secret_access_key = "SECRET[kubernetes_secret.vector-cw-secret/aws_secret_access_key]"

internal/generator/vector/output/aws/cloudwatch/files/cw_with_tls_spec.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@ key_file = "/var/run/ocp-collector/secrets/vector-cw-secret-tls/tls.key"
4444
crt_file = "/var/run/ocp-collector/secrets/vector-cw-secret-tls/tls.crt"
4545
ca_file = "/var/run/ocp-collector/secrets/vector-cw-secret-tls/ca-bundle.crt"
4646

47+
[sinks.cw.batch]
48+
max_bytes = 1048576
49+
4750
[sinks.cw.auth]
4851
access_key_id = "SECRET[kubernetes_secret.vector-cw-secret/aws_access_key_id]"
4952
secret_access_key = "SECRET[kubernetes_secret.vector-cw-secret/aws_secret_access_key]"

internal/generator/vector/output/aws/cloudwatch/files/cw_with_tls_spec_insecure_verify.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ key_file = "/var/run/ocp-collector/secrets/vector-cw-secret-tls/tls.key"
4646
crt_file = "/var/run/ocp-collector/secrets/vector-cw-secret-tls/tls.crt"
4747
ca_file = "/var/run/ocp-collector/secrets/vector-cw-secret-tls/ca-bundle.crt"
4848

49+
[sinks.cw.batch]
50+
max_bytes = 1048576
51+
4952
[sinks.cw.auth]
5053
access_key_id = "SECRET[kubernetes_secret.vector-cw-secret/aws_access_key_id]"
5154
secret_access_key = "SECRET[kubernetes_secret.vector-cw-secret/aws_secret_access_key]"

internal/generator/vector/output/aws/cloudwatch/files/cw_with_tuning.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ codec = "json"
4040
except_fields = ["_internal"]
4141

4242
[sinks.cw.batch]
43-
max_bytes = 10000000
43+
max_bytes = 1048576
4444

4545
[sinks.cw.buffer]
4646
type = "disk"

internal/generator/vector/output/aws/cloudwatch/files/cw_with_url.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,5 +44,8 @@ except_fields = ["_internal"]
4444
access_key_id = "SECRET[kubernetes_secret.vector-cw-secret/aws_access_key_id]"
4545
secret_access_key = "SECRET[kubernetes_secret.vector-cw-secret/aws_secret_access_key]"
4646

47+
[sinks.cw.batch]
48+
max_bytes = 1048576
49+
4750
[sinks.cw.healthcheck]
4851
enabled = false

internal/validations/observability/outputs/validate.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ func Validate(context internalcontext.ForwarderContext) {
2525
switch out.Type {
2626
case obs.OutputTypeCloudwatch, obs.OutputTypeS3:
2727
messages = append(messages, ValidateAwsAuth(out, context)...)
28+
if out.Type == obs.OutputTypeCloudwatch {
29+
messages = append(messages, validateCloudwatchMaxWrite(out)...)
30+
}
2831
case obs.OutputTypeGoogleCloudLogging:
2932
messages = append(messages, ValidateGCLAuth(out, context)...)
3033
case obs.OutputTypeHTTP:

0 commit comments

Comments
 (0)