Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 6 additions & 10 deletions iac/modules/job-logs-collector/configs/vector.toml
Original file line number Diff line number Diff line change
Expand Up @@ -113,22 +113,18 @@ category = "{{ category }}"

%{ if enable_otel_router_logs }
[sinks.otel_router_non_internal]
type = "opentelemetry"
type = "http"
inputs = [ "remove_internal" ]
uri = "http://127.0.0.1:${otel_router_http_port}/logs"
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The change from a Consul service address to 127.0.0.1 will break log routing in Nomad environments using bridged networking unless the otel-router is running as a sidecar in the same task group. If the router is a separate service, it should continue to use the Consul discovery address to ensure connectivity across the cluster regardless of the network configuration.

uri = "http://otel-router.service.consul:${otel_router_http_port}/logs"

method = "post"
healthcheck.enabled = false
buffer.type = "memory"
buffer.max_events = 500
buffer.when_full = "drop_newest"
encoding.codec = "json"
framing.method = "newline_delimited"

[sinks.otel_router_non_internal.protocol]
type = "http"
uri = "http://otel-router.service.consul:4318/v1/logs"
method = "post"

[sinks.otel_router_non_internal.protocol.encoding]
codec = "otlp"

[sinks.otel_router_non_internal.protocol.request]
[sinks.otel_router_non_internal.request]
retry_attempts = 0
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Setting retry_attempts to 0 causes immediate log loss during transient network issues or service restarts. Increasing the number of retries would improve the reliability of log delivery from Vector to the otel-router.

retry_attempts = 3

Copy link
Copy Markdown
Contributor Author

@wj-e2b wj-e2b May 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There shouldn't be any network issues over localhost; if there are, we have bigger problems and retries are not going to help. Disabling retries also saves Vector from having to buffer any logs.

timeout_secs = 2
%{ endif }
Expand Down
1 change: 1 addition & 0 deletions iac/modules/job-logs-collector/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ locals {
grafana_api_key = var.grafana_api_key

enable_otel_router_logs = var.enable_otel_router_logs
otel_router_http_port = var.otel_router_http_port
},
)

Expand Down
6 changes: 6 additions & 0 deletions iac/modules/job-logs-collector/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,12 @@ variable "enable_otel_router_logs" {
description = "Enable teeing non-internal customer logs from Vector to otel-router."
}

variable "otel_router_http_port" {
type = number
default = 4321
description = "Local otel-router Vector-compatible logs port used by Vector when otel-router log teeing is enabled."
}

variable "vector_config_override" {
type = string
default = ""
Expand Down
2 changes: 2 additions & 0 deletions iac/provider-aws/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ tf_vars := AWS_PROFILE=$(AWS_PROFILE) AWS_REGION=$(AWS_REGION) \
$(call tfvar, BUILD_SERVER_NESTED_VIRTUALIZATION) \
$(call tfvar, AWS_ACCOUNT_ID) \
$(call tfvar, AWS_REGION) \
$(call tfvar, ENABLE_OTEL_ROUTER_LOGS) \
$(call tfvar, OTEL_ROUTER_HTTP_PORT) \
$(call tfvar, DB_MAX_OPEN_CONNECTIONS) \
$(call tfvar, DB_MIN_IDLE_CONNECTIONS) \
$(call tfvar, AUTH_DB_MAX_OPEN_CONNECTIONS) \
Expand Down
1 change: 1 addition & 0 deletions iac/provider-aws/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ module "nomad" {
launch_darkly_api_key = module.init.launch_darkly_api_key

enable_otel_router_logs = var.enable_otel_router_logs
otel_router_http_port = var.otel_router_http_port

db_max_open_connections = var.db_max_open_connections
db_min_idle_connections = var.db_min_idle_connections
Expand Down
1 change: 1 addition & 0 deletions iac/provider-aws/nomad/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,7 @@ module "logs_collector" {
loki_endpoint = "http://loki.service.consul:${var.loki_port}"

enable_otel_router_logs = var.enable_otel_router_logs
otel_router_http_port = var.otel_router_http_port

vector_health_port = var.logs_health_proxy_port
vector_api_port = var.logs_proxy_port
Expand Down
6 changes: 6 additions & 0 deletions iac/provider-aws/nomad/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,12 @@ variable "enable_otel_router_logs" {
description = "Enable teeing non-internal customer logs from Vector to otel-router."
}

variable "otel_router_http_port" {
type = number
default = 4321
description = "Local otel-router Vector-compatible logs port used by Vector when otel-router log teeing is enabled."
}

# Feature flags
variable "launch_darkly_api_key" {
type = string
Expand Down
6 changes: 6 additions & 0 deletions iac/provider-aws/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -198,3 +198,9 @@ variable "enable_otel_router_logs" {
default = false
description = "Enable teeing non-internal customer logs from Vector to otel-router."
}

variable "otel_router_http_port" {
type = number
default = 4321
description = "Local otel-router Vector-compatible logs port used by Vector when otel-router log teeing is enabled."
}
2 changes: 2 additions & 0 deletions iac/provider-gcp/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ tf_vars := \
$(call tfvar, LOKI_RESOURCES_MEMORY_MB) \
$(call tfvar, OTEL_COLLECTOR_RESOURCES_CPU_COUNT) \
$(call tfvar, OTEL_COLLECTOR_RESOURCES_MEMORY_MB) \
$(call tfvar, ENABLE_OTEL_ROUTER_LOGS) \
$(call tfvar, OTEL_ROUTER_HTTP_PORT) \
$(call tfvar, TEMPLATE_BUCKET_NAME) \
$(call tfvar, TEMPLATE_BUCKET_LOCATION) \
$(call tfvar, ENVD_TIMEOUT) \
Expand Down
1 change: 1 addition & 0 deletions iac/provider-gcp/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,7 @@ module "nomad" {
otel_collector_resources_memory_mb = var.otel_collector_resources_memory_mb
otel_collector_resources_cpu_count = var.otel_collector_resources_cpu_count
enable_otel_router_logs = var.enable_otel_router_logs
otel_router_http_port = var.otel_router_http_port

# Dashboard API
dashboard_api_count = var.dashboard_api_count
Expand Down
1 change: 1 addition & 0 deletions iac/provider-gcp/nomad/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,7 @@ module "logs_collector" {
loki_endpoint = "http://loki.service.consul:${var.loki_service_port.port}"

enable_otel_router_logs = var.enable_otel_router_logs
otel_router_http_port = var.otel_router_http_port

vector_health_port = var.logs_health_proxy_port.port
vector_api_port = var.logs_proxy_port.port
Expand Down
6 changes: 6 additions & 0 deletions iac/provider-gcp/nomad/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,12 @@ variable "enable_otel_router_logs" {
description = "Enable teeing non-internal customer logs from Vector to otel-router."
}

variable "otel_router_http_port" {
type = number
default = 4321
description = "Local otel-router Vector-compatible logs port used by Vector when otel-router log teeing is enabled."
}

variable "clickhouse_server_port" {
type = object({
name = string
Expand Down
6 changes: 6 additions & 0 deletions iac/provider-gcp/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,12 @@ variable "enable_otel_router_logs" {
description = "Enable teeing non-internal customer logs from Vector to otel-router."
}

variable "otel_router_http_port" {
type = number
default = 4321
description = "Local otel-router Vector-compatible logs port used by Vector when otel-router log teeing is enabled."
}

variable "clickhouse_resources_memory_mb" {
type = number
default = 8192
Expand Down
Loading