Skip to content

Commit 3848da0

Browse files
committed
refactor: transition billing-usage-collector from OTLP push to Kubernetes log tailing via Vector's kubernetes_logs source
1 parent 34706ec commit 3848da0

2 files changed

Lines changed: 70 additions & 57 deletions

File tree

config/tools/billing-usage-collector/kustomization.yaml

Lines changed: 39 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -38,30 +38,46 @@ helmCharts:
3838
path: "/logs"
3939
decoding:
4040
codec: json
41-
# Envoy Gateway pushes access logs here via its OpenTelemetry access
42-
# log sink (OTLP). The JSON access log fields arrive as OTLP log-record
43-
# attributes (i.e. under `.attributes.*`). This is the real ingestion
44-
# path for live traffic; the `nso_logs` http_server above is only used
45-
# by the e2e test to inject hand-crafted log lines.
41+
# Envoy Gateway writes access logs to stdout via its File access log
42+
# sink (path: /dev/stdout), exactly as the production downstream
43+
# gateway EnvoyProxy does. The node-local Vector agent tails the
44+
# container log files through the kubernetes_logs source -- the real,
45+
# file-based ingestion path for live traffic. The JSON access log
46+
# line arrives as a string in `.message`. The `nso_logs` http_server
47+
# above is only used by the e2e test to inject hand-crafted log lines.
4648
envoy_access_logs:
47-
type: opentelemetry
48-
grpc:
49-
address: "0.0.0.0:4317"
50-
http:
51-
address: "0.0.0.0:4318"
49+
type: kubernetes_logs
50+
# Only tail pods in the envoy-gateway-system namespace; the Envoy access
51+
# logs live on the `envoy` container's stdout there. Other namespaces are
52+
# ignored so the agent does not parse unrelated cluster logs.
53+
extra_field_selector: "metadata.namespace=envoy-gateway-system"
54+
# Re-scan for new container log files every 2s instead of the 60s
55+
# default so the agent picks up freshly-created Envoy proxy pods
56+
# quickly -- the e2e test provisions a gateway and verifies within a
57+
# short window.
58+
glob_minimum_cooldown_ms: 2000
5259
transforms:
5360
parse_nso_logs:
5461
type: remap
5562
inputs:
5663
- nso_logs
57-
- envoy_access_logs.logs
64+
- envoy_access_logs
5865
source: |
59-
# The OpenTelemetry source (Envoy push) nests the access log fields
60-
# under `.attributes`; the http_server source (e2e test injection)
61-
# delivers them at the top level. Normalize both to top-level fields
62-
# so the rest of this transform is source-agnostic.
63-
if exists(.attributes) {
64-
. = object(.attributes) ?? .
66+
# The kubernetes_logs source (Envoy File sink -> stdout) delivers
67+
# the raw access log line as a string in `.message`; parse it as
68+
# JSON and lift the fields to the top level. The http_server source
69+
# (e2e test injection) already delivers them at the top level.
70+
# Normalize both so the rest of this transform is source-agnostic.
71+
# Lines that are not JSON (e.g. Envoy startup logs) are dropped.
72+
if is_string(.message) {
73+
# `.message` is known to be a string inside this guard, so
74+
# string!() never actually aborts (hence Vector's harmless
75+
# "can't abort infallible function" compile warning).
76+
parsed, err = parse_json(string!(.message))
77+
if err != null {
78+
abort
79+
}
80+
. = object(parsed) ?? {}
6581
}
6682
6783
parsed_route, err = parse_regex(.route_name, r'^httproute/(?P<namespace>[^/]+)/(?P<name>[^/]+)')
@@ -219,7 +235,12 @@ helmCharts:
219235
mountPath: /var/lib/vector
220236
podSecurityContext:
221237
runAsUser: 1000
222-
runAsGroup: 1000
238+
# Primary GID 0 (root group) so the kubernetes_logs source can read the
239+
# Envoy container logs: the kubelet writes them as root:root with
240+
# /var/log/pods at 0750 and each 0.log at 0640 -- both group-readable by
241+
# the root group only. We keep runAsUser 1000 / runAsNonRoot: true so the
242+
# process is still a non-root user; only the primary group is root.
243+
runAsGroup: 0
223244
runAsNonRoot: true
224245
securityContext:
225246
allowPrivilegeEscalation: false
@@ -243,16 +264,5 @@ helmCharts:
243264
port: 9881
244265
protocol: TCP
245266
targetPort: 9881
246-
# OTLP endpoints for Envoy Gateway's OpenTelemetry access log sink.
247-
# internalTrafficPolicy: Local keeps each Envoy pod talking to the
248-
# Vector agent on its own node.
249-
- name: otlp-grpc
250-
port: 4317
251-
protocol: TCP
252-
targetPort: 4317
253-
- name: otlp-http
254-
port: 4318
255-
protocol: TCP
256-
targetPort: 4318
257267
podMonitor:
258268
enabled: true

test/e2e/billing/chainsaw-test.yaml

Lines changed: 31 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -156,20 +156,17 @@ spec:
156156
accessLog:
157157
settings:
158158
- sinks:
159-
# File sink: the test still scrapes stdout to derive the
159+
# File sink only -- identical to the production
160+
# downstream gateway EnvoyProxy. Envoy writes the JSON
161+
# access log to stdout; the node-local Vector agent
162+
# tails the container log file (kubernetes_logs source)
163+
# and emits the CloudEvents. This exercises the real,
164+
# file-based production ingestion path -- no OTLP push.
165+
# The test also scrapes the same stdout to derive the
160166
# expected metric values (bytes, duration, project_name).
161167
- type: File
162168
file:
163169
path: /dev/stdout
164-
# OpenTelemetry sink: Envoy pushes the access log to the
165-
# node-local Vector agent automatically, exercising the
166-
# real production ingestion path (no manual forwarding).
167-
- type: OpenTelemetry
168-
openTelemetry:
169-
host: billing-usage-collector-vector.billing-system.svc.cluster.local
170-
port: 4317
171-
resources:
172-
service.name: nso-httproute-signals
173170
format:
174171
type: JSON
175172
json:
@@ -545,10 +542,11 @@ spec:
545542
curl -kvf -H \"Host: ${PRIMARY_HOSTNAME}\" -d 'hello world' http://\${GATEWAY_SERVICE_NAME}.\${GATEWAY_SERVICE_NAMESPACE}.svc.cluster.local/delay/2; \
546543
"
547544
548-
# Verify Vector emitted the CloudEvents. Envoy pushed the access log to
549-
# Vector automatically via its OpenTelemetry sink when the request above
550-
# was served, so this step only derives the expected values and polls the
551-
# mock billing gateway for the result.
545+
# Verify Vector emitted the CloudEvents. Envoy wrote the access log to
546+
# stdout via its File sink when the request above was served, and the
547+
# node-local Vector agent tailed the container log file (kubernetes_logs
548+
# source) automatically, so this step only derives the expected values
549+
# and polls the mock billing gateway for the result.
552550
- script:
553551
timeout: 45s
554552
cluster: nso-infra
@@ -559,16 +557,20 @@ spec:
559557
# =====================================================================
560558
# Validates the full HTTP metering pipeline, end to end:
561559
#
562-
# Envoy access log --(OTLP push)--> Vector opentelemetry source
563-
# | |
564-
# | (also written to stdout for | (VRL transform fans the
565-
# | capturing expected values) | log record out into
566-
# v v CloudEvents)
567-
# captured here (STEP 1-2) mock-billing-gateway (STEP 4)
560+
# Envoy access log --(File sink -> stdout)--> container log file
561+
# | |
562+
# | (captured here for | (Vector agent
563+
# | expected values) | kubernetes_logs
564+
# | | source tails it,
565+
# | | VRL transform fans
566+
# v v it into CloudEvents)
567+
# captured here (STEP 1-2) mock-billing-gateway (STEP 4)
568568
#
569-
# Envoy pushes the access log to Vector automatically (OpenTelemetry
570-
# sink on custom-proxy-config -> billing-usage-collector-vector:4317)
571-
# when the request is served -- the test does NOT forward it manually.
569+
# Envoy writes the access log to stdout (File sink on
570+
# custom-proxy-config, path: /dev/stdout) when the request is
571+
# served; the node-local Vector agent tails the container log file
572+
# automatically -- the test does NOT forward it manually. This is
573+
# the same file-based ingestion path as the production gateway.
572574
#
573575
# A single access log line is expected to produce FOUR CloudEvents,
574576
# one per usage dimension, all sharing the same subject
@@ -627,11 +629,12 @@ spec:
627629
echo " - Project name (expected): $PROJECT_NAME"
628630
629631
# --- STEP 3: (no manual forwarding) --------------------------------
630-
# The request above already caused Envoy to push this access log to
631-
# Vector automatically via its OpenTelemetry sink (custom-proxy-config
632-
# -> billing-usage-collector-vector:4317). Vector's VRL transform
633-
# parses it and emits the four CloudEvents to the mock-billing-gateway
634-
# sink, so there is nothing to POST here -- we just poll for the result.
632+
# The request above already caused Envoy to write this access log to
633+
# stdout via its File sink (custom-proxy-config, path: /dev/stdout).
634+
# The node-local Vector agent tails the Envoy container log file via
635+
# its kubernetes_logs source, and Vector's VRL transform parses it
636+
# and emits the four CloudEvents to the mock-billing-gateway sink, so
637+
# there is nothing to POST here -- we just poll for the result.
635638
636639
# --- STEP 4: verify the CloudEvents the mock billing gateway received
637640
# Poll the mock gateway's request log (Vector may take a moment to

0 commit comments

Comments
 (0)