Skip to content

Commit bafcd37

Browse files
authored
Merge branch 'feat/http-traffic-metering' into feat/envoy-logs
2 parents 3848da0 + 92f41ae commit bafcd37

39 files changed

Lines changed: 2444 additions & 197 deletions

.github/workflows/publish.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ jobs:
2323
secrets: inherit
2424

2525
publish-kustomize-bundles:
26+
# The bundle pins image references to this build, so only publish it once
27+
# the container image has been built and pushed.
28+
needs: publish-container-image
2629
permissions:
2730
id-token: write
2831
contents: read

Taskfile.yaml

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
version: '3'
22

33
includes:
4+
# Documentation tasks
5+
docs:
6+
taskfile: ./docs/Taskfile.yaml
7+
dir: ./docs
48
dev:
59
taskfile: ./Taskfile.dev.yaml
610

@@ -77,4 +81,9 @@ tasks:
7781
echo ""
7882
echo "🎉 All Prometheus rule tests passed."
7983
fi
80-
silent: false
84+
silent: false
85+
86+
generate:
87+
desc: Run generation tasks (currently only documentation artifacts via docs:generate)
88+
deps:
89+
- task: docs:generate

api/v1alpha1/groupversion_info.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,22 @@ var (
2222
AddToScheme = SchemeBuilder.AddToScheme
2323
)
2424

25+
// UpstreamStatusAnnotation carries a verbatim copy of a resource's upstream
26+
// .status subresource down to edge member clusters.
27+
//
28+
// A resource's status is computed authoritatively in the Project control plane.
29+
// Karmada propagates a resource template's spec and metadata
30+
// (labels/annotations) to member clusters but NOT the status subresource, so a
31+
// member-cluster object never carries its upstream status. For resource types
32+
// whose downstream consumer needs that status (e.g. the edge extension server
33+
// reading Connector liveness), the replicator mirrors the full upstream status
34+
// JSON into this annotation — which Karmada DOES propagate — and the consumer
35+
// parses it back, falling back to the live status when the annotation is absent.
36+
//
37+
// The value is the resource's .status object marshalled to JSON verbatim; it is
38+
// resource-agnostic and carries no bespoke schema.
39+
const UpstreamStatusAnnotation = "networking.datumapis.com/upstream-status"
40+
2541
func addKnownTypes(scheme *runtime.Scheme) error {
2642
scheme.AddKnownTypes(GroupVersion,
2743
&Connector{},

config/extension-server/deployment.yaml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ spec:
3737
- maxSkew: 1
3838
topologyKey: kubernetes.io/hostname
3939
whenUnsatisfiable: DoNotSchedule
40+
nodeTaintsPolicy: Honor
4041
labelSelector:
4142
matchLabels:
4243
app.kubernetes.io/component: envoy-gateway-extension-server
@@ -123,6 +124,13 @@ spec:
123124
- name: tls-ca
124125
mountPath: /tls-ca
125126
readOnly: true
127+
# Branded data-plane error pages. Backed by an OPTIONAL ConfigMap so
128+
# the pod starts even when the ConfigMap is absent — the extension
129+
# server then serves the page compiled into the operator image.
130+
# The operator config's gateway.errorPage.bodyPath points here.
131+
- name: error-pages
132+
mountPath: /etc/datum/error-pages
133+
readOnly: true
126134
terminationGracePeriodSeconds: 20
127135
volumes:
128136
# issuer-name and dns-names are placeholders — an overlay must patch them.
@@ -144,3 +152,14 @@ spec:
144152
items:
145153
- key: ca.crt
146154
path: ca.crt
155+
# Optional branded error-page content. The ConfigMap is created in the
156+
# infrastructure (GitOps) repo and carries the key error-5xx.html. With
157+
# optional: true the pod starts cleanly when it is absent, in which case
158+
# the extension server serves the embedded default page.
159+
- name: error-pages
160+
configMap:
161+
name: envoy-error-pages
162+
optional: true
163+
items:
164+
- key: error-5xx.html
165+
path: error-5xx.html

config/manager/config.yaml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,18 @@ gateway:
1313
#
1414
# Default: false. Set to true in environments where dns-operator is deployed.
1515
enableDNSIntegration: false
16+
# errorPage configures the branded data-plane error page served by the
17+
# extension server for edge-generated 5xx responses on the downstream /
18+
# Connector data plane (e.g. an offline Connector tunnel). When enabled, the
19+
# extension server attaches an Envoy local_reply_config to every customer-
20+
# facing HCM so visitors see a branded "temporarily unavailable" page instead
21+
# of a raw body like "no healthy upstream".
22+
#
23+
# bodyPath points at the optional ConfigMap mount (volume "error-pages" in
24+
# config/extension-server/deployment.yaml). When the file is absent or empty,
25+
# the page compiled into the operator image is used as the fallback — content
26+
# problems never block xDS or fail startup.
27+
errorPage:
28+
enabled: true
29+
bodyPath: /etc/datum/error-pages/error-5xx.html
30+
minStatusCode: 500

config/resource-metrics-policies/networking-metrics.yaml

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,30 @@ spec:
7474
- name: status
7575
value: "item.status"
7676

77+
- name: network-status-condition-last-transition-time
78+
resource:
79+
group: networking.datumapis.com
80+
version: v1alpha
81+
resource: networks
82+
families:
83+
- name: datum_cloud_network_status_condition_last_transition_time
84+
help: "last transition time for status conditions"
85+
type: gauge
86+
metrics:
87+
- forEach: "has(object.status) && has(object.status.conditions) ? object.status.conditions : []"
88+
value: "double(timestamp(item.lastTransitionTime).getSeconds())"
89+
labels:
90+
- name: name
91+
value: "object.metadata.name"
92+
- name: namespace
93+
value: "object.metadata.namespace"
94+
- name: condition
95+
value: "item.type"
96+
- name: reason
97+
value: "item.reason"
98+
- name: status
99+
value: "item.status"
100+
77101
# -------------------------------------------------------------------------
78102
# NetworkBinding
79103
# -------------------------------------------------------------------------
@@ -378,6 +402,34 @@ spec:
378402
- name: status
379403
value: "item.status"
380404

405+
- name: subnet-status-condition-last-transition-time
406+
resource:
407+
group: networking.datumapis.com
408+
version: v1alpha
409+
resource: subnets
410+
families:
411+
- name: datum_cloud_subnet_status_condition_last_transition_time
412+
help: "last transition time for status conditions"
413+
type: gauge
414+
metrics:
415+
- forEach: "has(object.status) && has(object.status.conditions) ? object.status.conditions : []"
416+
value: "double(timestamp(item.lastTransitionTime).getSeconds())"
417+
labels:
418+
- name: name
419+
value: "object.metadata.name"
420+
- name: namespace
421+
value: "object.metadata.namespace"
422+
- name: network_name
423+
value: "object.spec.network.name"
424+
- name: network_namespace
425+
value: "object.spec.network.namespace"
426+
- name: condition
427+
value: "item.type"
428+
- name: reason
429+
value: "item.reason"
430+
- name: status
431+
value: "item.status"
432+
381433
# -------------------------------------------------------------------------
382434
# SubnetClaim
383435
# -------------------------------------------------------------------------
@@ -472,6 +524,34 @@ spec:
472524
- name: status
473525
value: "item.status"
474526

527+
- name: subnet-claim-status-condition-last-transition-time
528+
resource:
529+
group: networking.datumapis.com
530+
version: v1alpha
531+
resource: subnetclaims
532+
families:
533+
- name: datum_cloud_subnet_claim_status_condition_last_transition_time
534+
help: "last transition time for status conditions"
535+
type: gauge
536+
metrics:
537+
- forEach: "has(object.status) && has(object.status.conditions) ? object.status.conditions : []"
538+
value: "double(timestamp(item.lastTransitionTime).getSeconds())"
539+
labels:
540+
- name: name
541+
value: "object.metadata.name"
542+
- name: namespace
543+
value: "object.metadata.namespace"
544+
- name: network_name
545+
value: "object.spec.network.name"
546+
- name: network_namespace
547+
value: "object.spec.network.namespace"
548+
- name: condition
549+
value: "item.type"
550+
- name: reason
551+
value: "item.reason"
552+
- name: status
553+
value: "item.status"
554+
475555
# -------------------------------------------------------------------------
476556
# Location
477557
# -------------------------------------------------------------------------
@@ -799,6 +879,30 @@ spec:
799879
- name: status
800880
value: "item.status"
801881

882+
- name: http-proxy-status-condition-last-transition-time
883+
resource:
884+
group: networking.datumapis.com
885+
version: v1alpha
886+
resource: httpproxies
887+
families:
888+
- name: datum_cloud_networking_http_proxy_status_condition_last_transition_time
889+
help: "last transition time for status conditions"
890+
type: gauge
891+
metrics:
892+
- forEach: "has(object.status) && has(object.status.conditions) ? object.status.conditions : []"
893+
value: "double(timestamp(item.lastTransitionTime).getSeconds())"
894+
labels:
895+
- name: name
896+
value: "object.metadata.name"
897+
- name: namespace
898+
value: "object.metadata.namespace"
899+
- name: condition
900+
value: "item.type"
901+
- name: reason
902+
value: "item.reason"
903+
- name: status
904+
value: "item.status"
905+
802906
- name: http-proxy-custom-hostname
803907
resource:
804908
group: networking.datumapis.com

docs/Taskfile.yaml

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
version: '3'
2+
3+
vars:
4+
DIAGRAMS_DIR: "{{.ROOT_DIR}}/docs/diagrams"
5+
OUTPUT_FORMAT: "png"
6+
PLANTUML_IMAGE: plantuml/plantuml:1.2026.4
7+
8+
tasks:
9+
generate:
10+
desc: Generate all documentation artifacts (diagrams, etc.)
11+
cmds:
12+
- task: diagrams:render
13+
silent: true
14+
15+
diagrams:
16+
desc: Generate all architecture diagrams from PlantUML
17+
cmds:
18+
- task: diagrams:render
19+
silent: true
20+
21+
diagrams:render:
22+
desc: Render PlantUML diagrams to the configured OUTPUT_FORMAT (default PNG) using Docker
23+
cmds:
24+
- |
25+
set -e
26+
echo "Rendering PlantUML diagrams..."
27+
echo ""
28+
29+
# Check if PlantUML files exist
30+
if ! ls {{.DIAGRAMS_DIR}}/*.puml >/dev/null 2>&1; then
31+
echo "❌ Error: PlantUML source files (*.puml) not found in {{.DIAGRAMS_DIR}}"
32+
exit 1
33+
fi
34+
35+
# Render using Docker (no local installation required)
36+
docker run --rm \
37+
-v "{{.DIAGRAMS_DIR}}":/data \
38+
{{.PLANTUML_IMAGE}} \
39+
-t{{.OUTPUT_FORMAT}} \
40+
/data/*.puml
41+
42+
echo ""
43+
echo "✅ Diagrams rendered in {{.DIAGRAMS_DIR}}"
44+
echo ""
45+
echo "Generated files:"
46+
ls -1 {{.DIAGRAMS_DIR}}/*.{{.OUTPUT_FORMAT}} 2>/dev/null | xargs -n1 basename || echo "No output files found"
47+
silent: true
48+
49+
diagrams:clean:
50+
desc: Remove generated diagram files
51+
cmds:
52+
- |
53+
rm -f {{.DIAGRAMS_DIR}}/*.png {{.DIAGRAMS_DIR}}/*.svg
54+
echo "✅ Generated diagram files removed"
55+
silent: true
56+
57+
diagrams:validate:
58+
desc: Validate PlantUML syntax using Docker
59+
cmds:
60+
- |
61+
set -e
62+
echo "Validating PlantUML diagrams..."
63+
docker run --rm \
64+
-v "{{.DIAGRAMS_DIR}}":/data \
65+
{{.PLANTUML_IMAGE}} \
66+
-syntax \
67+
/data/*.puml
68+
echo "✅ All diagrams are valid"
69+
silent: true

docs/diagrams/http-metering-c4.png

46.1 KB
Loading
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
@startuml http-metering-c4
2+
!include https://raw.githubusercontent.com/plantuml-stdlib/C4-PlantUML/master/C4_Container.puml
3+
4+
LAYOUT_WITH_LEGEND()
5+
6+
title C4 Container Diagram - HTTP Traffic Metering System
7+
8+
Person(client, "End User / Client", "Requests services exposed via Datum Cloud Edge")
9+
10+
System_Boundary(edge_cluster, "Edge Cluster") {
11+
Container(envoy, "Envoy Gateway Proxy", "Envoy/Go", "Handles ingress HTTP traffic, terminates TLS, enforces WAF/rate-limiting, emits JSON access logs to stdout")
12+
Container(vector_collector, "billing-usage-collector-vector", "Vector DaemonSet (Billing)", "Tails Envoy container logs, parses JSON access logs, translates to CloudEvents, and forwards them to the Billing System")
13+
Container(nso, "Network Services Operator", "Go", "Deploys Envoy Gateway and configures EnvoyProxy logging policies")
14+
}
15+
16+
System_Boundary(control_plane, "Platform Control Plane") {
17+
Container(billing_system, "Billing System & Service Catalog", "Platform Service", "Handles service registration, event validation, attribution, and storage")
18+
}
19+
20+
Rel(client, envoy, "Sends HTTPS requests to", "HTTPS")
21+
Rel(nso, envoy, "Configures & manages", "Kubernetes API / EnvoyProxy CR")
22+
Rel_D(envoy, vector_collector, "Outputs JSON access logs to", "stdout / container logs")
23+
Rel_D(vector_collector, billing_system, "Forwards batched events to", "HTTPS CloudEvents")
24+
25+
@enduml
55.5 KB
Loading

0 commit comments

Comments
 (0)