Skip to content

Commit 8fddbd6

Browse files
committed
feat: Kubernetes network policy
2 parents 42b76eb + c9a9c8c commit 8fddbd6

6 files changed

Lines changed: 494 additions & 4 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
110110

111111
### Fixed
112112

113+
- **Default endpoint config** (`configs/tfo-agent.yaml`): Corrected default `TELEMETRYFLOW_ENDPOINT` from `http://localhost:3000/api/v2/monitoring` to `http://localhost:3000/api/v2` — aligns with all Kubernetes Helm templates, Docker Compose, and platform config files which consistently use `/api/v2` as base URL. Agent API paths already include `/monitoring/` prefix, so the previous default caused double `/monitoring` when using the built-in fallback
113114
- **Config env var expansion** (`internal/config/loader.go`): Viper-based config loader now calls `os.ExpandEnv()` on YAML content before parsing, resolving `${VAR}` placeholders in config values (e.g., `${NODE_IP}` in cAdvisor endpoint). Previously, env var references in config values were passed as literal strings, causing URL parse failures
114115
- **cAdvisor kubelet HTTPS** (`internal/collector/cadvisor/cadvisor.go`): HTTP client now respects `insecure_skip_verify` config and includes ServiceAccount bearer token in requests. Previously, HTTPS kubelet endpoints failed with `x509: certificate signed by unknown authority` and `403 Forbidden`
115116
- **cAdvisor Prometheus parser** (`internal/collector/cadvisor/cadvisor.go`): Switched to `LegacyValidation` parser to support traditional `container_*` and `machine_*` metric names; optional `metric_names` allowlist for selective collection

configs/tfo-agent.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
# TELEMETRYFLOW_API_KEY_SECRET - API Key Secret for authentication (format: tfs_xxx)
1515
# TELEMETRYFLOW_ENDPOINT - TFO backend base URL
1616
# gRPC: tfo-collector:4317
17-
# HTTP: http://tfo-backend:3000/api/v2/monitoring
17+
# HTTP: http://tfo-backend:3000/api/v2
1818
# TELEMETRYFLOW_AGENT_ID - Unique agent ID (optional, auto-generated if unset)
1919
# TELEMETRYFLOW_AGENT_NAME - Human-readable agent name (optional)
2020
# TELEMETRYFLOW_ENVIRONMENT - Deployment environment tag, e.g. production (optional)
@@ -26,7 +26,7 @@
2626
# Example .env file:
2727
# TELEMETRYFLOW_API_KEY_ID=tfk_your_key_id
2828
# TELEMETRYFLOW_API_KEY_SECRET=tfs_your_key_secret
29-
# TELEMETRYFLOW_ENDPOINT=http://tfo-backend:3000/api/v2/monitoring # HTTP mode
29+
# TELEMETRYFLOW_ENDPOINT=http://tfo-backend:3000/api/v2 # HTTP mode
3030
# # TELEMETRYFLOW_ENDPOINT=tfo-collector:4317 # gRPC mode
3131
#
3232
# =============================================================================
@@ -42,10 +42,10 @@ telemetryflow:
4242
api_key_secret: "${TELEMETRYFLOW_API_KEY_SECRET}"
4343
# TFO backend endpoint — format depends on protocol:
4444
# grpc: bare host:port (e.g., tfo-collector:4317)
45-
# http: full HTTP base URL (e.g., http://tfo-backend:3000/api/v2/monitoring)
45+
# http: full HTTP base URL (e.g., http://tfo-backend:3000/api/v2)
4646
# Used for agent heartbeat/registration and as OTLP base URL when no
4747
# per-signal endpoint override is configured in exporter.otlp.*.endpoint.
48-
endpoint: "${TELEMETRYFLOW_ENDPOINT:-http://localhost:3000/api/v2/monitoring}"
48+
endpoint: "${TELEMETRYFLOW_ENDPOINT:-http://localhost:3000/api/v2}"
4949
# Protocol: grpc (OTLP native) or http (REST + OTLP/HTTP)
5050
protocol: http
5151
# TLS settings for backend connection

internal/collector/kubernetes/kubernetes.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,17 @@ func (k *KubernetesCollector) Collect(ctx context.Context) ([]collector.Metric,
267267
}
268268
}
269269

270+
// --- Network Policies ---
271+
{
272+
npMetrics, nps, err := collectNetworkPolicies(ctx, k.clientset, k.cfg, k.cfg.ClusterName)
273+
if err != nil {
274+
k.logger.Warn("Failed to collect network policy state", zap.Error(err))
275+
} else {
276+
allMetrics = append(allMetrics, npMetrics...)
277+
state.NetworkPolicies = nps
278+
}
279+
}
280+
270281
// --- Events ---
271282
if k.cfg.Events {
272283
metrics, events, err := collectEvents(ctx, k.clientset, k.cfg, k.cfg.ClusterName)
Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
// Package kubernetes collects resource and performance metrics from a Kubernetes
2+
// cluster via the API server and Kubelet stats endpoints, covering nodes, pods,
3+
// deployments, services, namespaces, storage, network policies, HPAs, PDBs,
4+
// workload controllers, events, and pod logs.
5+
//
6+
// TelemetryFlow Agent - Community Enterprise Observability Platform
7+
// Copyright (c) 2024-2026 TelemetryFlow. All rights reserved.
8+
// Open Source Software built by DevOpsCorner Indonesia.
9+
//
10+
// Licensed under the Apache License, Version 2.0 (the "License");
11+
// you may not use this file except in compliance with the License.
12+
// You may obtain a copy of the License at
13+
//
14+
// http://www.apache.org/licenses/LICENSE-2.0
15+
//
16+
// Unless required by applicable law or agreed to in writing, software
17+
// distributed under the License is distributed on an "AS IS" BASIS,
18+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19+
// See the License for the specific language governing permissions and
20+
// limitations under the License.
21+
package kubernetes
22+
23+
import (
24+
"context"
25+
26+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
27+
"k8s.io/client-go/kubernetes"
28+
29+
"github.com/telemetryflow/telemetryflow-agent/internal/collector"
30+
)
31+
32+
// collectNetworkPolicies gathers Kubernetes NetworkPolicy resources.
33+
func collectNetworkPolicies(
34+
ctx context.Context,
35+
cs kubernetes.Interface,
36+
cfg Config,
37+
cluster string,
38+
) ([]collector.Metric, []NetworkPolicyState, error) {
39+
npList, err := cs.NetworkingV1().NetworkPolicies("").List(ctx, metav1.ListOptions{})
40+
if err != nil {
41+
return nil, nil, err
42+
}
43+
44+
var metrics []collector.Metric
45+
var states []NetworkPolicyState
46+
nsCounts := make(map[string]int)
47+
48+
for i := range npList.Items {
49+
np := &npList.Items[i]
50+
51+
if !cfg.shouldCollectNamespace(np.Namespace) {
52+
continue
53+
}
54+
55+
nsCounts[np.Namespace]++
56+
57+
// Policy types
58+
var policyTypes []string
59+
for _, pt := range np.Spec.PolicyTypes {
60+
policyTypes = append(policyTypes, string(pt))
61+
}
62+
63+
// Pod selector (which pods this policy applies to)
64+
podSelector := make(map[string]string)
65+
for k, v := range np.Spec.PodSelector.MatchLabels {
66+
podSelector[k] = v
67+
}
68+
69+
// Ingress rules count
70+
ingressRuleCount := len(np.Spec.Ingress)
71+
72+
// Egress rules count
73+
egressRuleCount := len(np.Spec.Egress)
74+
75+
// Build ingress rules detail
76+
var ingressRules []NetworkPolicyRule
77+
for _, rule := range np.Spec.Ingress {
78+
var ports []NetworkPolicyPort
79+
for _, p := range rule.Ports {
80+
npp := NetworkPolicyPort{
81+
Protocol: "TCP",
82+
}
83+
if p.Protocol != nil {
84+
npp.Protocol = string(*p.Protocol)
85+
}
86+
if p.Port != nil {
87+
npp.Port = p.Port.String()
88+
}
89+
ports = append(ports, npp)
90+
}
91+
var fromPeers []NetworkPolicyPeer
92+
for _, from := range rule.From {
93+
peer := NetworkPolicyPeer{}
94+
if from.PodSelector != nil {
95+
peer.PodSelector = from.PodSelector.MatchLabels
96+
}
97+
if from.NamespaceSelector != nil {
98+
peer.NamespaceSelector = from.NamespaceSelector.MatchLabels
99+
}
100+
if from.IPBlock != nil {
101+
peer.IPBlock = &NetworkPolicyIPBlock{
102+
CIDR: from.IPBlock.CIDR,
103+
Except: from.IPBlock.Except,
104+
}
105+
}
106+
fromPeers = append(fromPeers, peer)
107+
}
108+
ingressRules = append(ingressRules, NetworkPolicyRule{
109+
Ports: ports,
110+
Peers: fromPeers,
111+
})
112+
}
113+
114+
// Build egress rules detail
115+
var egressRules []NetworkPolicyRule
116+
for _, rule := range np.Spec.Egress {
117+
var ports []NetworkPolicyPort
118+
for _, p := range rule.Ports {
119+
npp := NetworkPolicyPort{
120+
Protocol: "TCP",
121+
}
122+
if p.Protocol != nil {
123+
npp.Protocol = string(*p.Protocol)
124+
}
125+
if p.Port != nil {
126+
npp.Port = p.Port.String()
127+
}
128+
ports = append(ports, npp)
129+
}
130+
var toPeers []NetworkPolicyPeer
131+
for _, to := range rule.To {
132+
peer := NetworkPolicyPeer{}
133+
if to.PodSelector != nil {
134+
peer.PodSelector = to.PodSelector.MatchLabels
135+
}
136+
if to.NamespaceSelector != nil {
137+
peer.NamespaceSelector = to.NamespaceSelector.MatchLabels
138+
}
139+
if to.IPBlock != nil {
140+
peer.IPBlock = &NetworkPolicyIPBlock{
141+
CIDR: to.IPBlock.CIDR,
142+
Except: to.IPBlock.Except,
143+
}
144+
}
145+
toPeers = append(toPeers, peer)
146+
}
147+
egressRules = append(egressRules, NetworkPolicyRule{
148+
Ports: ports,
149+
Peers: toPeers,
150+
})
151+
}
152+
153+
states = append(states, NetworkPolicyState{
154+
Name: np.Name,
155+
Namespace: np.Namespace,
156+
PolicyTypes: policyTypes,
157+
PodSelector: podSelector,
158+
IngressRuleCount: ingressRuleCount,
159+
EgressRuleCount: egressRuleCount,
160+
IngressRules: ingressRules,
161+
EgressRules: egressRules,
162+
Labels: np.Labels,
163+
CreatedAt: np.CreationTimestamp.UnixMilli(),
164+
})
165+
}
166+
167+
// Metrics: network policy count per namespace
168+
for ns, count := range nsCounts {
169+
metrics = append(metrics,
170+
collector.NewMetric("k8s.networkpolicy.count", float64(count), collector.MetricTypeGauge).
171+
WithLabel("cluster", cluster).
172+
WithLabel("namespace", ns).
173+
WithDescription("NetworkPolicy count per namespace"),
174+
)
175+
}
176+
177+
return metrics, states, nil
178+
}

internal/collector/kubernetes/types.go

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ type ClusterState struct {
3939
Services []ServiceState `json:"services,omitempty"`
4040
Endpoints []EndpointState `json:"endpoints,omitempty"`
4141
Ingresses []IngressState `json:"ingresses,omitempty"`
42+
NetworkPolicies []NetworkPolicyState `json:"network_policies,omitempty"`
4243
PVs []PVState `json:"pvs,omitempty"`
4344
PVCs []PVCState `json:"pvcs,omitempty"`
4445
Events []EventState `json:"events,omitempty"`
@@ -411,6 +412,45 @@ type EventState struct {
411412
LastTimestamp int64 `json:"last_timestamp"` // Unix millis
412413
}
413414

415+
// NetworkPolicyState represents a Kubernetes NetworkPolicy resource.
416+
type NetworkPolicyState struct {
417+
Name string `json:"name"`
418+
Namespace string `json:"namespace"`
419+
PolicyTypes []string `json:"policy_types,omitempty"` // Ingress, Egress
420+
PodSelector map[string]string `json:"pod_selector,omitempty"`
421+
IngressRuleCount int `json:"ingress_rule_count"`
422+
EgressRuleCount int `json:"egress_rule_count"`
423+
IngressRules []NetworkPolicyRule `json:"ingress_rules,omitempty"`
424+
EgressRules []NetworkPolicyRule `json:"egress_rules,omitempty"`
425+
Labels map[string]string `json:"labels,omitempty"`
426+
CreatedAt int64 `json:"created_at,omitempty"` // Unix millis
427+
}
428+
429+
// NetworkPolicyRule represents a single ingress or egress rule.
430+
type NetworkPolicyRule struct {
431+
Ports []NetworkPolicyPort `json:"ports,omitempty"`
432+
Peers []NetworkPolicyPeer `json:"peers,omitempty"`
433+
}
434+
435+
// NetworkPolicyPort represents a port allowed by a network policy.
436+
type NetworkPolicyPort struct {
437+
Protocol string `json:"protocol"`
438+
Port string `json:"port,omitempty"` // port number or named port
439+
}
440+
441+
// NetworkPolicyPeer represents a source/destination peer in a network policy.
442+
type NetworkPolicyPeer struct {
443+
PodSelector map[string]string `json:"pod_selector,omitempty"`
444+
NamespaceSelector map[string]string `json:"namespace_selector,omitempty"`
445+
IPBlock *NetworkPolicyIPBlock `json:"ip_block,omitempty"`
446+
}
447+
448+
// NetworkPolicyIPBlock represents a CIDR block in a network policy.
449+
type NetworkPolicyIPBlock struct {
450+
CIDR string `json:"cidr"`
451+
Except []string `json:"except,omitempty"`
452+
}
453+
414454
// ResourceCounts holds per-namespace resource counts for the overview dashboard.
415455
type ResourceCounts struct {
416456
Secrets map[string]int `json:"secrets,omitempty"`

0 commit comments

Comments
 (0)