File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -26,12 +26,18 @@ collectors:
2626 pdb : true
2727 events : true
2828 metrics_api : true
29- # KSM gaps (new)
29+ # KSM gaps
3030 resource_quotas : true
3131 limit_ranges : true
3232 pod_conditions : true
3333 node_taints : true
3434 workload_generations : true
35+ # Extended metrics — full observability without external tools
36+ apiserver_metrics : true
37+ coredns_metrics : true
38+ coredns_service : "kube-dns.kube-system.svc.cluster.local:9153"
39+ container_extended_metrics : true
40+ pv_io_stats : true
3541
3642 prometheus_scraper :
3743 enabled : true
Original file line number Diff line number Diff line change @@ -246,6 +246,12 @@ collectors:
246246 pod_logs : true # Collect recent log lines from each running container
247247 pod_logs_tail_lines : 100 # Log lines per container per collection cycle
248248 pod_logs_namespaces : [] # Restrict pod log collection to these namespaces (empty = same as namespace filter)
249+ # Extended metrics — TFO Agent replaces Prometheus + kube-state-metrics + cAdvisor
250+ apiserver_metrics : true # Scrape kube-apiserver /metrics endpoint
251+ coredns_metrics : true # Scrape CoreDNS /metrics endpoint
252+ coredns_service : "kube-dns.kube-system.svc.cluster.local:9153"
253+ container_extended_metrics : true # cpu_throttled, memory_working_set, oom (via Kubelet)
254+ pv_io_stats : true # PV usage + IOPS from Kubelet volume stats
249255 # Sync resource state to TFO backend (PostgreSQL entities)
250256 sync_to_backend : true
251257 sync_interval : 60s
Original file line number Diff line number Diff line change @@ -247,6 +247,12 @@ collectors:
247247 pod_logs : true # Collect recent log lines from each running container
248248 pod_logs_tail_lines : 100 # Log lines per container per collection cycle
249249 pod_logs_namespaces : [] # Restrict pod log collection to these namespaces (empty = same as namespace filter)
250+ # Extended metrics — TFO Agent replaces Prometheus + kube-state-metrics + cAdvisor
251+ apiserver_metrics : true # Scrape kube-apiserver /metrics endpoint
252+ coredns_metrics : true # Scrape CoreDNS /metrics endpoint
253+ coredns_service : "kube-dns.kube-system.svc.cluster.local:9153" # CoreDNS service address
254+ container_extended_metrics : true # cpu_throttled, memory_working_set, oom (via Kubelet /stats/summary + cAdvisor)
255+ pv_io_stats : true # PV usage + IOPS from Kubelet volume stats
250256 # Sync resource state to TFO backend (populates PostgreSQL K8s entities)
251257 sync_to_backend : true
252258 sync_interval : 60s
Original file line number Diff line number Diff line change @@ -67,9 +67,28 @@ rules:
6767 - pods
6868 verbs : ["get", "list"]
6969
70- # Non-resource URLs (e.g. /metrics, /healthz)
70+ # Pod logs (for pod_logs collector)
71+ - apiGroups : [""]
72+ resources :
73+ - pods/log
74+ verbs : ["get", "list"]
75+
76+ # Policy resources (PDB)
77+ - apiGroups : ["policy"]
78+ resources :
79+ - poddisruptionbudgets
80+ verbs : ["get", "list", "watch"]
81+
82+ # Discovery (EndpointSlices)
83+ - apiGroups : ["discovery.k8s.io"]
84+ resources :
85+ - endpointslices
86+ verbs : ["get", "list", "watch"]
87+
88+ # Non-resource URLs (metrics, healthz, cAdvisor)
7189 - nonResourceURLs :
7290 - /metrics
91+ - /metrics/cadvisor
7392 - /healthz
7493 - /readyz
7594 verbs : ["get"]
Original file line number Diff line number Diff line change @@ -27,11 +27,11 @@ config:
2727 enabled : true
2828
2929 kubernetes :
30- enabled : false # overridden to true in kubernetes.config section below
30+ enabled : false # overridden to true in kubernetes.config section below
3131
3232 prometheus_scraper :
3333 enabled : true
34- scrape_jobs : [] # user fills in their targets
34+ scrape_jobs : [] # user fills in their targets
3535
3636 remote_write_receiver :
3737 enabled : true
@@ -48,3 +48,9 @@ kubernetes:
4848 node_taints : true
4949 workload_generations : true
5050 metrics_api : true
51+ # Extended metrics — full observability without external tools
52+ apiserver_metrics : true
53+ coredns_metrics : true
54+ coredns_service : "kube-dns.kube-system.svc.cluster.local:9153"
55+ container_extended_metrics : true
56+ pv_io_stats : true
Original file line number Diff line number Diff line change @@ -25,7 +25,7 @@ environment: production
2525image :
2626 repository : ghcr.io/telemetryflow/tfo-agent
2727 pullPolicy : IfNotPresent
28- tag : " " # Defaults to Chart.appVersion
28+ tag : " " # Defaults to Chart.appVersion
2929
3030imagePullSecrets : []
3131
@@ -298,6 +298,12 @@ kubernetes:
298298 pod_logs_namespaces : []
299299 exclude_namespaces :
300300 - kube-system
301+ # Extended metrics — TFO Agent collects directly (replaces Prometheus stack)
302+ apiserver_metrics : true # Scrape kube-apiserver /metrics
303+ coredns_metrics : true # Scrape CoreDNS /metrics
304+ coredns_service : "kube-dns.kube-system.svc.cluster.local:9153"
305+ container_extended_metrics : true # cpu_throttled, memory_working_set, oom (via Kubelet)
306+ pv_io_stats : true # PV usage + IOPS (via Kubelet volume stats)
301307 # KSM gaps (new — all default false, enabled by oneForAll.enabled)
302308 resource_quotas : false
303309 limit_ranges : false
Original file line number Diff line number Diff line change @@ -121,6 +121,16 @@ data:
121121 resource_counts: true
122122 network: true # Kubelet /stats/summary (requires nodes/proxy RBAC)
123123 metrics_api: true # CPU/Memory usage from metrics-server (set false if not installed)
124+ hpa: true
125+ pdb: true
126+ pod_logs: true
127+ pod_logs_tail_lines: 100
128+ # Extended metrics — replaces Prometheus + kube-state-metrics + cAdvisor
129+ apiserver_metrics: true
130+ coredns_metrics: true
131+ coredns_service: "kube-dns.kube-system.svc.cluster.local:9153"
132+ container_extended_metrics: true # cpu_throttled, memory_working_set, oom
133+ pv_io_stats: true # PV usage + IOPS from Kubelet volume stats
124134 sync_to_backend: true
125135 sync_interval: 60s
126136 cluster_name: "" # auto-detected from CLUSTER_NAME env or hostname
Original file line number Diff line number Diff line change @@ -140,7 +140,7 @@ spec:
140140 - name : TELEMETRYFLOW_NODE_EXPORTER_ENABLED
141141 value : " true"
142142 - name : TELEMETRYFLOW_K8S_ENABLED
143- value : " false" # K8s state handled by tfo-agent-k8s Deployment (deployment-k8s.yaml)
143+ value : " false" # K8s state handled by tfo-agent-k8s Deployment (deployment-k8s.yaml)
144144
145145 # Prometheus server for liveness/readiness probes
146146 - name : TELEMETRYFLOW_PROMETHEUS_ENABLED
@@ -156,7 +156,7 @@ spec:
156156
157157 # Cluster and environment tags for OTEL resource attributes
158158 - name : CLUSTER_NAME
159- value : " " # override with your cluster name, or auto-detected from hostname
159+ value : " " # override with your cluster name, or auto-detected from hostname
160160 - name : ENVIRONMENT
161161 value : " production"
162162
@@ -205,15 +205,15 @@ spec:
205205 mountPropagation : HostToContainer
206206
207207 securityContext :
208- runAsUser : 0 # required to read /proc and /sys for node metrics
208+ runAsUser : 0 # required to read /proc and /sys for node metrics
209209 runAsGroup : 0
210210 readOnlyRootFilesystem : true
211211 allowPrivilegeEscalation : false
212212 capabilities :
213213 drop :
214214 - ALL
215215 add :
216- - SYS_PTRACE # process inspection for node metrics
216+ - SYS_PTRACE # process inspection for node metrics
217217
218218 volumes :
219219 - name : config
Original file line number Diff line number Diff line change @@ -29,7 +29,7 @@ metadata:
2929 app.kubernetes.io/component : k8s-collector
3030 app.kubernetes.io/part-of : telemetryflow
3131spec :
32- replicas : 1 # exactly 1 — multiple replicas would duplicate cluster state syncs
32+ replicas : 1 # exactly 1 — multiple replicas would duplicate cluster state syncs
3333 selector :
3434 matchLabels :
3535 app.kubernetes.io/name : tfo-agent
@@ -134,7 +134,7 @@ spec:
134134 - name : TELEMETRYFLOW_K8S_ENABLED
135135 value : " true"
136136 - name : TELEMETRYFLOW_NODE_EXPORTER_ENABLED
137- value : " false" # node metrics handled by tfo-agent DaemonSet
137+ value : " false" # node metrics handled by tfo-agent DaemonSet
138138
139139 # Prometheus server for liveness/readiness probes
140140 - name : TELEMETRYFLOW_PROMETHEUS_ENABLED
@@ -144,7 +144,7 @@ spec:
144144
145145 # Cluster identity — used in auto-registration name and OTEL resource attributes
146146 - name : CLUSTER_NAME
147- value : " " # override with your cluster name; auto-detected from hostname if empty
147+ value : " " # override with your cluster name; auto-detected from hostname if empty
148148 - name : ENVIRONMENT
149149 value : " production"
150150
Original file line number Diff line number Diff line change @@ -79,6 +79,18 @@ rules:
7979 - volumeattachments
8080 verbs : ["get", "list", "watch"]
8181
82+ # Pod logs — for pod_logs collector
83+ - apiGroups : [""]
84+ resources :
85+ - pods/log
86+ verbs : ["get", "list"]
87+
88+ # Policy — PodDisruptionBudgets
89+ - apiGroups : ["policy"]
90+ resources :
91+ - poddisruptionbudgets
92+ verbs : ["get", "list", "watch"]
93+
8294 # Events API (events.k8s.io/v1 replaces core/v1 events in K8s 1.19+)
8395 - apiGroups : ["events.k8s.io"]
8496 resources :
You can’t perform that action at this time.
0 commit comments