-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconfigmap.yaml
More file actions
188 lines (167 loc) · 4.84 KB
/
configmap.yaml
File metadata and controls
188 lines (167 loc) · 4.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
---
# TFO-Agent ConfigMap (Node DaemonSet)
# Per-node OS metrics: node_exporter ON, kubernetes collector OFF.
# Auth and endpoint are injected via env vars (TELEMETRYFLOW_API_KEY_ID/SECRET/ENDPOINT).
#
# NOTE(review): the nesting inside tfo-agent.yaml was reconstructed from key
# order. `heartbeat` is assumed to be a top-level section and `ebpf` is assumed
# to sit under `collectors` - confirm both against the agent's config schema.
apiVersion: v1
kind: ConfigMap
metadata:
  name: tfo-agent-config
  namespace: telemetryflow
  labels:
    app.kubernetes.io/name: tfo-agent
    app.kubernetes.io/component: monitoring
data:
  # Literal block scalar: everything below is stored verbatim as the content of
  # the tfo-agent.yaml key. The ${...} placeholders are stored as literal text.
  tfo-agent.yaml: |
    agent:
      description: "TFO Agent - ${NODE_NAME}"
      tags:
        environment: "${ENVIRONMENT}"
        cluster: "${CLUSTER_NAME}"
    heartbeat:
      interval: 60s
      timeout: 10s
      include_system_info: true
    collectors:
      node_exporter:
        enabled: true
        interval: 15s
        cpu: true
        memory: true
        disk_io: true
        filesystem: true
        network: true
        load_avg: true
        thermal: false
        textfile: false
        conntrack: false
        psi: false
        vmstat: false
        sockstat: false
      # Native log collector (file tailing + journald)
      logs:
        enabled: false
        paths: []
      # Fluent Bit log collector (production-grade, bundled in image)
      fluent_bit:
        enabled: true
        config_dir: /tmp/tfo-agent-fluentbit
        flush_interval: 5
        log_level: info
        storage_enabled: true
        health_check: true
        restart_on_crash: true
        tail:
          enabled: true
          paths: []
          multiline_parser: "docker,cri"
        systemd:
          enabled: true
          units: [kubelet, docker, containerd]
        kubernetes:
          enabled: false
      # kubernetes is handled by the separate K8s Collector Deployment (tfo-agent-k8s)
      kubernetes:
        enabled: false
      ebpf:
        enabled: false
    prometheus_server:
      enabled: true
      port: 8888
      path: /metrics
    exporter:
      otlp:
        enabled: true
        batch_size: 100
        flush_interval: 10s
        compression: gzip
    buffer:
      enabled: true
      max_size_mb: 100
      path: /var/lib/tfo-agent/buffer
      flush_interval: 30s
    logging:
      level: info
      format: json
---
# TFO-Agent K8s Collector ConfigMap
# Single-replica Deployment: kubernetes collector ON, node_exporter OFF.
# Connects to TFO Platform backend REST API (not the OTLP Collector port).
# Auth and endpoint are injected via env vars.
#
# NOTE(review): the nesting inside tfo-agent.yaml was reconstructed from key
# order. `heartbeat` is assumed to be a top-level section and `ebpf` is assumed
# to sit under `collectors` - confirm both against the agent's config schema.
apiVersion: v1
kind: ConfigMap
metadata:
  name: tfo-agent-k8s-config
  namespace: telemetryflow
  labels:
    app.kubernetes.io/name: tfo-agent
    app.kubernetes.io/component: k8s-collector
data:
  # Literal block scalar: everything below is stored verbatim as the content of
  # the tfo-agent.yaml key. The ${...} placeholders are stored as literal text.
  tfo-agent.yaml: |
    agent:
      description: "TFO K8s Collector - ${CLUSTER_NAME}"
      tags:
        environment: "${ENVIRONMENT}"
        cluster: "${CLUSTER_NAME}"
    heartbeat:
      interval: 60s
      timeout: 10s
    collectors:
      node_exporter:
        enabled: false
      kubernetes:
        enabled: true
        interval: 30s
        kubeconfig: ""  # empty = in-cluster ServiceAccount auto-detection
        context: ""
        namespaces: []
        exclude_namespaces:
          - kube-system
          - kube-public
          - kube-node-lease
        nodes: true
        pods: true
        deployments: true
        namespaces_collect: true
        storage: true
        services: true
        workloads: true
        events: true
        resource_counts: true
        network: true  # Kubelet /stats/summary (requires nodes/proxy RBAC)
        metrics_api: true  # CPU/Memory usage from metrics-server (set false if not installed)
        hpa: true
        pdb: true
        pod_logs: true
        pod_logs_tail_lines: 100
        node_logs: true
        node_logs_tail_lines: 200
        node_log_sources: [kubelet, kube-proxy, containerd]
        # Extended metrics — replaces Prometheus + kube-state-metrics + cAdvisor
        apiserver_metrics: true
        coredns_metrics: true
        coredns_service: ""  # Auto-discover via pod labels (supports vanilla k8s, EKS, GKE, AKS, RKE2, k3s)
        container_extended_metrics: true  # cpu_throttled, memory_working_set, oom
        pv_io_stats: true  # PV usage + IOPS from Kubelet volume stats
        sync_to_backend: true
        sync_interval: 60s
        cluster_name: ""  # auto-detected from CLUSTER_NAME env or hostname
        cluster_provider: ""  # auto-detected from env/filesystem heuristics
        # cluster_id is auto-registered on startup (find-or-create)
        # Set TELEMETRYFLOW_K8S_CLUSTER_ID env var to skip auto-registration
      ebpf:
        enabled: false
    prometheus_server:
      enabled: true
      port: 8888
      path: /metrics
    exporter:
      otlp:
        enabled: false  # K8s state syncs directly to backend REST API, not via OTLP
    buffer:
      enabled: true
      max_size_mb: 50
      path: /var/lib/tfo-agent/buffer
      flush_interval: 30s
    logging:
      level: info
      format: json