Skip to content

Commit b4e0303

Browse files
committed
WIP
1 parent aa26ab8 commit b4e0303

1 file changed

Lines changed: 84 additions & 131 deletions

File tree

monitoring/config.alloy

Lines changed: 84 additions & 131 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@ logging {
44
}
55

66
locals {
7-
scrape_interval = "20s"
7+
scrape_interval = "30s"
88

99
gc_prom_url = sys.env("GCLOUD_HOSTED_METRICS_URL")
1010
gc_prom_id = sys.env("GCLOUD_HOSTED_METRICS_ID")
@@ -19,153 +19,106 @@ locals {
1919
host_root = "/host/root"
2020
}
2121

22-
remotecfg {
23-
id = sys.env("GCLOUD_FM_ID")
24-
url = sys.env("GCLOUD_FM_URL")
25-
poll_frequency = sys.env("GCLOUD_FM_POLL_FREQUENCY")
26-
27-
basic_auth {
28-
username = sys.env("GCLOUD_FM_HOSTED_ID")
29-
password = local.gc_api_key
30-
}
31-
}
32-
33-
// ###############################
34-
// #### Metrics Configuration ####
35-
// ###############################
36-
37-
// Configure a prometheus.remote_write component to send metrics to a Prometheus server.
38-
prometheus.remote_write "metrics_service" {
39-
endpoint {
40-
url = local.gc_prom_url
41-
42-
basic_auth {
43-
username = local.gc_prom_id
44-
password = local.gc_api_key
45-
}
46-
}
47-
}
48-
49-
// HOST ------------ //
50-
51-
discovery.relabel "integrations_node_exporter" {
52-
targets = prometheus.exporter.unix.integrations_node_exporter.targets
53-
54-
rule {
55-
target_label = "instance"
56-
replacement = local.hostname
57-
}
58-
59-
rule {
60-
target_label = "job"
61-
replacement = "integrations/node_exporter"
62-
}
22+
// locals {
23+
// scrape_interval = "15s"
24+
// gc_prom_url = "https://prometheus-prod-XX.grafana.net/api/prom/push"
25+
// gc_loki_url = "https://logs-prod-XX.grafana.net/loki/api/v1/push"
26+
// gc_username = "<YOUR_GRAFANA_CLOUD_USERNAME>"
27+
// gc_api_key = "<YOUR_GRAFANA_CLOUD_API_KEY>"
28+
// host_root = "/host/root"
29+
// hostname = env("HOSTNAME")
30+
// }
31+
32+
// remotecfg {
33+
// id = sys.env("GCLOUD_FM_ID")
34+
// url = sys.env("GCLOUD_FM_URL")
35+
// poll_frequency = sys.env("GCLOUD_FM_POLL_FREQUENCY")
36+
//
37+
// basic_auth {
38+
// username = sys.env("GCLOUD_FM_HOSTED_ID")
39+
// password = local.gc_api_key
40+
// }
41+
// }
42+
43+
otelcol.processor.resource "add_instance_label" {
44+
attributes {
45+
key = "instance"
46+
value = local.hostname
47+
action = "insert"
48+
}
6349
}
6450

65-
prometheus.exporter.unix "integrations_node_exporter" {
66-
disable_collectors = ["ipvs", "btrfs", "infiniband", "xfs", "zfs"]
67-
68-
filesystem {
69-
fs_types_exclude = "^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|tmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$"
70-
mount_points_exclude = "^/(dev|proc|run/credentials/.+|sys|var/lib/docker/.+)($|/)"
71-
mount_timeout = "5s"
72-
}
51+
otelcol.receiver.hostmetrics "default" {
52+
collection_interval = local.scrape_interval
7353

74-
netclass {
75-
ignored_devices = "^(veth.*|cali.*|[a-f0-9]{15})$"
54+
scrapers = {
55+
cpu = {}
56+
disk = {
57+
mount_point = local.host_root
7658
}
77-
78-
netdev {
79-
device_exclude = "^(veth.*|cali.*|[a-f0-9]{15})$"
59+
filesystem = {
60+
mount_point = local.host_root
8061
}
62+
load = {}
63+
memory = {}
64+
network = {}
65+
paging = {}
66+
uptime = {}
67+
processes = {}
68+
}
69+
70+
root_path = local.host_root
8171
}
8272

83-
prometheus.scrape "integrations_node_exporter" {
84-
targets = discovery.relabel.integrations_node_exporter.output
85-
forward_to = [prometheus.relabel.integrations_node_exporter.receiver]
73+
otelcol.receiver.docker_stats "default" {
74+
endpoint = "unix:///var/run/docker.sock"
75+
collection_interval = local.scrape_interval
8676
}
8777

88-
prometheus.relabel "integrations_node_exporter" {
89-
forward_to = [prometheus.remote_write.metrics_service.receiver]
90-
91-
rule {
92-
source_labels = ["__name__"]
93-
regex = "up|node_arp_entries|node_boot_time_seconds|node_context_switches_total|node_cpu_seconds_total|node_disk_io_time_seconds_total|node_disk_io_time_weighted_seconds_total|node_disk_read_bytes_total|node_disk_read_time_seconds_total|node_disk_reads_completed_total|node_disk_write_time_seconds_total|node_disk_writes_completed_total|node_disk_written_bytes_total|node_filefd_allocated|node_filefd_maximum|node_filesystem_avail_bytes|node_filesystem_device_error|node_filesystem_files|node_filesystem_files_free|node_filesystem_readonly|node_filesystem_size_bytes|node_intr_total|node_load1|node_load15|node_load5|node_md_disks|node_md_disks_required|node_memory_Active_anon_bytes|node_memory_Active_bytes|node_memory_Active_file_bytes|node_memory_AnonHugePages_bytes|node_memory_AnonPages_bytes|node_memory_Bounce_bytes|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_CommitLimit_bytes|node_memory_Committed_AS_bytes|node_memory_DirectMap1G_bytes|node_memory_DirectMap2M_bytes|node_memory_DirectMap4k_bytes|node_memory_Dirty_bytes|node_memory_HugePages_Free|node_memory_HugePages_Rsvd|node_memory_HugePages_Surp|node_memory_HugePages_Total|node_memory_Hugepagesize_bytes|node_memory_Inactive_anon_bytes|node_memory_Inactive_bytes|node_memory_Inactive_file_bytes|node_memory_Mapped_bytes|node_memory_MemAvailable_bytes|node_memory_MemFree_bytes|node_memory_MemTotal_bytes|node_memory_SReclaimable_bytes|node_memory_SUnreclaim_bytes|node_memory_ShmemHugePages_bytes|node_memory_ShmemPmdMapped_bytes|node_memory_Shmem_bytes|node_memory_Slab_bytes|node_memory_SwapTotal_bytes|node_memory_VmallocChunk_bytes|node_memory_VmallocTotal_bytes|node_memory_VmallocUsed_bytes|node_memory_WritebackTmp_bytes|node_memory_Writeback_bytes|node_netstat_Icmp6_InErrors|node_netstat_Icmp6_InMsgs|node_netstat_Icmp6_OutMsgs|node_netstat_Icmp_InErrors|node_netstat_Icmp_InMsgs|node_netstat_Icmp_OutMsgs|node_netstat_IpExt_InOctets|node_netstat_IpExt_OutOctets|node_netstat_TcpExt_ListenDrops|node_netst
at_TcpExt_ListenOverflows|node_netstat_TcpExt_TCPSynRetrans|node_netstat_Tcp_InErrs|node_netstat_Tcp_InSegs|node_netstat_Tcp_OutRsts|node_netstat_Tcp_OutSegs|node_netstat_Tcp_RetransSegs|node_netstat_Udp6_InDatagrams|node_netstat_Udp6_InErrors|node_netstat_Udp6_NoPorts|node_netstat_Udp6_OutDatagrams|node_netstat_Udp6_RcvbufErrors|node_netstat_Udp6_SndbufErrors|node_netstat_UdpLite_InErrors|node_netstat_Udp_InDatagrams|node_netstat_Udp_InErrors|node_netstat_Udp_NoPorts|node_netstat_Udp_OutDatagrams|node_netstat_Udp_RcvbufErrors|node_netstat_Udp_SndbufErrors|node_network_carrier|node_network_info|node_network_mtu_bytes|node_network_receive_bytes_total|node_network_receive_compressed_total|node_network_receive_drop_total|node_network_receive_errs_total|node_network_receive_fifo_total|node_network_receive_multicast_total|node_network_receive_packets_total|node_network_speed_bytes|node_network_transmit_bytes_total|node_network_transmit_compressed_total|node_network_transmit_drop_total|node_network_transmit_errs_total|node_network_transmit_fifo_total|node_network_transmit_multicast_total|node_network_transmit_packets_total|node_network_transmit_queue_length|node_network_up|node_nf_conntrack_entries|node_nf_conntrack_entries_limit|node_os_info|node_sockstat_FRAG6_inuse|node_sockstat_FRAG_inuse|node_sockstat_RAW6_inuse|node_sockstat_RAW_inuse|node_sockstat_TCP6_inuse|node_sockstat_TCP_alloc|node_sockstat_TCP_inuse|node_sockstat_TCP_mem|node_sockstat_TCP_mem_bytes|node_sockstat_TCP_orphan|node_sockstat_TCP_tw|node_sockstat_UDP6_inuse|node_sockstat_UDPLITE6_inuse|node_sockstat_UDPLITE_inuse|node_sockstat_UDP_inuse|node_sockstat_UDP_mem|node_sockstat_UDP_mem_bytes|node_sockstat_sockets_used|node_softnet_dropped_total|node_softnet_processed_total|node_softnet_times_squeezed_total|node_systemd_unit_state|node_textfile_scrape_error|node_time_zone_offset_seconds|node_timex_estimated_error_seconds|node_timex_maxerror_seconds|node_timex_offset_seconds|node_timex_sync_status|node_una
me_info|node_vmstat_oom_kill|node_vmstat_pgfault|node_vmstat_pgmajfault|node_vmstat_pgpgin|node_vmstat_pgpgout|node_vmstat_pswpin|node_vmstat_pswpout|process_max_fds|process_open_fds"
94-
action = "keep"
95-
}
96-
}
97-
98-
// DOCKER ------------ //
99-
100-
// Host Cadvisor on the Docker socket to expose container metrics.
101-
prometheus.exporter.cadvisor "linux" {
102-
docker_only = true
103-
}
104-
105-
discovery.relabel "linux" {
106-
targets = prometheus.exporter.cadvisor.linux.targets
107-
108-
rule {
109-
target_label = "job"
110-
replacement = "integrations/docker"
111-
}
112-
113-
rule {
114-
target_label = "instance"
115-
replacement = local.hostname
116-
}
117-
}
118-
119-
// Configure a prometheus.scrape component to collect cadvisor metrics.
120-
prometheus.scrape "scraper" {
121-
targets = discovery.relabel.linux.output
122-
forward_to = [prometheus.remote_write.metrics_service.receiver]
123-
124-
scrape_interval = local.scrape_interval
78+
loki.source.docker "default" {
79+
docker_host = "unix:///var/run/docker.sock"
80+
labels = {
81+
job = "docker-logs"
82+
instance = local.hostname
83+
}
12584
}
12685

127-
// ###############################
128-
// #### Logging Configuration ####
129-
// ###############################
130-
131-
loki.write "grafana_cloud_loki" {
132-
endpoint {
133-
url = local.gc_loki_url
86+
otelcol.processor.batch "default" {}
13487

135-
basic_auth {
136-
username = local.gc_loki_id
137-
password = local.gc_api_key
138-
}
139-
}
88+
otelcol.exporter.prometheusremotewrite "grafana_cloud" {
89+
endpoint = local.gc_prom_url
90+
headers = {
91+
"Authorization" = "Basic ${base64(local.gc_username + ":" + local.gc_api_key)}"
92+
}
14093
}
14194

142-
// DOCKER ------------ //
143-
144-
// Discover Docker containers and extract metadata.
145-
discovery.docker "linux" {
146-
host = "unix:///var/run/docker.sock"
95+
loki.exporter "grafana_cloud" {
96+
endpoint = local.gc_loki_url
97+
labels = {
98+
job = "container-logs"
99+
hostname = local.hostname
100+
}
101+
tenant_id = local.gc_username
102+
basic_auth {
103+
username = local.gc_username
104+
password = local.gc_api_key
105+
}
147106
}
148107

149-
// Define a relabeling rule to create a service name from the container name.
150-
discovery.relabel "logs_integrations_docker" {
151-
targets = []
152-
153-
rule {
154-
source_labels = ["__meta_docker_container_name"]
155-
regex = "/(.*)"
156-
target_label = "container_name"
157-
}
158-
159-
rule {
160-
target_label = "instance"
161-
replacement = local.hostname
108+
otelcol.service "metrics" {
109+
pipelines = {
110+
metrics = {
111+
receivers = [
112+
otelcol.receiver.hostmetrics.default,
113+
otelcol.receiver.docker_stats.default,
114+
]
115+
processors = [otelcol.processor.batch.default]
116+
exporters = [otelcol.exporter.prometheusremotewrite.grafana_cloud]
162117
}
118+
}
163119
}
164120

165-
// Configure a loki.source.docker component to collect logs from Docker containers.
166-
loki.source.docker "default" {
167-
host = "unix:///var/run/docker.sock"
168-
targets = discovery.docker.linux.targets
169-
relabel_rules = discovery.relabel.logs_integrations_docker.rules
170-
forward_to = [loki.write.grafana_cloud_loki.receiver]
121+
loki.service "logs" {
122+
sources = [loki.source.docker.default]
123+
exporters = [loki.exporter.grafana_cloud]
171124
}

0 commit comments

Comments
 (0)