From 9fcbcba64f16bc3ca6edf1dd17dfd5621c8c9e7d Mon Sep 17 00:00:00 2001 From: Nathan Kinkade Date: Wed, 3 Jun 2026 16:21:10 -0600 Subject: [PATCH] Removes node_exporter monitoring of old Linode IPv6 monitoring VM ... and adds it to the new GCE VM that will handle IPv6 monitoring. For convenience, deployment of node_exporter on the GCE VMs is handled in the existing script deploy_bbe_config.sh, since that script already handles getting BBE up and running on the VMs. --- config/federation/prometheus/alerts.yml | 8 ++--- .../prometheus/prometheus.yml.template | 11 +++--- deploy_bbe_config.sh | 35 +++++++++++++++++++ 3 files changed, 44 insertions(+), 10 deletions(-) diff --git a/config/federation/prometheus/alerts.yml b/config/federation/prometheus/alerts.yml index 90034c6a..809ceb1b 100644 --- a/config/federation/prometheus/alerts.yml +++ b/config/federation/prometheus/alerts.yml @@ -128,10 +128,10 @@ groups: cluster: prometheus-federation annotations: summary: The blackbox_exporter service is down or missing for IPv6 probes. - description: The blackbox_exporter for IPv6 checks runs in a Linode VM. - Make sure the VM is up and running. If it is, check the status of the - BBE container running in the VM. Domains for VMs are like - blackbox-exporter-ipv6.<project>.measurementlab.net. + description: The blackbox_exporter for IPv6 checks runs in a GCE VM in + each M-Lab GCP project. Make sure the VM is up and running. If it is, + check the status of the BBE container running in the VM. Domains for + VMs are like blackbox-exporter-ipv6.<project>.measurementlab.net. # Unable to scrape the Github Maintenance exporter or the job is missing. 
- alert: GithubMaintenanceExporterDownOrMissing diff --git a/config/federation/prometheus/prometheus.yml.template b/config/federation/prometheus/prometheus.yml.template index b0a484e4..e47eb869 100644 --- a/config/federation/prometheus/prometheus.yml.template +++ b/config/federation/prometheus/prometheus.yml.template @@ -370,17 +370,16 @@ scrape_configs: static_configs: - targets: ['eb.measurementlab.net:9100'] + # Scrape config for the node_exporter on the IPv6 GCE monitoring VM. + - job_name: 'ipv6-node-exporter' + static_configs: + - targets: ['blackbox-exporter-ipv6.{{PROJECT}}.measurementlab.net:9100'] + # Scrape config for the epoxy-boot-api. - job_name: 'epoxy-boot-api' static_configs: - targets: ['epoxy-boot-api.{{PROJECT}}.measurementlab.net:9000'] - # Scrape config for the node_exporter on the IPv6 Linode VM we use for - # monitoring IPv6. - - job_name: 'ipv6-node-exporter' - static_configs: - - targets: ['blackbox-exporter-ipv6.{{PROJECT}}.measurementlab.net:9100'] - # Scrape config for services running in data-pipeline cluster, which is # region based, so m-lab/gcp-service-discovery#33 causes problems. These # metrics are needed by the alerts like up{container="etl-gardener",instance=~".*:9090"} == 0 diff --git a/deploy_bbe_config.sh b/deploy_bbe_config.sh index d85f0b84..4a1492ea 100755 --- a/deploy_bbe_config.sh +++ b/deploy_bbe_config.sh @@ -19,6 +19,8 @@ VM_CONFIG_DIR="/etc/blackbox-exporter" VM_CONFIG_FILE="${VM_CONFIG_DIR}/config.yml" BBE_IMAGE="prom/blackbox-exporter:v0.20.0" CONTAINER_NAME="blackbox-exporter" +NODE_EXPORTER_IMAGE="prom/node-exporter:v1.8.2" +NODE_EXPORTER_CONTAINER="node-exporter" # Map projects to zones where the monitoring VM is deployed. ZONE_mlab_sandbox="us-central1-c" @@ -46,5 +48,38 @@ gcloud compute ssh ${GCE_OPTS} "${VM_NAME}" --command=" \ --volume ${VM_CONFIG_DIR}:${VM_CONFIG_DIR}:ro \ --restart always --name ${CONTAINER_NAME} ${BBE_IMAGE} \ --config.file=${VM_CONFIG_FILE}; \ + fi && \ + if ! 
sudo docker inspect ${NODE_EXPORTER_CONTAINER} > /dev/null 2>&1; then \ + sudo docker run --detach --network=host --pid=host \ + --volume /:/host:ro,rslave \ + --restart always --name ${NODE_EXPORTER_CONTAINER} ${NODE_EXPORTER_IMAGE} \ + --path.rootfs=/host \ + --no-collector.arp \ + --no-collector.bcache \ + --no-collector.bonding \ + --no-collector.conntrack \ + --no-collector.cpu \ + --no-collector.diskstats \ + --no-collector.edac \ + --no-collector.entropy \ + --no-collector.filefd \ + --no-collector.hwmon \ + --no-collector.infiniband \ + --no-collector.ipvs \ + --no-collector.mdadm \ + --no-collector.netclass \ + --no-collector.netstat \ + --no-collector.nfs \ + --no-collector.nfsd \ + --no-collector.sockstat \ + --no-collector.stat \ + --no-collector.systemd \ + --no-collector.textfile \ + --no-collector.time \ + --no-collector.timex \ + --no-collector.uname \ + --no-collector.vmstat \ + --no-collector.xfs \ + --no-collector.zfs; \ fi \ "